|
4 | 4 | from rvc.lib.predictors.RMVPE import RMVPE0Predictor |
5 | 5 | from torchfcpe import spawn_bundled_infer_model |
6 | 6 | import torchcrepe |
7 | | - |
| 7 | +from swift_f0 import SwiftF0 |
| 8 | +import numpy as np |
8 | 9 |
|
9 | 10 | class RMVPE: |
10 | 11 | def __init__(self, device, model_name="rmvpe.pt", sample_rate=16000, hop_size=160): |
@@ -82,3 +83,30 @@ def get_f0(self, x, p_len=None, filter_radius=0.006): |
82 | 83 | ) |
83 | 84 |
|
84 | 85 | return f0 |
| 86 | + |
class SWIFT:
    """Frame-level pitch (F0) extractor built on the SwiftF0 detector.

    The detector reports pitch on its own time grid; ``get_f0`` resamples
    that track onto frames of ``hop_size`` samples so the result lines up
    with a caller-chosen frame count.
    """

    # Bounds applied to the requested search range before it is handed to
    # SwiftF0 (presumably the range the model supports -- confirm against
    # the swift_f0 documentation).
    _FMIN_LIMIT = 46.875
    _FMAX_LIMIT = 2093.75

    def __init__(self, device, sample_rate=16000, hop_size=160):
        """Store the framing parameters.

        Args:
            device: Accepted so the constructor takes the same leading
                argument as ``RMVPE``; it is ignored and the instance
                always reports ``"cpu"``.
            sample_rate: Sample rate, in Hz, of the audio given to
                ``get_f0``.
            hop_size: Number of samples between consecutive output frames.
        """
        self.device = "cpu"
        self.sample_rate = sample_rate
        self.hop_size = hop_size
        # Single-slot cache so repeated calls with the same settings reuse
        # one detector instead of constructing a new one per call.
        self._detector = None
        self._detector_key = None

    def _get_detector(self, f0_min, f0_max, confidence_threshold):
        """Return a SwiftF0 detector for these settings, rebuilding on change."""
        key = (f0_min, f0_max, confidence_threshold)
        if self._detector is None or self._detector_key != key:
            self._detector = SwiftF0(
                fmin=f0_min,
                fmax=f0_max,
                confidence_threshold=confidence_threshold,
            )
            self._detector_key = key
        return self._detector

    def get_f0(self, x, f0_min=50, f0_max=1100, p_len=None, confidence_threshold=0.9):
        """Estimate F0 for ``x`` on a fixed ``hop_size`` frame grid.

        Args:
            x: Audio samples as a NumPy array or a torch tensor; tensors
                are detached and moved to host memory first.
                NOTE(review): ``x.shape[0]`` is used as the sample count,
                so a 1-D mono signal is assumed -- confirm with callers.
            f0_min: Lower bound of the pitch search range in Hz; raised to
                46.875 if smaller.
            f0_max: Upper bound of the pitch search range in Hz; lowered to
                2093.75 if larger.
            p_len: Number of output frames. Defaults to
                ``x.shape[0] // hop_size``.
            confidence_threshold: Passed through to ``SwiftF0``.

        Returns:
            A 1-D ``numpy.ndarray`` of length ``p_len`` holding F0 in Hz,
            with ``0.0`` marking unvoiced frames and frames that fall
            outside the time span covered by the detector.
        """
        if torch.is_tensor(x):
            # detach() keeps .numpy() from raising on tensors that require grad.
            x = x.detach().cpu().numpy()

        if p_len is None:
            p_len = x.shape[0] // self.hop_size

        f0_min = max(f0_min, self._FMIN_LIMIT)
        f0_max = min(f0_max, self._FMAX_LIMIT)

        detector = self._get_detector(f0_min, f0_max, confidence_threshold)
        result = detector.detect_from_array(x, self.sample_rate)

        timestamps = np.asarray(result.timestamps, dtype=np.float64)
        if timestamps.size == 0:
            return np.zeros(p_len)

        pitch = np.nan_to_num(np.asarray(result.pitch_hz, dtype=np.float64), nan=0.0)
        voiced = np.asarray(result.voicing, dtype=bool) & (pitch > 0.0)
        if not voiced.any():
            return np.zeros(p_len)

        # Centre time, in seconds, of every output frame.
        target_time = (np.arange(p_len) * self.hop_size + self.hop_size / 2) / self.sample_rate

        # Interpolate pitch across voiced detector frames only. Interpolating
        # a track in which unvoiced frames were zeroed would blend real pitch
        # with 0 Hz and emit spurious low F0 values at every voiced/unvoiced
        # boundary.
        f0 = np.interp(target_time, timestamps[voiced], pitch[voiced])

        # Decide voicing per output frame from the interpolated voicing flag:
        # a frame is voiced when it lies closer to a voiced detector frame
        # than to an unvoiced one. Frames outside the detector's time span
        # get weight 0 and are therefore zeroed as well.
        voiced_weight = np.interp(
            target_time, timestamps, voiced.astype(np.float64), left=0.0, right=0.0
        )
        f0[voiced_weight < 0.5] = 0.0

        return f0
0 commit comments