Skip to content

Commit 597ff8e

Browse files
authored
Merge pull request #1136 from Vidalnt/main
Realtime
2 parents 50e8a9f + 5cc98a0 commit 597ff8e

14 files changed

Lines changed: 1587 additions & 5 deletions

File tree

app.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,7 @@
3030
from tabs.voice_blender.voice_blender import voice_blender_tab
3131
from tabs.plugins.plugins import plugins_tab
3232
from tabs.settings.settings import settings_tab
33-
33+
from tabs.realtime.realtime import realtime_tab
3434
# Run prerequisites
3535
from core import run_prerequisites_script
3636

@@ -89,6 +89,9 @@
8989

9090
with gr.Tab(i18n("Voice Blender")):
9191
voice_blender_tab()
92+
93+
with gr.Tab(i18n("Realtime")):
94+
realtime_tab()
9295

9396
with gr.Tab(i18n("Plugins")):
9497
plugins_tab()

core.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -675,6 +675,7 @@ def parse_arguments():
675675
"crepe-tiny",
676676
"rmvpe",
677677
"fcpe",
678+
"swift",
678679
"hybrid[crepe+rmvpe]",
679680
"hybrid[crepe+fcpe]",
680681
"hybrid[rmvpe+fcpe]",
@@ -1198,6 +1199,7 @@ def parse_arguments():
11981199
"crepe-tiny",
11991200
"rmvpe",
12001201
"fcpe",
1202+
"swift",
12011203
"hybrid[crepe+rmvpe]",
12021204
"hybrid[crepe+fcpe]",
12031205
"hybrid[rmvpe+fcpe]",
@@ -1683,6 +1685,7 @@ def parse_arguments():
16831685
"crepe-tiny",
16841686
"rmvpe",
16851687
"fcpe",
1688+
"swift",
16861689
"hybrid[crepe+rmvpe]",
16871690
"hybrid[crepe+fcpe]",
16881691
"hybrid[rmvpe+fcpe]",

requirements.txt

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@ torchvision==0.22.1; sys_platform == 'darwin'
3030
torchvision==0.22.1+cu128; sys_platform == 'linux' or sys_platform == 'win32'
3131
torchcrepe==0.0.23
3232
torchfcpe
33+
swift_f0
3334
einops
3435
transformers==4.44.2
3536

@@ -45,5 +46,5 @@ tensorboardX
4546
edge-tts==7.2.0
4647
pypresence
4748
beautifulsoup4
48-
49-
49+
sounddevice
50+
webrtcvad

rvc/infer/pipeline.py

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@
1313
now_dir = os.getcwd()
1414
sys.path.append(now_dir)
1515

16-
from rvc.lib.predictors.f0 import CREPE, FCPE, RMVPE
16+
from rvc.lib.predictors.f0 import CREPE, FCPE, RMVPE, SWIFT
1717

1818
import logging
1919

@@ -244,6 +244,12 @@ def get_f0(
244244
)
245245
f0 = model.get_f0(x, p_len, filter_radius=0.006)
246246
del model
247+
elif f0_method == "swift":
248+
model = SWIFT(
249+
device=self.device, sample_rate=self.sample_rate, hop_size=self.window
250+
)
251+
f0 = model.get_f0(x, self.f0_min, self.f0_max, p_len, confidence_threshold=0.887)
252+
del model
247253

248254
# f0 adjustments
249255
if f0_autotune is True:

rvc/lib/predictors/f0.py

Lines changed: 29 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,8 @@
44
from rvc.lib.predictors.RMVPE import RMVPE0Predictor
55
from torchfcpe import spawn_bundled_infer_model
66
import torchcrepe
7-
7+
from swift_f0 import SwiftF0
8+
import numpy as np
89

910
class RMVPE:
1011
def __init__(self, device, model_name="rmvpe.pt", sample_rate=16000, hop_size=160):
@@ -82,3 +83,30 @@ def get_f0(self, x, p_len=None, filter_radius=0.006):
8283
)
8384

8485
return f0
86+
87+
class SWIFT:
    """SwiftF0-based F0 (pitch) predictor.

    Thin wrapper around the ``swift_f0`` package's ``SwiftF0`` detector that
    resamples its variable analysis grid onto the fixed ``hop_size`` frame
    grid expected by the rest of the pipeline (same interface shape as the
    sibling CREPE/FCPE/RMVPE predictors in this module).
    """

    # SwiftF0's analyzable pitch range in Hz; requested bounds are clamped
    # into this interval before detection. Named here so the values are not
    # unexplained magic numbers in get_f0.
    _MODEL_F0_MIN = 46.875
    _MODEL_F0_MAX = 2093.75

    def __init__(self, device, sample_rate=16000, hop_size=160):
        # NOTE(review): `device` is accepted only for interface parity with
        # the other predictors and is deliberately ignored — SwiftF0 is
        # presumably CPU-only, so the device is pinned to "cpu". Confirm
        # against the swift_f0 package before changing.
        self.device = "cpu"
        self.sample_rate = sample_rate  # expected sample rate of input audio
        self.hop_size = hop_size  # samples per output f0 frame

    def get_f0(self, x, f0_min=50, f0_max=1100, p_len=None, confidence_threshold=0.9):
        """Estimate f0 for audio ``x`` on a hop-aligned frame grid.

        Args:
            x: 1-D audio signal (numpy array or torch tensor) sampled at
                ``self.sample_rate``.
            f0_min: Requested lower pitch bound in Hz (clamped to model range).
            f0_max: Requested upper pitch bound in Hz (clamped to model range).
            p_len: Number of output frames; derived as
                ``len(x) // hop_size`` when None.
            confidence_threshold: Voicing confidence cutoff forwarded to SwiftF0.

        Returns:
            numpy array of shape ``(p_len,)`` with f0 in Hz; unvoiced or
            out-of-range frames are 0.
        """
        if torch.is_tensor(x):
            x = x.cpu().numpy()

        if p_len is None:
            p_len = x.shape[0] // self.hop_size

        # Clamp the requested range to what the model can actually analyze.
        f0_min = max(f0_min, self._MODEL_F0_MIN)
        f0_max = min(f0_max, self._MODEL_F0_MAX)

        detector = SwiftF0(
            fmin=f0_min, fmax=f0_max, confidence_threshold=confidence_threshold
        )
        result = detector.detect_from_array(x, self.sample_rate)
        if len(result.timestamps) == 0:
            # No analysis frames produced (e.g. input shorter than one
            # analysis window): return an all-unvoiced contour.
            return np.zeros(p_len)

        # Centers of the pipeline's fixed-hop output frames, in seconds.
        target_time = (np.arange(p_len) * self.hop_size + self.hop_size / 2) / self.sample_rate

        # Zero out NaNs and unvoiced frames before interpolating onto the
        # target grid, so silence never bleeds pitch into neighbors.
        pitch = np.nan_to_num(result.pitch_hz, nan=0.0)
        pitch[~result.voicing] = 0.0
        f0 = np.interp(target_time, result.timestamps, pitch, left=0.0, right=0.0)

        return f0

tabs/inference/inference.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -948,6 +948,7 @@ def inference_tab():
948948
"crepe-tiny",
949949
"rmvpe",
950950
"fcpe",
951+
"swift",
951952
],
952953
value="rmvpe",
953954
interactive=True,
@@ -1582,6 +1583,7 @@ def enforce_terms_batch(terms_accepted, *args):
15821583
"crepe-tiny",
15831584
"rmvpe",
15841585
"fcpe",
1586+
"swift",
15851587
],
15861588
value="rmvpe",
15871589
interactive=True,

0 commit comments

Comments
 (0)