IAHispano
diff --git a/‎app.py‎
Lines changed: 4 additions & 1 deletion b/‎app.py‎
Lines changed: 4 additions & 1 deletion
diff --git a/‎core.py‎
Lines changed: 3 additions & 0 deletions b/‎core.py‎
Lines changed: 3 additions & 0 deletions
diff --git a/‎requirements.txt‎
Lines changed: 3 additions & 2 deletions b/‎requirements.txt‎
Lines changed: 3 additions & 2 deletions
diff --git a/‎rvc/infer/pipeline.py‎
Lines changed: 7 additions & 1 deletion b/‎rvc/infer/pipeline.py‎
Lines changed: 7 additions & 1 deletion
diff --git a/‎rvc/lib/predictors/f0.py‎
Lines changed: 29 additions & 1 deletion b/‎rvc/lib/predictors/f0.py‎
Lines changed: 29 additions & 1 deletion
diff --git a/‎tabs/inference/inference.py‎
Lines changed: 2 additions & 0 deletions b/‎tabs/inference/inference.py‎
Lines changed: 2 additions & 0 deletions
@@ -30,7 +30,7 @@
 from tabs.voice_blender.voice_blender import voice_blender_tab
 from tabs.plugins.plugins import plugins_tab
 from tabs.settings.settings import settings_tab
-
+from tabs.realtime.realtime import realtime_tab
 # Run prerequisites
 from core import run_prerequisites_script
 
@@ -89,6 +89,9 @@
 
     with gr.Tab(i18n("Voice Blender")):
         voice_blender_tab()
+    
+    with gr.Tab(i18n("Realtime")):
+        realtime_tab()
 
     with gr.Tab(i18n("Plugins")):
         plugins_tab()
 
@@ -675,6 +675,7 @@ def parse_arguments():
             "crepe-tiny",
             "rmvpe",
             "fcpe",
+            "swift",
             "hybrid[crepe+rmvpe]",
             "hybrid[crepe+fcpe]",
             "hybrid[rmvpe+fcpe]",
@@ -1198,6 +1199,7 @@ def parse_arguments():
             "crepe-tiny",
             "rmvpe",
             "fcpe",
+            "swift",
             "hybrid[crepe+rmvpe]",
             "hybrid[crepe+fcpe]",
             "hybrid[rmvpe+fcpe]",
@@ -1683,6 +1685,7 @@ def parse_arguments():
             "crepe-tiny",
             "rmvpe",
             "fcpe",
+            "swift",
             "hybrid[crepe+rmvpe]",
             "hybrid[crepe+fcpe]",
             "hybrid[rmvpe+fcpe]",
 
@@ -30,6 +30,7 @@ torchvision==0.22.1; sys_platform == 'darwin'
 torchvision==0.22.1+cu128; sys_platform == 'linux' or sys_platform == 'win32'
 torchcrepe==0.0.23
 torchfcpe
+swift_f0
 einops
 transformers==4.44.2
 
@@ -45,5 +46,5 @@ tensorboardX
 edge-tts==7.2.0
 pypresence
 beautifulsoup4
-
-
+sounddevice
+webrtcvad
@@ -13,7 +13,7 @@
 now_dir = os.getcwd()
 sys.path.append(now_dir)
 
-from rvc.lib.predictors.f0 import CREPE, FCPE, RMVPE
+from rvc.lib.predictors.f0 import CREPE, FCPE, RMVPE, SWIFT
 
 import logging
 
@@ -244,6 +244,12 @@ def get_f0(
             )
             f0 = model.get_f0(x, p_len, filter_radius=0.006)
             del model
+        elif f0_method == "swift":
+            model = SWIFT(
+                device=self.device, sample_rate=self.sample_rate, hop_size=self.window
+            )
+            f0 = model.get_f0(x, self.f0_min, self.f0_max, p_len, confidence_threshold=0.887)
+            del model
 
         # f0 adjustments
         if f0_autotune is True:
 
@@ -4,7 +4,8 @@
 from rvc.lib.predictors.RMVPE import RMVPE0Predictor
 from torchfcpe import spawn_bundled_infer_model
 import torchcrepe
-
+from swift_f0 import SwiftF0
+import numpy as np
 
 class RMVPE:
     def __init__(self, device, model_name="rmvpe.pt", sample_rate=16000, hop_size=160):
@@ -82,3 +83,30 @@ def get_f0(self, x, p_len=None, filter_radius=0.006):
         )
 
         return f0
+
+class SWIFT:
+    """F0 (pitch) predictor that wraps the ``swift_f0`` ``SwiftF0`` detector.
+
+    ``device`` is accepted so the constructor has the same shape as the other
+    predictors in this module, but it is ignored: ``self.device`` is always
+    set to ``"cpu"``.
+    """
+
+    def __init__(self, device, sample_rate=16000, hop_size=160):
+        self.device = "cpu"
+        self.sample_rate = sample_rate
+        self.hop_size = hop_size
+
+    @staticmethod
+    def _resample_f0(timestamps, pitch_hz, voicing, target_time):
+        """Resample a frame-wise pitch track onto ``target_time`` (seconds).
+
+        The pitch contour is interpolated from voiced frames only, and the
+        voiced/unvoiced decision for each output frame is copied from the
+        nearest source frame. Interpolating through zeroed unvoiced frames
+        instead would invent intermediate pitches (e.g. halfway between
+        200 Hz and 0 Hz) at every voiced/unvoiced boundary.
+
+        Returns a float array with one value per entry of ``target_time``;
+        unvoiced frames and frames outside the span of ``timestamps`` are 0.
+        """
+        timestamps = np.asarray(timestamps, dtype=np.float64)
+        target_time = np.asarray(target_time, dtype=np.float64)
+        pitch = np.nan_to_num(np.asarray(pitch_hz, dtype=np.float64), nan=0.0)
+        # A frame flagged voiced but without a usable pitch is treated as unvoiced.
+        voiced = np.asarray(voicing, dtype=bool) & (pitch > 0.0)
+
+        f0 = np.zeros(target_time.shape[0])
+        if timestamps.size == 0 or not voiced.any():
+            return f0
+
+        contour = np.interp(target_time, timestamps[voiced], pitch[voiced])
+
+        # Index of the source frame closest in time to each output frame.
+        last = timestamps.size - 1
+        insert_at = np.searchsorted(timestamps, target_time)
+        before = np.clip(insert_at - 1, 0, last)
+        after = np.clip(insert_at, 0, last)
+        nearest = np.where(
+            target_time - timestamps[before] <= timestamps[after] - target_time,
+            before,
+            after,
+        )
+
+        # Output frames outside the analysed span stay unvoiced.
+        inside = (target_time >= timestamps[0]) & (target_time <= timestamps[-1])
+        keep = voiced[nearest] & inside
+        f0[keep] = contour[keep]
+        return f0
+
+    def get_f0(self, x, f0_min=50, f0_max=1100, p_len=None, confidence_threshold=0.9):
+        """Estimate F0 for ``x`` and return one value per hop (0 = unvoiced).
+
+        Args:
+            x: Audio samples as a NumPy array or torch tensor.
+            f0_min: Lower pitch bound in Hz; raised to at least 46.875.
+            f0_max: Upper pitch bound in Hz; lowered to at most 2093.75.
+            p_len: Number of output frames; defaults to
+                ``x.shape[0] // hop_size``.
+            confidence_threshold: Forwarded to ``SwiftF0``.
+
+        Returns:
+            NumPy array of length ``p_len`` holding the pitch in Hz.
+        """
+        if torch.is_tensor(x):
+            # detach() so a tensor that tracks gradients can be converted.
+            x = x.detach().cpu().numpy()
+
+        if p_len is None:
+            p_len = x.shape[0] // self.hop_size
+
+        # Clamp to the range passed on to SwiftF0
+        # (presumably the limits the detector supports - confirm against its docs).
+        f0_min = max(f0_min, 46.875)
+        f0_max = min(f0_max, 2093.75)
+
+        detector = SwiftF0(
+            fmin=f0_min, fmax=f0_max, confidence_threshold=confidence_threshold
+        )
+        result = detector.detect_from_array(x, self.sample_rate)
+
+        # Centre of every output hop, in seconds.
+        target_time = (
+            np.arange(p_len) * self.hop_size + self.hop_size / 2
+        ) / self.sample_rate
+
+        return self._resample_f0(
+            result.timestamps, result.pitch_hz, result.voicing, target_time
+        )
@@ -948,6 +948,7 @@ def inference_tab():
                         "crepe-tiny",
                         "rmvpe",
                         "fcpe",
+                        "swift",
                     ],
                     value="rmvpe",
                     interactive=True,
@@ -1582,6 +1583,7 @@ def enforce_terms_batch(terms_accepted, *args):
                         "crepe-tiny",
                         "rmvpe",
                         "fcpe",
+                        "swift",
                     ],
                     value="rmvpe",
                     interactive=True,