@@ -86,7 +86,7 @@ def __init__(
8686 # noise reduce
8787 self .reduced_noise = (
8888 TorchGate (
89- AUDIO_SAMPLE_RATE ,
89+ self . pipeline . tgt_sr ,
9090 prop_decrease = clean_strength ,
9191 ).to (self .device )
9292 if clean_audio
@@ -244,7 +244,7 @@ def inference(
244244 # Busy wait to keep power manager happy and clocks stable. Running pipeline on-demand seems to lag when the delay between
245245 # voice changer activation is too high.
246246 # https://forums.developer.nvidia.com/t/why-kernel-calculate-speed-got-slower-after-waiting-for-a-while/221059/9
247- self .pipeline .voice_conversion (
247+ audio_model = self .pipeline .voice_conversion (
248248 self .convert_buffer ,
249249 self .pitch_buffer ,
250250 self .pitchf_buffer ,
@@ -260,14 +260,14 @@ def inference(
260260 f0_autotune_strength ,
261261 proposed_pitch ,
262262 proposed_pitch_threshold ,
263+ self .reduced_noise ,
264+ self .board ,
263265 )
264- return None , vol
266+
267+ return torch .zeros (audio_model .shape , dtype = self .dtype , device = self .device ), vol
265268
266269 if vol < self .input_sensitivity :
267- # Busy wait to keep power manager happy and clocks stable. Running pipeline on-demand seems to lag when the delay between
268- # voice changer activation is too high.
269- # https://forums.developer.nvidia.com/t/why-kernel-calculate-speed-got-slower-after-waiting-for-a-while/221059/9
270- self .pipeline .voice_conversion (
270+ audio_model = self .pipeline .voice_conversion (
271271 self .convert_buffer ,
272272 self .pitch_buffer ,
273273 self .pitchf_buffer ,
@@ -283,9 +283,11 @@ def inference(
283283 f0_autotune_strength ,
284284 proposed_pitch ,
285285 proposed_pitch_threshold ,
286+ self .reduced_noise ,
287+ self .board ,
286288 )
287289
288- return None , vol
290+ return torch . zeros ( audio_model . shape , dtype = self . dtype , device = self . device ) , vol
289291
290292 circular_write (audio_input_16k , self .convert_buffer )
291293
@@ -305,18 +307,11 @@ def inference(
305307 f0_autotune_strength ,
306308 proposed_pitch ,
307309 proposed_pitch_threshold ,
310+ self .reduced_noise ,
311+ self .board ,
308312 )
309313
310314 audio_out : torch .Tensor = self .resample_out (audio_model * torch .sqrt (vol_t ))
311-
312- if self .reduced_noise is not None :
313- audio_out = self .reduced_noise (audio_out .unsqueeze (0 )).squeeze (0 )
314- if self .board is not None :
315- audio_out = torch .as_tensor (
316- self .board (audio_out .cpu ().numpy (), AUDIO_SAMPLE_RATE ),
317- device = self .device ,
318- )
319-
320315 return audio_out , vol
321316
322317 def __del__ (self ):
@@ -424,9 +419,9 @@ def process_audio(
424419 proposed_pitch_threshold ,
425420 )
426421
427- if audio is None :
422+ # if audio is None:
428423 # In case there's an actual silence - send full block with zeros
429- return np .zeros (block_size , dtype = np .float32 ), vol
424+ # return np.zeros(block_size, dtype=np.float32), vol
430425
431426 conv_input = audio [None , None , : self .crossfade_frame + self .sola_search_frame ]
432427 cor_nom = F .conv1d (conv_input , self .sola_buffer [None , None , :])
0 commit comments