@@ -218,7 +218,7 @@ def convert_audio(
218218 ):
219219 """
220220 Performs voice conversion on the input audio.
221-
221+
222222 Args:
223223 pitch (int): Key for F0 up-sampling.
224224 index_rate (float): Rate for index matching.
@@ -245,26 +245,26 @@ def convert_audio(
245245 if not model_path :
246246 print ("No model path provided. Aborting conversion." )
247247 return
248-
248+
249249 self .get_vc (model_path , sid )
250-
250+
251251 start_time = time .time ()
252252 print (f"Converting audio '{ audio_input_path } '..." )
253-
253+
254254 audio = load_audio_infer (
255255 audio_input_path ,
256256 16000 ,
257257 ** kwargs ,
258258 )
259259 audio_max = np .abs (audio ).max () / 0.95
260-
260+
261261 if audio_max > 1 :
262262 audio /= audio_max
263-
263+
264264 if not self .hubert_model or embedder_model != self .last_embedder_model :
265265 self .load_hubert (embedder_model , embedder_model_custom )
266266 self .last_embedder_model = embedder_model
267-
267+
268268 file_index = (
269269 index_path .strip ()
270270 .strip ('"' )
@@ -273,17 +273,17 @@ def convert_audio(
273273 .strip ()
274274 .replace ("trained" , "added" )
275275 )
276-
276+
277277 if self .tgt_sr != resample_sr >= 16000 :
278278 self .tgt_sr = resample_sr
279-
279+
280280 if split_audio :
281281 chunks , intervals = process_audio (audio , 16000 )
282282 print (f"Audio split into { len (chunks )} chunks for processing." )
283283 else :
284284 chunks = []
285285 chunks .append (audio )
286-
286+
287287 converted_chunks = []
288288 for c in chunks :
289289 audio_opt = self .vc .pipeline (
@@ -307,42 +307,41 @@ def convert_audio(
307307 converted_chunks .append (audio_opt )
308308 if split_audio :
309309 print (f"Converted audio chunk { len (converted_chunks )} " )
310-
310+
311311 if split_audio :
312312 audio_opt = merge_audio (
313313 chunks , converted_chunks , intervals , 16000 , self .tgt_sr
314314 )
315315 else :
316316 audio_opt = converted_chunks [0 ]
317-
317+
318318 if clean_audio :
319319 cleaned_audio = self .remove_audio_noise (
320320 audio_opt , self .tgt_sr , clean_strength
321321 )
322322 if cleaned_audio is not None :
323323 audio_opt = cleaned_audio
324-
324+
325325 if post_process :
326326 audio_opt = self .post_process_audio (
327327 audio_input = audio_opt ,
328328 sample_rate = self .tgt_sr ,
329329 ** kwargs ,
330330 )
331-
331+
332332 sf .write (audio_output_path , audio_opt , self .tgt_sr , format = "WAV" )
333333 output_path_format = audio_output_path .replace (
334334 ".wav" , f".{ export_format .lower ()} "
335335 )
336336 audio_output_path = self .convert_audio_format (
337337 audio_output_path , output_path_format , export_format
338338 )
339-
339+
340340 elapsed_time = time .time () - start_time
341341 print (
342342 f"Conversion completed at '{ audio_output_path } ' in { elapsed_time :.2f} seconds."
343343 )
344-
345-
344+
346345 def convert_audio_batch (
347346 self ,
348347 audio_input_paths : str ,
@@ -351,7 +350,7 @@ def convert_audio_batch(
351350 ):
352351 """
353352 Performs voice conversion on a batch of input audio files.
354-
353+
355354 Args:
356355 audio_input_paths (str): List of paths to the input audio files.
357356 audio_output_path (str): Path to the output audio file.
0 commit comments