feat(synthesizer): show timing for voice conversion

idiap · Jan 14, 2025 · d7861a7 · d7861a7
1 parent 58a11ab
commit d7861a7
Showing 1 changed file with 10 additions and 3 deletions.
diff --git a/TTS/utils/synthesizer.py b/TTS/utils/synthesizer.py
@@ -272,11 +272,18 @@ def save_wav(self, wav: List[int], path: str, pipe_out=None) -> None:
             wav = np.array(wav)
         save_wav(wav=wav, path=path, sample_rate=self.output_sample_rate, pipe_out=pipe_out)
 
-    def voice_conversion(self, source_wav: str, target_wav: str) -> List[int]:
-        output = self.vc_model.voice_conversion(source_wav, target_wav)
+    def voice_conversion(self, source_wav: str, target_wav: str, **kwargs) -> List[int]:
+        start_time = time.time()
+        output = self.vc_model.voice_conversion(source_wav, target_wav, **kwargs)
         if self.vocoder_model is not None:
             output = self.vocoder_model.inference(output)
-        return output.squeeze()
+
+        output = output.squeeze()
+        process_time = time.time() - start_time
+        audio_time = len(output) / self.output_sample_rate
+        logger.info("Processing time: %.3f", process_time)
+        logger.info("Real-time factor: %.3f", process_time / audio_time)
+        return output
 
     def tts(
         self,