comments

jordimas · jordimas · commit 70212285f7f4 · 2025-01-12T21:36:30.000+01:00
diff --git a/open_dubbing/text_to_speech_mms.py b/open_dubbing/text_to_speech_mms.py
@@ -55,10 +55,11 @@ def _convert_text_to_speech(
         )
         inputs = tokenizer(text, return_tensors="pt").to(self.device)
 
-        # Model returns for some sequence of text no result
+        # For some texts the tokenizer produces no input tokens (empty input_ids)
         if inputs["input_ids"].shape[1] == 0:
             sampling_rate = 16000
             duration_seconds = 1
+            # Use np.ones rather than np.zeros: an all-zero array makes the later ffmpeg process fail
             output_np = np.ones(sampling_rate * duration_seconds, dtype=np.int16)
             logger().warning(
                 f"TextToSpeechMMS._convert_text_to_speech. Model returns input tokens for text '{text}', generating an empty WAV file."