Fix: cast MMS tokenizer input_ids to int64 (long) before speech synthesis

jordimas · jordimas · commit 8b193910cd87 · 2025-01-12T10:25:54.000+01:00
diff --git a/open_dubbing/text_to_speech_mms.py b/open_dubbing/text_to_speech_mms.py
@@ -55,6 +55,14 @@ def _convert_text_to_speech(
         )
         inputs = tokenizer(text, return_tensors="pt").to(self.device)
 
+        _type = inputs["input_ids"].dtype
+
+        if _type != torch.int64:
+            logger().error(
+                f"text_to_speech.client.synthesize_speech: Fixing type '{_type}' to 'long' for text '{text}'"
+            )
+            inputs["input_ids"] = inputs["input_ids"].long()
+
         # Generate waveform
         with torch.no_grad():
             output = model(**inputs).waveform
diff --git a/sc.sh b/sc.sh
@@ -7,6 +7,7 @@ branch_name=$(git rev-parse --abbrev-ref HEAD)
 declare -a target_languages=("cat")  # Catalan (cat) and French (fra)
 declare -a inputs=($(find ../dubbing/od-videos/ -type f -name "*.mp4"))
 declare -a inputs=("videos/jordi.mp4" )
+declare -a inputs=("../dubbing/od-videos/english/openuniversity.mp4" )
 
 for input_file in "${inputs[@]}"; do
   output_directory="output/$(basename "${input_file%.*}").${branch_name}/"
@@ -21,9 +22,7 @@ for input_file in "${inputs[@]}"; do
       --target_language="$language" \
       --translator="apertium" \
       --apertium_server=http://localhost:8500/ \
-      --tts=api \
-      --tts_api_server=http://localhost:8100/ \
-      --target_language_region="central" \
+      --tts=mms \
       --device=cpu \
       --dubbed_subtitles\
       --log_level=INFO