Skip to content

Commit 8b19391

Browse files
committed
Fix
1 parent 3fa6c37 commit 8b19391

File tree

2 files changed

+10
-3
lines changed

2 files changed

+10
-3
lines changed

open_dubbing/text_to_speech_mms.py

+8
Original file line number | Diff line number | Diff line change
@@ -55,6 +55,14 @@ def _convert_text_to_speech(
5555
)
5656
inputs = tokenizer(text, return_tensors="pt").to(self.device)
5757

58+
_type = inputs["input_ids"].dtype
59+
60+
if _type != torch.int64:
61+
logger().error(
62+
f"text_to_speech.client.synthesize_speech: Fixing type '{_type}' to 'long' for text '{text}'"
63+
)
64+
inputs["input_ids"] = inputs["input_ids"].long()
65+
5866
# Generate waveform
5967
with torch.no_grad():
6068
output = model(**inputs).waveform

sc.sh

+2 -3
Original file line number | Diff line number | Diff line change
@@ -7,6 +7,7 @@ branch_name=$(git rev-parse --abbrev-ref HEAD)
77
declare -a target_languages=("cat") # Catalan (cat) and French (fra)
88
declare -a inputs=($(find ../dubbing/od-videos/ -type f -name "*.mp4"))
99
declare -a inputs=("videos/jordi.mp4" )
10+
declare -a inputs=("../dubbing/od-videos/english/openuniversity.mp4" )
1011

1112
for input_file in "${inputs[@]}"; do
1213
output_directory="output/$(basename "${input_file%.*}").${branch_name}/"
@@ -21,9 +22,7 @@ for input_file in "${inputs[@]}"; do
2122
--target_language="$language" \
2223
--translator="apertium" \
2324
--apertium_server=http://localhost:8500/ \
24-
--tts=api \
25-
--tts_api_server=http://localhost:8100/ \
26-
--target_language_region="central" \
25+
--tts=mms \
2726
--device=cpu \
2827
--dubbed_subtitles\
2928
--log_level=INFO

0 commit comments

Comments
 (0)