Skip to content

Commit 6d22d0b

Browse files
authored Nov 5, 2024
Merge pull request #618 from FunAudioLLM/dev/lyuxiang.lx
Dev/lyuxiang.lx
2 parents 3914b54 + 487701c commit 6d22d0b

File tree

5 files changed

+8
-2
lines changed

5 files changed

+8
-2
lines changed
 

‎cosyvoice/cli/cosyvoice.py

+2
Original file line number | Diff line number | Diff line change
@@ -67,6 +67,8 @@ def inference_sft(self, tts_text, spk_id, stream=False, speed=1.0):
6767
def inference_zero_shot(self, tts_text, prompt_text, prompt_speech_16k, stream=False, speed=1.0):
6868
prompt_text = self.frontend.text_normalize(prompt_text, split=False)
6969
for i in tqdm(self.frontend.text_normalize(tts_text, split=True)):
70+
if len(i) < 0.5 * len(prompt_text):
71+
logging.warning('synthesis text {} too short than prompt text {}, this may lead to bad performance'.format(i, prompt_text))
7072
model_input = self.frontend.frontend_zero_shot(i, prompt_text, prompt_speech_16k)
7173
start_time = time.time()
7274
logging.info('synthesis text {}'.format(i))

‎cosyvoice/llm/llm.py

+3
Original file line number | Diff line number | Diff line change
@@ -202,6 +202,9 @@ def inference(
202202
att_mask=torch.tril(torch.ones((1, lm_input.shape[1], lm_input.shape[1]),
203203
device=lm_input.device)).to(torch.bool))
204204
logp = self.llm_decoder(y_pred[:, -1]).log_softmax(dim=-1)
205+
# force continue decode first token
206+
if i == 0:
207+
logp[:, self.speech_token_size] = -float('inf')
205208
top_ids = self.sampling_ids(logp.squeeze(dim=0), out_tokens, sampling, ignore_eos=True if i < min_len else False).item()
206209
if top_ids == self.speech_token_size:
207210
break

‎examples/libritts/cosyvoice/conf/cosyvoice.fromscratch.yaml

+1 -1
Original file line number | Diff line number | Diff line change
@@ -141,7 +141,7 @@ mel_spec_transform1: !name:matcha.utils.audio.mel_spectrogram
141141
hop_size: 256
142142
win_size: 1024
143143
fmin: 0
144-
fmax: 8000
144+
fmax: null
145145
center: False
146146
hifigan: !new:cosyvoice.hifigan.hifigan.HiFiGan
147147
generator: !ref <hift>

‎examples/libritts/cosyvoice/conf/cosyvoice.yaml

+1 -1
Original file line number | Diff line number | Diff line change
@@ -141,7 +141,7 @@ mel_spec_transform1: !name:matcha.utils.audio.mel_spectrogram
141141
hop_size: 256
142142
win_size: 1024
143143
fmin: 0
144-
fmax: 8000
144+
fmax: null
145145
center: False
146146
hifigan: !new:cosyvoice.hifigan.hifigan.HiFiGan
147147
generator: !ref <hift>

‎requirements.txt

+1
Original file line number | Diff line number | Diff line change
@@ -6,6 +6,7 @@ gdown==5.1.0
66
gradio==4.32.2
77
grpcio==1.57.0
88
grpcio-tools==1.57.0
9+
huggingface-hub==0.23.5
910
hydra-core==1.3.2
1011
HyperPyYAML==1.2.2
1112
inflect==7.3.1

0 commit comments

Comments
 (0)