
Commit f280558

update func export_codec_vllm

1 parent f6a18ee commit f280558

3 files changed: +5 -2 lines changed

cosyvoice/cli/cosyvoice.py (+1 -1)

@@ -156,7 +156,7 @@ def __init__(self, model_dir, load_jit=False, load_trt=False, fp16=False, use_vl
             self.model.export_codec_vllm(''.join([model_dir, '/codec_vllm_model']))
             engine_args = EngineArgs(model=''.join([model_dir, '/codec_vllm_model']),
                                      skip_tokenizer_init=True,
-                                     gpu_memory_utilization=0.1)
+                                     gpu_memory_utilization=0.2)
             self.vllm_codec_engine = LLMEngine.from_engine_args(engine_args)
             self.model.vllm_codec_engine = self.vllm_codec_engine
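Context: gpu_memory_utilization is the fraction of total GPU memory vLLM is allowed to claim for model weights and KV cache; raising it from 0.1 to 0.2 gives the codec engine more headroom alongside the rest of the CosyVoice pipeline. A minimal standalone sketch of this engine setup (the model path is illustrative; the vLLM calls mirror the diff):

from vllm import EngineArgs, LLMEngine

# Fraction of total GPU memory vLLM may pre-allocate (weights + KV cache);
# the commit doubles this from 0.1 to 0.2.
engine_args = EngineArgs(model='pretrained_models/CosyVoice2/codec_vllm_model',  # illustrative path
                         skip_tokenizer_init=True,  # inputs arrive as ids/embeddings, no text tokenizer
                         gpu_memory_utilization=0.2)
engine = LLMEngine.from_engine_args(engine_args)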

cosyvoice/cli/model.py (+3 -0)

@@ -347,6 +347,9 @@ def export_codec_vllm(self, model_path):
         self.llm.llm.model.to(dtype)
         tmp_vocab_size = self.llm.llm.model.config.vocab_size
         tmp_tie_embedding = self.llm.llm.model.config.tie_word_embeddings
+        del self.llm.llm.model.generation_config.eos_token_id
+        del self.llm.llm.model.config.bos_token_id
+        del self.llm.llm.model.config.eos_token_id
         self.llm.llm.model.config.vocab_size = pad_vocab_size
         self.llm.llm.model.config.tie_word_embeddings = False
         self.llm.llm.model.config.use_bias = True
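Context: the three new del statements drop the text backbone's special-token ids before the model is exported, presumably so the serialized config does not carry a bos/eos that vLLM would treat as stop tokens once the vocabulary is repurposed for speech-codec tokens. On Hugging Face PretrainedConfig and GenerationConfig objects these ids are plain instance attributes, so del removes them and they are omitted from the saved JSON. A hedged standalone illustration (the base model name is just an example):

from transformers import AutoConfig, GenerationConfig

config = AutoConfig.from_pretrained('Qwen/Qwen2-0.5B')            # example backbone
gen_config = GenerationConfig.from_pretrained('Qwen/Qwen2-0.5B')

# Special-token ids are ordinary instance attributes; deleting them means
# they simply do not appear when the configs are written back to disk.
del gen_config.eos_token_id
del config.bos_token_id
del config.eos_token_id

config.save_pretrained('./codec_vllm_model')
gen_config.save_pretrained('./codec_vllm_model')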

cosyvoice/llm/llm.py (+1 -1)

@@ -343,7 +343,7 @@ def inference(
                                          max_tokens=max_len)
         request_id = uuid.uuid4()
         vllm_codec_engine.add_request(request_id,
-                                      {"prompt_embeds": lm_input.to(torch.bfloat16).to(device)},
+                                      {"prompt_embeds": lm_input.squeeze(0).to(torch.bfloat16).to(device)},
                                       sampling_params)
         ## generator
         out_token_ids = []
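Context: the squeeze(0) suggests the engine's prompt-embeddings input expects one unbatched sequence of shape [seq_len, hidden_size], whereas the CosyVoice LM front-end emits a batched [1, seq_len, hidden_size] tensor. A tensor-only sketch of the shape fix (sizes are illustrative):

import torch

hidden_size = 896                                # illustrative width
lm_input = torch.randn(1, 42, hidden_size)       # [batch=1, seq_len, hidden] from the LM front-end

# Drop the batch dimension and cast to bf16, as in the diff.
prompt_embeds = lm_input.squeeze(0).to(torch.bfloat16)
assert prompt_embeds.shape == (42, hidden_size)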
