Skip to content

Commit 4df0683

Browse files
committed
add vllm_codec_engine
1 parent c37c00f commit 4df0683

File tree

2 files changed

+10
-0
lines changed

2 files changed

+10
-0
lines changed

cosyvoice/cli/cosyvoice.py

+9
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,7 @@ def __init__(self, model_dir, load_jit=False, load_trt=False, fp16=False):
4949
self.model.load('{}/llm.pt'.format(model_dir),
5050
'{}/flow.pt'.format(model_dir),
5151
'{}/hift.pt'.format(model_dir))
52+
self.vllm_codec_engine = None
5253
if load_jit:
5354
self.model.load_jit('{}/llm.text_encoder.{}.zip'.format(model_dir, 'fp16' if self.fp16 is True else 'fp32'),
5455
'{}/llm.llm.{}.zip'.format(model_dir, 'fp16' if self.fp16 is True else 'fp32'),
@@ -149,8 +150,16 @@ def __init__(self, model_dir, load_jit=False, load_trt=False, fp16=False, use_vl
149150
self.model.load('{}/llm.pt'.format(model_dir),
150151
'{}/flow.pt'.format(model_dir),
151152
'{}/hift.pt'.format(model_dir))
153+
self.vllm_codec_engine = None
152154
if use_vllm:
155+
from vllm import EngineArgs, LLMEngine
153156
self.model.export_codec_vllm(''.join([model_dir, '/codec_vllm_model']))
157+
engine_args = EngineArgs(model=''.join([model_dir, '/codec_vllm_model']),
158+
skip_tokenizer_init=True,
159+
gpu_memory_utilization=0.1)
160+
self.vllm_codec_engine = LLMEngine.from_engine_args(engine_args)
161+
self.model.llm.vllm_codec_engine = self.vllm_codec_engine
162+
154163
if load_jit:
155164
self.model.load_jit('{}/flow.encoder.{}.zip'.format(model_dir, 'fp16' if self.fp16 is True else 'fp32'))
156165
if load_trt:

cosyvoice/llm/llm.py

+1
Original file line numberDiff line numberDiff line change
@@ -282,6 +282,7 @@ def __init__(
282282
# 4. sampling method
283283
self.sampling = sampling
284284
self.mix_ratio = mix_ratio
285+
self.vllm_codec_engine = None
285286

286287
@torch.inference_mode()
287288
def inference(

0 commit comments

Comments (0)