3 files changed: +11, −4 lines
File 1 of 3:

```diff
@@ -7,11 +7,14 @@
 from vllm.engine.async_llm_engine import AsyncLLMEngine
 from vllm.entrypoints.openai.protocol import ChatCompletionRequest
 from vllm.entrypoints.openai.serving_chat import OpenAIServingChat
+from vllm.entrypoints.openai.serving_engine import BaseModelPath
 from vllm.transformers_utils.tokenizer import get_tokenizer
 
 MODEL_NAME = "openai-community/gpt2"
 CHAT_TEMPLATE = "Dummy chat template for testing {}"
 
+BASE_MODEL_PATHS = [BaseModelPath(name=MODEL_NAME, model_path=MODEL_NAME)]
+
 
 @dataclass
 class MockModelConfig:
@@ -37,7 +40,7 @@ async def _async_serving_chat_init():
 
     serving_completion = OpenAIServingChat(engine,
                                            model_config,
-                                           served_model_names=[MODEL_NAME],
+                                           BASE_MODEL_PATHS,
                                            response_role="assistant",
                                            chat_template=CHAT_TEMPLATE,
                                            lora_modules=None,
@@ -57,7 +60,7 @@ def test_serving_chat_should_set_correct_max_tokens():
 
     serving_chat = OpenAIServingChat(mock_engine,
                                      MockModelConfig(),
-                                     served_model_names=[MODEL_NAME],
+                                     BASE_MODEL_PATHS,
                                      response_role="assistant",
                                      chat_template=CHAT_TEMPLATE,
                                      lora_modules=None,
```
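For context on the new positional argument: `BaseModelPath` pairs the model name a client sends in a request with the path that name resolves to. A minimal sketch of the fixture above, where the dataclass is a stand-in carrying only the two fields the diff uses (the real class is imported from `vllm.entrypoints.openai.serving_engine`):

```python
from dataclasses import dataclass


@dataclass
class BaseModelPath:
    """Stand-in with the two fields the diff uses; the real class
    lives in vllm.entrypoints.openai.serving_engine."""
    name: str        # model name that clients put in their requests
    model_path: str  # local path or HF repo id the name resolves to


MODEL_NAME = "openai-community/gpt2"

# Mirrors the test fixture above: the served name doubles as the path.
BASE_MODEL_PATHS = [BaseModelPath(name=MODEL_NAME, model_path=MODEL_NAME)]
```

Using a structured pair instead of a flat list of strings lets the served name and the underlying path differ, which the second file exercises.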
File 2 of 3:

```diff
@@ -17,6 +17,7 @@
 # yapf: enable
 from vllm.entrypoints.openai.serving_chat import OpenAIServingChat
 from vllm.entrypoints.openai.serving_embedding import OpenAIServingEmbedding
+from vllm.entrypoints.openai.serving_engine import BaseModelPath
 from vllm.usage.usage_lib import UsageContext
 from vllm.utils import FlexibleArgumentParser, random_uuid
 from vllm.version import __version__ as VLLM_VERSION
@@ -140,6 +141,10 @@ async def main(args):
 
     # When using single vLLM without engine_use_ray
     model_config = await engine.get_model_config()
+    base_model_paths = [
+        BaseModelPath(name=name, model_path=args.model)
+        for name in served_model_names
+    ]
 
     if args.disable_log_requests:
         request_logger = None
@@ -150,7 +155,7 @@
     openai_serving_chat = OpenAIServingChat(
         engine,
         model_config,
-        served_model_names,
+        base_model_paths,
         args.response_role,
         lora_modules=None,
         prompt_adapters=None,
```
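The list comprehension added above maps every served name onto the single `args.model` path, so several client-facing aliases can back the same weights. A self-contained sketch of that mapping with illustrative values (the alias list and path below are assumptions, standing in for the CLI arguments):

```python
from dataclasses import dataclass


@dataclass
class BaseModelPath:  # same stand-in as in the previous sketch
    name: str
    model_path: str


# Illustrative values: several served aliases, one underlying model path
# (standing in for served_model_names and args.model in the script above).
served_model_names = ["gpt2", "openai-community/gpt2"]
model = "openai-community/gpt2"

base_model_paths = [
    BaseModelPath(name=name, model_path=model)
    for name in served_model_names
]

for bmp in base_model_paths:
    print(f"{bmp.name!r} -> {bmp.model_path!r}")
# 'gpt2' -> 'openai-community/gpt2'
# 'openai-community/gpt2' -> 'openai-community/gpt2'
```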
File 3 of 3:

```diff
@@ -495,4 +495,3 @@ async def unload_lora_adapter(
 
     def _is_model_supported(self, model_name):
         return any(model.name == model_name for model in self.base_model_paths)
-
```
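With this change, `_is_model_supported` checks a requested name against the registered `BaseModelPath` entries rather than a flat list of strings. A minimal, self-contained sketch of that lookup (the class and variable names here are hypothetical; only the method body comes from the diff):

```python
from dataclasses import dataclass


@dataclass
class BaseModelPath:  # same stand-in fields as in the earlier sketches
    name: str
    model_path: str


class ServingEngineSketch:
    """Hypothetical holder; only _is_model_supported mirrors the diff."""

    def __init__(self, base_model_paths):
        self.base_model_paths = base_model_paths

    def _is_model_supported(self, model_name):
        # The method body from the diff: a name is supported if any
        # registered BaseModelPath carries it.
        return any(model.name == model_name
                   for model in self.base_model_paths)


paths = [BaseModelPath(name="gpt2", model_path="openai-community/gpt2")]
engine = ServingEngineSketch(paths)
assert engine._is_model_supported("gpt2")
assert not engine._is_model_supported("unknown-model")
```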