
Commit 75f1735

[doc] fix multimodal example script
We can't run the script with `--help` to see the help message without connecting to a running vLLM server. This change allows us to do that. We also improve the error message when the script cannot get a list of models from vLLM.

Signed-off-by: David Xia <david@davidxia.com>
1 parent 65334ef commit 75f1735
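The gist of the fix: the old script listed models at import time, so even `--help` required a live server. Moving the lookup into `main()` defers the network call until after argument parsing. A minimal, self-contained sketch of that ordering (a hypothetical stand-alone example, not the script itself; the `api_key`/`base_url` values are the usual vLLM example defaults, assumed here):

```python
import argparse

from openai import OpenAI

# Assumed vLLM example defaults; adjust to match your server.
client = OpenAI(api_key="EMPTY", base_url="http://localhost:8000/v1")


def main() -> None:
    parser = argparse.ArgumentParser()
    parser.add_argument("--chat-type", default="text-only")
    # `--help` prints and exits here, before any network call happens.
    args = parser.parse_args()

    # Only now contact the server to discover which model it serves.
    model = client.models.list().data[0].id
    print(f"chat type {args.chat_type!r} will use model {model!r}")


if __name__ == "__main__":
    main()
```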

File tree

2 files changed (+40, -11 lines)


examples/online_serving/openai_chat_completion_client_for_multimodal.py

Lines changed: 15 additions & 11 deletions
@@ -1,5 +1,5 @@
 # SPDX-License-Identifier: Apache-2.0
-"""An example showing how to use vLLM to serve multimodal models
+"""An example showing how to use vLLM to serve multimodal models
 and run online serving with OpenAI client.

 Launch the vLLM server with the following command:
@@ -12,12 +12,18 @@
 --trust-remote-code --max-model-len 4096 --limit-mm-per-prompt '{"image":2}'

 (audio inference with Ultravox)
-vllm serve fixie-ai/ultravox-v0_5-llama-3_2-1b --max-model-len 4096
+vllm serve fixie-ai/ultravox-v0_5-llama-3_2-1b \
+    --max-model-len 4096 --trust-remote-code
+
+run the script with
+python openai_chat_completion_client_for_multimodal.py --chat-type audio
 """
+
 import base64

 import requests
 from openai import OpenAI
+from utils import get_first_model

 from vllm.utils import FlexibleArgumentParser

@@ -31,9 +37,6 @@
     base_url=openai_api_base,
 )

-models = client.models.list()
-model = models.data[0].id
-

 def encode_base64_content_from_url(content_url: str) -> str:
     """Encode a content retrieved from a remote url to base64 format."""
@@ -46,7 +49,7 @@ def encode_base64_content_from_url(content_url: str) -> str:


 # Text-only inference
-def run_text_only() -> None:
+def run_text_only(model: str) -> None:
     chat_completion = client.chat.completions.create(
         messages=[{
             "role": "user",
@@ -61,7 +64,7 @@ def run_text_only() -> None:


 # Single-image input inference
-def run_single_image() -> None:
+def run_single_image(model: str) -> None:

     ## Use image url in the payload
     image_url = "https://upload.wikimedia.org/wikipedia/commons/thumb/d/dd/Gfp-wisconsin-madison-the-nature-boardwalk.jpg/2560px-Gfp-wisconsin-madison-the-nature-boardwalk.jpg"
@@ -117,7 +120,7 @@ def run_single_image() -> None:


 # Multi-image input inference
-def run_multi_image() -> None:
+def run_multi_image(model: str) -> None:
     image_url_duck = "https://upload.wikimedia.org/wikipedia/commons/d/da/2015_Kaczka_krzy%C5%BCowka_w_wodzie_%28samiec%29.jpg"
     image_url_lion = "https://upload.wikimedia.org/wikipedia/commons/7/77/002_The_lion_king_Snyggve_in_the_Serengeti_National_Park_Photo_by_Giles_Laurent.jpg"
     chat_completion_from_url = client.chat.completions.create(
@@ -152,7 +155,7 @@ def run_multi_image() -> None:


 # Video input inference
-def run_video() -> None:
+def run_video(model: str) -> None:
     video_url = "http://commondatastorage.googleapis.com/gtv-videos-bucket/sample/ForBiggerFun.mp4"
     video_base64 = encode_base64_content_from_url(video_url)

@@ -208,7 +211,7 @@ def run_video() -> None:


 # Audio input inference
-def run_audio() -> None:
+def run_audio(model: str) -> None:
     from vllm.assets.audio import AudioAsset

     audio_url = AudioAsset("winning_call").url
@@ -318,7 +321,8 @@ def parse_args():

 def main(args) -> None:
     chat_type = args.chat_type
-    example_function_map[chat_type]()
+    model = get_first_model(client)
+    example_function_map[chat_type](model)


 if __name__ == "__main__":
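Each `run_*` helper now takes the model name as a parameter instead of reading a module-level global, so the only network-dependent call stays inside `main()`. A hedged sketch of the resulting call shape (the prompt, `max_tokens`, and the one-entry dispatch map are illustrative; only the `(model: str)` signatures, `get_first_model`, and the `example_function_map[chat_type](model)` call appear in the diff):

```python
from openai import OpenAI

from utils import get_first_model

# Assumed vLLM example defaults; adjust to match your server.
client = OpenAI(api_key="EMPTY", base_url="http://localhost:8000/v1")


def run_text_only(model: str) -> None:
    # The model name arrives explicitly; no hidden global state.
    chat_completion = client.chat.completions.create(
        model=model,
        messages=[{"role": "user", "content": "Say hello."}],  # illustrative
        max_tokens=64,
    )
    print(chat_completion.choices[0].message.content)


# Illustrative one-entry version of the script's example_function_map.
example_function_map = {"text-only": run_text_only}


def main(chat_type: str) -> None:
    model = get_first_model(client)         # one lookup, after CLI parsing
    example_function_map[chat_type](model)  # thread it to the chosen helper


if __name__ == "__main__":
    main("text-only")
```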

examples/online_serving/utils.py

Lines changed: 25 additions & 0 deletions
@@ -0,0 +1,25 @@
+# SPDX-License-Identifier: Apache-2.0
+from openai import APIConnectionError, OpenAI
+from openai.pagination import SyncPage
+from openai.types.model import Model
+
+
+def get_first_model(client: OpenAI) -> str:
+    """
+    Get the first model from the vLLM server.
+    """
+    try:
+        models: SyncPage[Model] = client.models.list()
+    except APIConnectionError as e:
+        raise RuntimeError(
+            "Failed to get the list of models from the vLLM server at "
+            f"{client.base_url} with API key {client.api_key}. Check\n"
+            "1. the server is running\n"
+            "2. the server URL is correct\n"
+            "3. the API key is correct") from e
+
+    if len(models.data) == 0:
+        raise RuntimeError(
+            f"No models found on the vLLM server at {client.base_url}")
+
+    return models.data[0].id
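Callers swap the old inline `client.models.list()` for this helper; a connection failure now surfaces as a `RuntimeError` with a short checklist rather than a bare `APIConnectionError` traceback. A usage sketch (again assuming the example defaults for URL and key):

```python
from openai import OpenAI

from utils import get_first_model

# Assumed defaults; match whatever `vllm serve` was launched with.
client = OpenAI(api_key="EMPTY", base_url="http://localhost:8000/v1")

try:
    model = get_first_model(client)
except RuntimeError as err:
    # e.g. "Failed to get the list of models from the vLLM server at ..."
    raise SystemExit(err)

print(f"First served model: {model}")
```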
