1
1
# SPDX-License-Identifier: Apache-2.0
2
- """An example showing how to use vLLM to serve multimodal models
2
+ """An example showing how to use vLLM to serve multimodal models
3
3
and run online serving with the OpenAI client.
4
4
5
5
Launch the vLLM server with the following command:
12
12
--trust-remote-code --max-model-len 4096 --limit-mm-per-prompt '{"image":2}'
13
13
14
14
(audio inference with Ultravox)
15
- vllm serve fixie-ai/ultravox-v0_5-llama-3_2-1b --max-model-len 4096
15
+ vllm serve fixie-ai/ultravox-v0_5-llama-3_2-1b \
16
+ --max-model-len 4096 --trust-remote-code
17
+
18
+ Run the script with:
19
+ python openai_chat_completion_client_for_multimodal.py --chat-type audio
16
20
"""
21
+
17
22
import base64
18
23
19
24
import requests
20
25
from openai import OpenAI
26
+ from utils import get_first_model
21
27
22
28
from vllm .utils import FlexibleArgumentParser
23
29
31
37
base_url = openai_api_base ,
32
38
)
33
39
34
- models = client .models .list ()
35
- model = models .data [0 ].id
36
-
37
40
38
41
def encode_base64_content_from_url (content_url : str ) -> str :
39
42
"""Encode a content retrieved from a remote url to base64 format."""
@@ -46,7 +49,7 @@ def encode_base64_content_from_url(content_url: str) -> str:
46
49
47
50
48
51
# Text-only inference
49
- def run_text_only () -> None :
52
+ def run_text_only (model : str ) -> None :
50
53
chat_completion = client .chat .completions .create (
51
54
messages = [{
52
55
"role" : "user" ,
@@ -61,7 +64,7 @@ def run_text_only() -> None:
61
64
62
65
63
66
# Single-image input inference
64
- def run_single_image () -> None :
67
+ def run_single_image (model : str ) -> None :
65
68
66
69
## Use image url in the payload
67
70
image_url = "https://upload.wikimedia.org/wikipedia/commons/thumb/d/dd/Gfp-wisconsin-madison-the-nature-boardwalk.jpg/2560px-Gfp-wisconsin-madison-the-nature-boardwalk.jpg"
@@ -117,7 +120,7 @@ def run_single_image() -> None:
117
120
118
121
119
122
# Multi-image input inference
120
- def run_multi_image () -> None :
123
+ def run_multi_image (model : str ) -> None :
121
124
image_url_duck = "https://upload.wikimedia.org/wikipedia/commons/d/da/2015_Kaczka_krzy%C5%BCowka_w_wodzie_%28samiec%29.jpg"
122
125
image_url_lion = "https://upload.wikimedia.org/wikipedia/commons/7/77/002_The_lion_king_Snyggve_in_the_Serengeti_National_Park_Photo_by_Giles_Laurent.jpg"
123
126
chat_completion_from_url = client .chat .completions .create (
@@ -152,7 +155,7 @@ def run_multi_image() -> None:
152
155
153
156
154
157
# Video input inference
155
- def run_video () -> None :
158
+ def run_video (model : str ) -> None :
156
159
video_url = "http://commondatastorage.googleapis.com/gtv-videos-bucket/sample/ForBiggerFun.mp4"
157
160
video_base64 = encode_base64_content_from_url (video_url )
158
161
@@ -208,7 +211,7 @@ def run_video() -> None:
208
211
209
212
210
213
# Audio input inference
211
- def run_audio () -> None :
214
+ def run_audio (model : str ) -> None :
212
215
from vllm .assets .audio import AudioAsset
213
216
214
217
audio_url = AudioAsset ("winning_call" ).url
@@ -318,7 +321,8 @@ def parse_args():
318
321
319
322
def main (args ) -> None :
320
323
chat_type = args .chat_type
321
- example_function_map [chat_type ]()
324
+ model = get_first_model (client )
325
+ example_function_map [chat_type ](model )
322
326
323
327
324
328
if __name__ == "__main__" :
0 commit comments