Skip to content

Commit 20d8ce8

Browse files
reidliu41 and reidliu41 authored
[Frontend] add --quick option for vllm chat/complete (#18297)
Signed-off-by: reidliu41 <reid201711@gmail.com> Co-authored-by: reidliu41 <reid201711@gmail.com>
1 parent 84ab4fe commit 20d8ce8

File tree

1 file changed

+29
-0
lines changed

1 file changed

+29
-0
lines changed

vllm/entrypoints/cli/openai.py

Lines changed: 29 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -101,9 +101,18 @@ def cmd(args: argparse.Namespace) -> None:
101101
model_name, client = _interactive_cli(args)
102102
system_prompt = args.system_prompt
103103
conversation: list[ChatCompletionMessageParam] = []
104+
104105
if system_prompt is not None:
105106
conversation.append({"role": "system", "content": system_prompt})
106107

108+
if args.quick:
109+
conversation.append({"role": "user", "content": args.quick})
110+
111+
chat_completion = client.chat.completions.create(
112+
model=model_name, messages=conversation)
113+
print(chat_completion.choices[0].message.content)
114+
return
115+
107116
print("Please enter a message for the chat model:")
108117
while True:
109118
try:
@@ -136,6 +145,12 @@ def subparser_init(
136145
default=None,
137146
help=("The system prompt to be added to the chat template, "
138147
"used for models that support system prompts."))
148+
chat_parser.add_argument("-q",
149+
"--quick",
150+
type=str,
151+
metavar="MESSAGE",
152+
help=("Send a single prompt as MESSAGE "
153+
"and print the response, then exit."))
139154
return chat_parser
140155

141156

@@ -149,6 +164,13 @@ def __init__(self):
149164
@staticmethod
150165
def cmd(args: argparse.Namespace) -> None:
151166
model_name, client = _interactive_cli(args)
167+
168+
if args.quick:
169+
completion = client.completions.create(model=model_name,
170+
prompt=args.quick)
171+
print(completion.choices[0].text)
172+
return
173+
152174
print("Please enter prompt to complete:")
153175
while True:
154176
input_prompt = input("> ")
@@ -168,6 +190,13 @@ def subparser_init(
168190
"via the running API server."),
169191
usage="vllm complete [options]")
170192
_add_query_options(complete_parser)
193+
complete_parser.add_argument(
194+
"-q",
195+
"--quick",
196+
type=str,
197+
metavar="PROMPT",
198+
help=
199+
"Send a single prompt and print the completion output, then exit.")
171200
return complete_parser
172201

173202

0 commit comments

Comments
 (0)