
Commit edcc183

format cleanup

1 parent fa90d4f

2 files changed: +23 additions, -20 deletions


vllm/entrypoints/openai/serving_chat.py
Lines changed: 6 additions & 6 deletions

@@ -22,8 +22,8 @@
     ChatCompletionRequest, ChatCompletionResponse,
     ChatCompletionResponseChoice, ChatCompletionResponseStreamChoice,
     ChatCompletionStreamResponse, ChatMessage, DeltaFunctionCall, DeltaMessage,
-    DeltaToolCall, ErrorResponse, FunctionCall, ToolCall, UsageInfo,
-    RequestResponseMetadata)
+    DeltaToolCall, ErrorResponse, FunctionCall, RequestResponseMetadata,
+    ToolCall, UsageInfo)
 from vllm.entrypoints.openai.serving_engine import (BaseModelPath,
                                                     LoRAModulePath,
                                                     OpenAIServing,
@@ -589,12 +589,12 @@ async def chat_completion_stream_generator(
                         exclude_unset=True, exclude_none=True))
                 yield f"data: {final_usage_data}\n\n"

-            # report to FastAPI middleware aggregate number of completion tokens (across all choices)
+            # report to FastAPI middleware aggregate usage across all choices
             num_completion_tokens = sum(previous_num_tokens)
             request_metadata.final_usage_info = UsageInfo(
-                    prompt_tokens=num_prompt_tokens,
-                    completion_tokens=num_completion_tokens,
-                    total_tokens=num_prompt_tokens+num_completion_tokens)
+                prompt_tokens=num_prompt_tokens,
+                completion_tokens=num_completion_tokens,
+                total_tokens=num_prompt_tokens + num_completion_tokens)

         except ValueError as e:
             # TODO: Use a vllm-specific Validation Error
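
The comment introduced above says the aggregate usage is "reported to FastAPI middleware". As a rough illustration of how a consumer might read it, here is a hypothetical middleware sketch; the attribute name request.state.request_metadata is an assumption about how the RequestResponseMetadata object is exposed, and is not part of this diff.

# Hypothetical consumer of the usage reported above. Assumes the server
# attaches the RequestResponseMetadata object to request.state under the
# name "request_metadata"; that name is an assumption, not shown in this diff.
from fastapi import FastAPI, Request

app = FastAPI()


@app.middleware("http")
async def log_token_usage(request: Request, call_next):
    response = await call_next(request)
    metadata = getattr(request.state, "request_metadata", None)
    usage = getattr(metadata, "final_usage_info", None)
    # For streaming endpoints the generator sets final_usage_info only after
    # the last SSE chunk, so it may still be None when this runs.
    if usage is not None:
        print(f"prompt_tokens={usage.prompt_tokens} "
              f"completion_tokens={usage.completion_tokens} "
              f"total_tokens={usage.total_tokens}")
    return response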

vllm/entrypoints/openai/serving_completion.py
Lines changed: 17 additions & 14 deletions

@@ -18,8 +18,9 @@
                                           CompletionResponseChoice,
                                           CompletionResponseStreamChoice,
                                           CompletionStreamResponse,
-                                          ErrorResponse, UsageInfo,
-                                          RequestResponseMetadata)
+                                          ErrorResponse,
+                                          RequestResponseMetadata,
+                                          UsageInfo)
 # yapf: enable
 from vllm.entrypoints.openai.serving_engine import (BaseModelPath,
                                                     LoRAModulePath,
@@ -170,14 +171,15 @@ async def create_completion(

         # Streaming response
         if stream:
-            return self.completion_stream_generator(request,
-                                                     result_generator,
-                                                     request_id,
-                                                     created_time,
-                                                     model_name,
-                                                     num_prompts=len(prompts),
-                                                     tokenizer=tokenizer,
-                                                     request_metadata=request_metadata)
+            return self.completion_stream_generator(
+                request,
+                result_generator,
+                request_id,
+                created_time,
+                model_name,
+                num_prompts=len(prompts),
+                tokenizer=tokenizer,
+                request_metadata=request_metadata)

         # Non-streaming response
         final_res_batch: List[Optional[RequestOutput]] = [None] * len(prompts)
@@ -354,12 +356,13 @@ async def completion_stream_generator(
                         exclude_unset=False, exclude_none=True))
                 yield f"data: {final_usage_data}\n\n"

-            # report to FastAPI middleware aggregate tokens (all prompts, all completions)
+            # report to FastAPI middleware aggregate usage across all choices
             total_prompt_tokens = sum(num_prompt_tokens)
             total_completion_tokens = sum(previous_num_tokens)
-            request_metadata.final_usage_info = UsageInfo(prompt_tokens=total_prompt_tokens,
-                                                          completion_tokens=total_completion_tokens,
-                                                          total_tokens=total_prompt_tokens + total_completion_tokens)
+            request_metadata.final_usage_info = UsageInfo(
+                prompt_tokens=total_prompt_tokens,
+                completion_tokens=total_completion_tokens,
+                total_tokens=total_prompt_tokens + total_completion_tokens)

         except ValueError as e:
             # TODO: Use a vllm-specific Validation Error
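
For completions, the totals are summed across all prompts in the batch and all generated choices before being stored on the metadata object. A small, self-contained sketch of that aggregation follows; the sample token counts are made up, and UsageInfo is assumed to live in vllm.entrypoints.openai.protocol, as in upstream vLLM.

# Sketch of the aggregation performed in completion_stream_generator above.
# The token counts below are made-up sample values.
from vllm.entrypoints.openai.protocol import UsageInfo

num_prompt_tokens = [12, 9]          # per-prompt prompt-token counts
previous_num_tokens = [40, 37, 55]   # per-choice completion-token counts

total_prompt_tokens = sum(num_prompt_tokens)          # 21
total_completion_tokens = sum(previous_num_tokens)    # 132

usage = UsageInfo(
    prompt_tokens=total_prompt_tokens,
    completion_tokens=total_completion_tokens,
    total_tokens=total_prompt_tokens + total_completion_tokens)  # 153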
