Skip to content

Commit e6ad559

Browse files
committed
Addressed feedback from pull request
Signed-off-by: 盏一 <zhanyi.ww@alibaba-inc.com>
1 parent e60d4aa commit e6ad559

File tree

2 files changed

+20
-9
lines changed

2 files changed

+20
-9
lines changed

vllm/v1/engine/async_llm.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -354,6 +354,7 @@ async def abort(self, request_id: str) -> None:
354 354
await self.engine_core.abort_requests_async(request_ids)
355 355
# At this point, the abort message has already been sent to EngineCore,
356 356
# so the request status in the Frontend can be removed.
357+
# For more details, please see: PR #15326
357 358
self.output_processor.handle_abort_reqs(request_ids)
358 359

359 360
if self.log_requests:

vllm/v1/engine/output_processor.py

Lines changed: 19 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -253,21 +253,25 @@ def flatten_req_to_abort(self, req_ids: Iterable[str]) -> list[str]:
253 253
ret.extend(parent.child_requests)
254 254
return ret
255 255

256-
# "Aborted request", meaning the frontend first detects that
257-
# the request has ended, such as when the client disconnects
258-
# or the detokenizer detects a stop string.
259 256
def handle_abort_reqs(self, req_ids: Iterable[str]):
257+
"""
258+
Handles aborted requests. This method is triggered when the frontend
259+
detects that a request has ended, such as when the client disconnects
260+
or the detokenizer detects a stop string.
261+
"""
260 262
for req_id in req_ids:
261 263
req_state = self.request_states.pop(req_id, None)
262 264
if req_state is not None:
263 265
self.lora_states.abort_request(req_state)
264 266
return
265 267

266-
# "Finished request", meaning EngineCore first detects that
267-
# the request has ended, and the resources related to the request
268-
# maintained by EngineCore have been released.
269-
def _handle_finished_reqs(self, req_id):
270-
req_state = self.request_states.pop(req_id)
268+
def finish_request(self, request_id: str) -> None:
269+
"""
270+
Handle a finished request. This method is called when EngineCore detects
271+
that the request has ended, and the resources related to the request
272+
maintained by EngineCore have been released.
273+
"""
274+
req_state = self.request_states.pop(request_id)
271 275
self.lora_states.finish_request(req_state)
272 276
return
273 277

@@ -322,6 +326,12 @@ def process_outputs(
322 326
within the loop below.
323 327
324 328
**********************************************************
329+
330+
NOTE: Stop string requests are finished externally to this function
331+
because we must first send EngineCoreRequestType.ABORT to EngineCore
332+
before cleaning up the request states in the Frontend. This prevents
333+
the Frontend from adding two requests with duplicate RequestIds to
334+
EngineCore simultaneously.
325 335
"""
326 336

327 337
request_outputs: list[RequestOutput] = []
@@ -375,7 +385,7 @@ def process_outputs(
375 385
# detected stop string, abort needed in EngineCore.
376 386
reqs_to_abort.append(req_id)
377 387
else:
378-
self._handle_finished_reqs(req_id)
388+
self.finish_request(req_id)
379 389

380 390
# Track per-request stats
381 391
self._update_stats_from_finished(req_state, finish_reason,

0 commit comments

Comments
 (0)