Skip to content

Commit b170dd1

Browse files
committed
Unified the resource cleanup for aborted and finished requests
Signed-off-by: 盏一 <zhanyi.ww@alibaba-inc.com>
1 parent e6ad559 commit b170dd1

File tree

2 files changed

+13
-8
lines changed

2 files changed

+13
-8
lines changed

vllm/v1/engine/async_llm.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -331,7 +331,7 @@ async def _run_output_handler(self):
331331
# 3) Abort any reqs that finished due to stop strings.
332332
await self.engine_core.abort_requests_async(
333333
processed_outputs.reqs_to_abort)
334-
self.output_processor.handle_abort_reqs(
334+
self.output_processor.free_aborted_reqs(
335335
processed_outputs.reqs_to_abort)
336336

337337
# 4) Logging.
@@ -355,7 +355,7 @@ async def abort(self, request_id: str) -> None:
355355
# At this point, the abort message has already been sent to EngineCore,
356356
# so the request status in the Frontend can be removed.
357357
# For more details, please see: PR #15326
358-
self.output_processor.handle_abort_reqs(request_ids)
358+
self.output_processor.free_aborted_reqs(request_ids)
359359

360360
if self.log_requests:
361361
logger.info("Aborted request %s.", request_id)

vllm/v1/engine/output_processor.py

Lines changed: 11 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -240,7 +240,7 @@ def abort_requests(
240240
request_ids: Iterable[str],
241241
) -> list[str]:
242242
request_ids_to_abort = self.flatten_req_to_abort(request_ids)
243-
self.handle_abort_reqs(request_ids_to_abort)
243+
self.free_aborted_reqs(request_ids_to_abort)
244244
return request_ids_to_abort
245245

246246
def flatten_req_to_abort(self, req_ids: Iterable[str]) -> list[str]:
@@ -253,7 +253,7 @@ def flatten_req_to_abort(self, req_ids: Iterable[str]) -> list[str]:
253253
ret.extend(parent.child_requests)
254254
return ret
255255

256-
def handle_abort_reqs(self, req_ids: Iterable[str]):
256+
def free_aborted_reqs(self, req_ids: Iterable[str]):
257257
"""
258258
Handles aborted requests. This method is triggered when the frontend
259259
detects that a request has ended, such as when the client disconnects
@@ -263,16 +263,21 @@ def handle_abort_reqs(self, req_ids: Iterable[str]):
263263
req_state = self.request_states.pop(req_id, None)
264264
if req_state is not None:
265265
self.lora_states.abort_request(req_state)
266+
# TODO: handle _update_stats_from_finished here
267+
# may need to add some parameters, such as iteration_stats and
268+
# finished_reason.
266269
return
267270

268-
def finish_request(self, request_id: str) -> None:
271+
def free_finised_reqs(self, req_ids: Iterable[str]):
269272
"""
270273
Handle a finished request. This method is called when EngineCore detects
271274
that the request has ended, and the resources related to the request
272275
maintained by EngineCore have been released.
273276
"""
274-
req_state = self.request_states.pop(request_id)
275-
self.lora_states.finish_request(req_state)
277+
for req_id in req_ids:
278+
req_state = self.request_states.pop(req_id)
279+
self.lora_states.finish_request(req_state)
280+
# TODO: handle _update_stats_from_finished here
276281
return
277282

278283
def add_request(
@@ -385,7 +390,7 @@ def process_outputs(
385390
# detected stop string, abort needed in EngineCore.
386391
reqs_to_abort.append(req_id)
387392
else:
388-
self.finish_request(req_id)
393+
self.free_finised_reqs((req_id, ))
389394

390395
# Track per-request stats
391396
self._update_stats_from_finished(req_state, finish_reason,

0 commit comments

Comments
 (0)