@@ -240,7 +240,7 @@ def abort_requests(
240
240
request_ids : Iterable [str ],
241
241
) -> list [str ]:
242
242
request_ids_to_abort = self .flatten_req_to_abort (request_ids )
243
- self .handle_abort_reqs (request_ids_to_abort )
243
+ self .free_aborted_reqs (request_ids_to_abort )
244
244
return request_ids_to_abort
245
245
246
246
def flatten_req_to_abort (self , req_ids : Iterable [str ]) -> list [str ]:
@@ -253,7 +253,7 @@ def flatten_req_to_abort(self, req_ids: Iterable[str]) -> list[str]:
253
253
ret .extend (parent .child_requests )
254
254
return ret
255
255
256
- def handle_abort_reqs (self , req_ids : Iterable [str ]):
256
+ def free_aborted_reqs (self , req_ids : Iterable [str ]):
257
257
"""
258
258
Handles aborted requests. This method is triggered when the frontend
259
259
detects that a request has ended, such as when the client disconnects
@@ -263,16 +263,21 @@ def handle_abort_reqs(self, req_ids: Iterable[str]):
263
263
req_state = self .request_states .pop (req_id , None )
264
264
if req_state is not None :
265
265
self .lora_states .abort_request (req_state )
266
+ # TODO: handle _update_stats_from_finished here
267
+ # may need to add some parameters, such as iteration_stats and
268
+ # finished_reason.
266
269
return
267
270
268
def free_finised_reqs(self, req_ids: Iterable[str]) -> None:
    """
    Free the state kept for requests that have finished.

    This method is called when EngineCore detects that the requests have
    ended, and the resources related to the requests maintained by
    EngineCore have been released.

    Each id is removed from ``self.request_states`` and its state is
    passed to ``self.lora_states.finish_request``.

    Args:
        req_ids: Ids of the finished requests. A single bare ``str`` is
            treated as one request id rather than being iterated
            character by character.

    Raises:
        KeyError: If an id has no entry in ``self.request_states``. Ids
            that precede it in ``req_ids`` have already been freed.
    """
    # NOTE(review): "finised" is a typo for "finished". The name is kept
    # here because existing call sites use it; rename both together.

    # A str is itself an Iterable[str]; without this guard a lone id would
    # be split into characters and fail the lookup below.
    if isinstance(req_ids, str):
        req_ids = (req_ids,)

    for req_id in req_ids:
        # No default on pop: a missing id is surfaced as KeyError.
        req_state = self.request_states.pop(req_id)
        self.lora_states.finish_request(req_state)
        # TODO: handle _update_stats_from_finished here
277
282
278
283
def add_request (
@@ -385,7 +390,7 @@ def process_outputs(
385
390
# detected stop string, abort needed in EngineCore.
386
391
reqs_to_abort .append (req_id )
387
392
else :
388
- self .finish_request ( req_id )
393
+ self .free_finised_reqs (( req_id , ) )
389
394
390
395
# Track per-request stats
391
396
self ._update_stats_from_finished (req_state , finish_reason ,
0 commit comments