@@ -253,21 +253,25 @@ def flatten_req_to_abort(self, req_ids: Iterable[str]) -> list[str]:
253
253
ret .extend (parent .child_requests )
254
254
return ret
255
255
256
- # "Aborted request", meaning the frontend first detects that
257
- # the request has ended, such as when the client disconnects
258
- # or the detokenizer detects a stop string.
259
256
def handle_abort_reqs(self, req_ids: Iterable[str]):
    """Release frontend-side state for aborted requests.

    An "aborted" request is one whose end is first noticed by the
    frontend, e.g. the client disconnected or the detokenizer hit a
    stop string.

    Args:
        req_ids: Ids of the requests to abort. Ids that are not present
            in ``self.request_states`` are skipped.
    """
    for aborted_id in req_ids:
        state = self.request_states.pop(aborted_id, None)
        if state is None:
            # Not tracked (or already removed): nothing to release.
            continue
        self.lora_states.abort_request(state)
265
267
266
- # "Finished request", meaning EngineCore first detects that
267
- # the request has ended, and the resources related to the request
268
- # maintained by EngineCore have been released.
269
- def _handle_finished_reqs (self , req_id ):
270
- req_state = self .request_states .pop (req_id )
268
def finish_request(self, request_id: str) -> None:
    """Release frontend-side state for a finished request.

    A "finished" request is one whose end is first detected by
    EngineCore, after EngineCore has already released the resources it
    maintained for that request.

    The call is a no-op when ``request_id`` is no longer tracked, so a
    request that was already removed (for instance through
    ``handle_abort_reqs``) does not raise ``KeyError`` here.

    Args:
        request_id: Id of the request whose state should be dropped.
    """
    # Use a default so an already-removed id is tolerated, mirroring
    # the lookup done in handle_abort_reqs.
    req_state = self.request_states.pop(request_id, None)
    if req_state is None:
        return
    self.lora_states.finish_request(req_state)
273
277
@@ -322,6 +326,12 @@ def process_outputs(
322
326
within the loop below.
323
327
324
328
**********************************************************
329
+
330
+ NOTE: Stop string requests are finished externally to this function
331
+ because we must first send EngineCoreRequestType.ABORT to EngineCore
332
+ before cleaning up the request states in the Frontend. This prevents
333
+ the Frontend from adding two requests with duplicate RequestIds to
334
+ EngineCore simultaneously.
325
335
"""
326
336
327
337
request_outputs : list [RequestOutput ] = []
@@ -375,7 +385,7 @@ def process_outputs(
375
385
# detected stop string, abort needed in EngineCore.
376
386
reqs_to_abort .append (req_id )
377
387
else :
378
- self ._handle_finished_reqs (req_id )
388
+ self .finish_request (req_id )
379
389
380
390
# Track per-request stats
381
391
self ._update_stats_from_finished (req_state , finish_reason ,
0 commit comments