Addressed feedback from pull request

hidva · hidva · commit e6ad55971798 · 2025-04-02T13:38:40.000+08:00
Signed-off-by: 盏一 <zhanyi.ww@alibaba-inc.com>
diff --git a/vllm/v1/engine/async_llm.py b/vllm/v1/engine/async_llm.py
@@ -354,6 +354,7 @@ async def abort(self, request_id: str) -> None:
         await self.engine_core.abort_requests_async(request_ids)
         # At this point, the abort message has already been sent to EngineCore,
         # so the request status in the Frontend can be removed.
+        # For more details, please see: PR #15326
         self.output_processor.handle_abort_reqs(request_ids)
 
         if self.log_requests:
diff --git a/vllm/v1/engine/output_processor.py b/vllm/v1/engine/output_processor.py
@@ -253,21 +253,25 @@ def flatten_req_to_abort(self, req_ids: Iterable[str]) -> list[str]:
                 ret.extend(parent.child_requests)
         return ret
 
-    # "Aborted request", meaning the frontend first detects that
-    # the request has ended, such as when the client disconnects
-    # or the detokenizer detects a stop string.
     def handle_abort_reqs(self, req_ids: Iterable[str]):
+        """
+        Handles aborted requests. This method is triggered when the frontend
+        detects that a request has ended, such as when the client disconnects
+        or the detokenizer detects a stop string.
+        """
         for req_id in req_ids:
             req_state = self.request_states.pop(req_id, None)
             if req_state is not None:
                 self.lora_states.abort_request(req_state)
         return
 
-    # "Finished request", meaning EngineCore first detects that
-    # the request has ended, and the resources related to the request
-    # maintained by EngineCore have been released.
-    def _handle_finished_reqs(self, req_id):
-        req_state = self.request_states.pop(req_id)
+    def finish_request(self, request_id: str) -> None:
+        """
+        Handle a finished request. This method is called when EngineCore detects
+        that the request has ended, and the resources related to the request
+        maintained by EngineCore have been released.
+        """
+        req_state = self.request_states.pop(request_id)
         self.lora_states.finish_request(req_state)
         return
 
@@ -322,6 +326,12 @@ def process_outputs(
         within the loop below.
 
         **********************************************************
+
+        NOTE: Stop string requests are finished externally to this function
+        because we must first send EngineCoreRequestType.ABORT to EngineCore
+        before cleaning up the request states in the Frontend. This prevents
+        the Frontend from adding two requests with duplicate RequestIds to
+        EngineCore simultaneously.
         """
 
         request_outputs: list[RequestOutput] = []
@@ -375,7 +385,7 @@ def process_outputs(
                     # detected stop string, abort needed in EngineCore.
                     reqs_to_abort.append(req_id)
                 else:
-                    self._handle_finished_reqs(req_id)
+                    self.finish_request(req_id)
 
                 # Track per-request stats
                 self._update_stats_from_finished(req_state, finish_reason,