chore: move up checker logics

aarnphm · aarnphm · commit 2cf21f0a6ce0 · 2025-04-17T19:48:03.000Z
Signed-off-by: Aaron Pham <contact@aarnphm.xyz>
diff --git a/vllm/v1/core/sched/scheduler.py b/vllm/v1/core/sched/scheduler.py
@@ -5,7 +5,7 @@
 import time
 from collections import deque
 from collections.abc import Iterable
-from typing import Optional, Union
+from typing import TYPE_CHECKING, Optional, Union
 
 from vllm.config import (CacheConfig, LoRAConfig, ModelConfig, SchedulerConfig,
                          SpeculativeConfig)
@@ -658,25 +658,27 @@ def update_from_output(
                 so_request = request.structured_output_request
                 is_reasoning_end_this_step = False
 
+                # NOTE: use_structured_output implies that
+                # structured_output_request is not None,
+                # but the type checker isn't smart enough to know this.
+                # This only affects type checking, not actual runtime.
+                # assert is also not recommended on perf-sensitive paths.
+                if TYPE_CHECKING:
+                    assert so_request is not None
+
                 if reasoner is None or so_request.reasoning_ended:  # type: ignore[union-attr]
                     advance_fsm = True
-                else:  # type: ignore[union-attr]
-                    if reasoner.is_reasoning_end(request.all_token_ids):
-                        so_request.reasoning_ended = True  # type: ignore[union-attr]
-                        is_reasoning_end_this_step = True
-                        # Don't advance FSM in the step the transition occurs,
-                        # as new_token_ids might contain the end marker.
-                        advance_fsm = False
-                    else:
-                        advance_fsm = False
+                elif reasoner.is_reasoning_end(request.all_token_ids):
+                    so_request.reasoning_ended = True  # type: ignore[union-attr]
+                    is_reasoning_end_this_step = True
 
                 # Only advance FSM if reasoning was already off OR
                 # if we are not in the specific step where reasoning just ended.
                 if advance_fsm and not is_reasoning_end_this_step:
                     # NOTE: structured_output_request
                     # should not be None if use_structured_output, we have
                     # check above, so safe to ignore type warning
-                    request.structured_output_request.grammar.accept_tokens(  # type: ignore[union-attr]
+                    so_request.grammar.accept_tokens(  # type: ignore[union-attr]
                         req_id, new_token_ids)
 
             # Get prompt logprobs for this request.
diff --git a/vllm/v1/structured_output/__init__.py b/vllm/v1/structured_output/__init__.py
@@ -122,18 +122,17 @@ def grammar_bitmask(
 
         for req_id, batch_index in structured_output_request_ids.items():
             full_request = requests[req_id]
-            so_request = full_request.structured_output_request
-            assert so_request is not None and so_request.grammar is not None
+            request = full_request.structured_output_request
+            assert request is not None and request.grammar is not None
 
-            apply_bitmask = (self.reasoner is None
-                             or so_request.reasoning_ended
+            apply_bitmask = (self.reasoner is None or request.reasoning_ended
                              or self.reasoner.is_reasoning_end(
                                  full_request.all_token_ids))
 
-            if apply_bitmask and not so_request.grammar.is_terminated():
-                so_request.grammar.fill_bitmask(bitmask_tensor, batch_index)
+            if apply_bitmask and not request.grammar.is_terminated():
+                request.grammar.fill_bitmask(bitmask_tensor, batch_index)
 
-        if batch_len < bitmask_tensor.shape[0]:
+        if batch_len < self._grammar_bitmask.shape[0]:
             bitmask_tensor = self._grammar_bitmask[:batch_len]
 
         # After finishing with the xgrammar operations, we convert to