Skip to content

Commit 2cf21f0

Browse files
committed
chore: move up checker logics
Signed-off-by: Aaron Pham <contact@aarnphm.xyz>
1 parent b704a0b commit 2cf21f0

File tree

2 files changed

+19
-18
lines changed

2 files changed

+19
-18
lines changed

vllm/v1/core/sched/scheduler.py

Lines changed: 13 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
import time
66
from collections import deque
77
from collections.abc import Iterable
8-
from typing import Optional, Union
8+
from typing import TYPE_CHECKING, Optional, Union
99

1010
from vllm.config import (CacheConfig, LoRAConfig, ModelConfig, SchedulerConfig,
1111
SpeculativeConfig)
@@ -658,25 +658,27 @@ def update_from_output(
658658
so_request = request.structured_output_request
659659
is_reasoning_end_this_step = False
660660

661+
# NOTE: use_structured_output implies
662+
# structured_output_request is not None,
663+
# but type checker isn't smart enough to know this.
664+
# This only affects static type checking, not actual runtime.
665+
# A real assert is also not recommended on a perf-sensitive runtime path.
666+
if TYPE_CHECKING:
667+
assert so_request is not None
668+
661669
if reasoner is None or so_request.reasoning_ended: # type: ignore[union-attr]
662670
advance_fsm = True
663-
else: # type: ignore[union-attr]
664-
if reasoner.is_reasoning_end(request.all_token_ids):
665-
so_request.reasoning_ended = True # type: ignore[union-attr]
666-
is_reasoning_end_this_step = True
667-
# Don't advance FSM in the step the transition occurs,
668-
# as new_token_ids might contain the end marker.
669-
advance_fsm = False
670-
else:
671-
advance_fsm = False
671+
elif reasoner.is_reasoning_end(request.all_token_ids):
672+
so_request.reasoning_ended = True # type: ignore[union-attr]
673+
is_reasoning_end_this_step = True
672674

673675
# Only advance FSM if reasoning was already off OR
674676
# if we are not in the specific step where reasoning just ended.
675677
if advance_fsm and not is_reasoning_end_this_step:
676678
# NOTE: structured_output_request
677679
# should not be None if use_structured_output; this was
678680
# checked above, so it is safe to ignore the type warning.
679-
request.structured_output_request.grammar.accept_tokens( # type: ignore[union-attr]
681+
so_request.grammar.accept_tokens( # type: ignore[union-attr]
680682
req_id, new_token_ids)
681683

682684
# Get prompt logprobs for this request.

vllm/v1/structured_output/__init__.py

Lines changed: 6 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -122,18 +122,17 @@ def grammar_bitmask(
122122

123123
for req_id, batch_index in structured_output_request_ids.items():
124124
full_request = requests[req_id]
125-
so_request = full_request.structured_output_request
126-
assert so_request is not None and so_request.grammar is not None
125+
request = full_request.structured_output_request
126+
assert request is not None and request.grammar is not None
127127

128-
apply_bitmask = (self.reasoner is None
129-
or so_request.reasoning_ended
128+
apply_bitmask = (self.reasoner is None or request.reasoning_ended
130129
or self.reasoner.is_reasoning_end(
131130
full_request.all_token_ids))
132131

133-
if apply_bitmask and not so_request.grammar.is_terminated():
134-
so_request.grammar.fill_bitmask(bitmask_tensor, batch_index)
132+
if apply_bitmask and not request.grammar.is_terminated():
133+
request.grammar.fill_bitmask(bitmask_tensor, batch_index)
135134

136-
if batch_len < bitmask_tensor.shape[0]:
135+
if batch_len < self._grammar_bitmask.shape[0]:
137136
bitmask_tensor = self._grammar_bitmask[:batch_len]
138137

139138
# After finishing with the xgrammar operations, we convert to

0 commit comments

Comments
 (0)