File tree 5 files changed +10
-10
lines changed
5 files changed +10
-10
lines changed Original file line number Diff line number Diff line change 1
1
# SPDX-License-Identifier: Apache-2.0
2
2
3
+ from __future__ import annotations
4
+
3
5
import os
4
6
from abc import abstractmethod
5
7
from collections .abc import Sequence
@@ -33,7 +35,7 @@ def vocab(self) -> dict[str, int]:
33
35
return self .model_tokenizer .get_vocab ()
34
36
35
37
@abstractmethod
36
- def is_reasoning_end (self , input_ids : list [int ]) -> bool :
38
+ def is_reasoning_end (self , input_ids : Sequence [int ]) -> bool :
37
39
"""
38
40
Check if the reasoning content ends in the input_ids.
39
41
Original file line number Diff line number Diff line change @@ -655,21 +655,19 @@ def update_from_output(
655
655
if new_token_ids and request .use_structured_output :
656
656
advance_fsm = False
657
657
reasoner = self .structured_output_manager .reasoner
658
- is_reasoning_end_this_step = False # Flag the transition
658
+ so_request = request .structured_output_request
659
+ is_reasoning_end_this_step = False
659
660
660
- if reasoner is None or request .reasoning_ended :
661
- # Reasoning was already off or never active
661
+ if reasoner is None or so_request .reasoning_ended : # type: ignore[union-attr]
662
662
advance_fsm = True
663
- else :
664
- # Reasoning is active, check if it ends now
663
+ else : # type: ignore[union-attr]
665
664
if reasoner .is_reasoning_end (request .all_token_ids ):
666
- request .reasoning_ended = True
665
+ so_request .reasoning_ended = True # type: ignore[union-attr]
667
666
is_reasoning_end_this_step = True
668
667
# Don't advance FSM in the step the transition occurs,
669
668
# as new_token_ids might contain the end marker.
670
669
advance_fsm = False
671
670
else :
672
- # Reasoning continues, don't advance FSM
673
671
advance_fsm = False
674
672
675
673
# Only advance FSM if reasoning was already off OR
Original file line number Diff line number Diff line change @@ -37,7 +37,6 @@ def __init__(
37
37
self .eos_token_id = eos_token_id
38
38
self .lora_request = lora_request
39
39
self .structured_output_request = structured_output_request
40
- self .reasoning_ended : bool = False
41
40
42
41
self .status = (RequestStatus .WAITING_FOR_FSM
43
42
if sampling_params .guided_decoding is not None else
Original file line number Diff line number Diff line change @@ -129,7 +129,7 @@ def grammar_bitmask(
129
129
assert so_request is not None and so_request .grammar is not None
130
130
131
131
apply_bitmask = (self .reasoner is None
132
- or full_request .reasoning_ended
132
+ or so_request .reasoning_ended
133
133
or self .reasoner .is_reasoning_end (
134
134
full_request .all_token_ids ))
135
135
Original file line number Diff line number Diff line change @@ -20,6 +20,7 @@ class StructuredOutputRequest:
20
20
sampling_params : SamplingParams
21
21
_grammar : Optional [Union [Future [StructuredOutputGrammar ],
22
22
StructuredOutputGrammar ]] = None
23
+ reasoning_ended : bool = False
23
24
24
25
def _check_grammar_completion (self ) -> bool :
25
26
# NOTE: We have to lazy import to gate circular imports
You can’t perform that action at this time.
0 commit comments