|
5 | 5 | import time
|
6 | 6 | from collections import deque
|
7 | 7 | from collections.abc import Iterable
|
8 |
| -from typing import Optional, Union |
| 8 | +from typing import TYPE_CHECKING, Optional, Union |
9 | 9 |
|
10 | 10 | from vllm.config import (CacheConfig, LoRAConfig, ModelConfig, SchedulerConfig,
|
11 | 11 | SpeculativeConfig)
|
@@ -658,25 +658,27 @@ def update_from_output(
|
658 | 658 | so_request = request.structured_output_request
|
659 | 659 | is_reasoning_end_this_step = False
|
660 | 660 |
|
| 661 | + # NOTE: use_structured_output implies |
| 662 | + # structured_output_request is not None, |
| 663 | + # but type checker isn't smart enough to know this. |
| 664 | + # This only affects type checking, not actual runtime behavior. |
| 665 | + # assert is also not recommended on perf-sensitive runtime paths. |
| 666 | + if TYPE_CHECKING: |
| 667 | + assert so_request is not None |
| 668 | + |
661 | 669 | if reasoner is None or so_request.reasoning_ended: # type: ignore[union-attr]
|
662 | 670 | advance_fsm = True
|
663 |
| - else: # type: ignore[union-attr] |
664 |
| - if reasoner.is_reasoning_end(request.all_token_ids): |
665 |
| - so_request.reasoning_ended = True # type: ignore[union-attr] |
666 |
| - is_reasoning_end_this_step = True |
667 |
| - # Don't advance FSM in the step the transition occurs, |
668 |
| - # as new_token_ids might contain the end marker. |
669 |
| - advance_fsm = False |
670 |
| - else: |
671 |
| - advance_fsm = False |
| 671 | + elif reasoner.is_reasoning_end(request.all_token_ids): |
| 672 | + so_request.reasoning_ended = True # type: ignore[union-attr] |
| 673 | + is_reasoning_end_this_step = True |
672 | 674 |
|
673 | 675 | # Only advance FSM if reasoning was already off OR
|
674 | 676 | # if we are not in the specific step where reasoning just ended.
|
675 | 677 | if advance_fsm and not is_reasoning_end_this_step:
|
676 | 678 | # NOTE: structured_output_request
|
677 | 679 | # should not be None if use_structured_output, we have
|
678 | 680 | # checked above, so it is safe to ignore the type warning
|
679 |
| - request.structured_output_request.grammar.accept_tokens( # type: ignore[union-attr] |
| 681 | + so_request.grammar.accept_tokens( # type: ignore[union-attr] |
680 | 682 | req_id, new_token_ids)
|
681 | 683 |
|
682 | 684 | # Get prompt logprobs for this request.
|
|
0 commit comments