@@ -7,6 +7,7 @@
 
 from vllm.config import VllmConfig
 from vllm.logger import init_logger
+from vllm.reasoning import ReasoningParserManager
 from vllm.transformers_utils.tokenizer_group import init_tokenizer_from_configs
 from vllm.v1.structured_output.backend_guidance import GuidanceBackend
 from vllm.v1.structured_output.backend_types import (StructuredOutputBackend,
@@ -18,6 +19,7 @@
     import numpy.typing as npt
     import torch
 
+    from vllm.reasoning import ReasoningParser
     from vllm.v1.request import Request
 
 logger = init_logger(__name__)
@@ -28,6 +30,7 @@ class StructuredOutputManager:
 
     def __init__(self, vllm_config: VllmConfig):
         self.backend: Optional[StructuredOutputBackend] = None
+        self.reasoner: Optional[ReasoningParser] = None
         self.vllm_config = vllm_config
         self._grammar_bitmask: Optional[torch.Tensor] = None
 
@@ -72,6 +75,12 @@ def grammar_init(self, request: Request) -> None:
                 raise ValueError(
                     f"Unsupported structured output backend: {backend_name}")
 
+            if (reasoning_backend :=
+                    self.vllm_config.decoding_config.reasoning_backend
+                ) is not None and self.reasoner is None:
+                self.reasoner = ReasoningParserManager.get_reasoning_parser(
+                    reasoning_backend)(tokenizer=tokenizer)
+
         grammar = self.executor.submit(self._async_create_grammar, request)
         request.structured_output_request.grammar = grammar  # type: ignore[assignment]
 
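The guard added in the last hunk lazily builds a reasoning parser the first time structured output is initialized: an assignment expression reads the configured reasoning backend, and the parser is constructed only when a backend is set and no parser exists yet. A minimal standalone sketch of the same lazy-init pattern (the registry, class, and method names below are illustrative placeholders, not vLLM's API):

# Illustrative sketch only: placeholder names, not vLLM's real registry or API.
from typing import Optional


class DemoReasoningParser:
    """Stand-in parser that just remembers the tokenizer it was built with."""

    def __init__(self, tokenizer):
        self.tokenizer = tokenizer


# Hypothetical name -> parser-class registry, standing in for
# ReasoningParserManager.get_reasoning_parser().
PARSER_REGISTRY = {"demo_reasoner": DemoReasoningParser}


class Manager:

    def __init__(self, reasoning_backend: Optional[str]):
        self.reasoning_backend = reasoning_backend
        self.reasoner: Optional[DemoReasoningParser] = None

    def grammar_init(self, tokenizer) -> None:
        # Same shape as the diff: resolve the parser class by backend name
        # and build it once, only if a reasoning backend is configured.
        if (backend := self.reasoning_backend) is not None \
                and self.reasoner is None:
            self.reasoner = PARSER_REGISTRY[backend](tokenizer=tokenizer)


manager = Manager(reasoning_backend="demo_reasoner")
manager.grammar_init(tokenizer=object())
assert manager.reasoner is not None  # built on the first call
manager.grammar_init(tokenizer=object())  # no-op: reasoner already exists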