 from vllm.outputs import RequestOutput
 from vllm.sampling_params import GuidedDecodingParams, SamplingParams

-PARAMS_MODELS_BACKENDS_TOKENIZER_MODE = [
-    ("mistralai/Ministral-8B-Instruct-2410", "xgrammar:disable-any-whitespace",
-     "auto"),
-    ("mistralai/Ministral-8B-Instruct-2410", "guidance:disable-any-whitespace",
-     "auto"),
-    ("mistralai/Ministral-8B-Instruct-2410", "xgrammar:disable-any-whitespace",
-     "mistral"),
-    ("Qwen/Qwen2.5-1.5B-Instruct", "xgrammar:disable-any-whitespace", "auto"),
+PARAMS_MODELS_BACKENDS_TOKENIZER_MODE_REASONING_PARSER = [
+    (
+        "mistralai/Ministral-8B-Instruct-2410",
+        "xgrammar:disable-any-whitespace",
+        "auto",
+        None,
+    ),
+    (
+        "mistralai/Ministral-8B-Instruct-2410",
+        "guidance:disable-any-whitespace",
+        "auto",
+        None,
+    ),
+    (
+        "mistralai/Ministral-8B-Instruct-2410",
+        "xgrammar:disable-any-whitespace",
+        "mistral",
+        None,
+    ),
+    (
+        "Qwen/Qwen2.5-1.5B-Instruct",
+        "xgrammar:disable-any-whitespace",
+        "auto",
+        None,
+    ),
+    (
+        "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B",
+        "xgrammar:disable-any-whitespace",
+        "auto",
+        "deepseek_r1",
+    ),
     #FIXME: This test is flaky on CI thus disabled
     #("Qwen/Qwen2.5-1.5B-Instruct", "guidance:disable-any-whitespace", "auto"),
 ]
@@ -47,8 +70,9 @@ class CarDescription(BaseModel):


 @pytest.mark.skip_global_cleanup
-@pytest.mark.parametrize("model_name, guided_decoding_backend, tokenizer_mode",
-                         PARAMS_MODELS_BACKENDS_TOKENIZER_MODE)
+@pytest.mark.parametrize(
+    "model_name, guided_decoding_backend, tokenizer_mode, reasoning_parser",
+    PARAMS_MODELS_BACKENDS_TOKENIZER_MODE_REASONING_PARSER)
 def test_structured_output(
     monkeypatch: pytest.MonkeyPatch,
     sample_json_schema: dict[str, Any],
@@ -59,6 +83,7 @@ def test_structured_output(
     sample_guided_choice: str,
     guided_decoding_backend: str,
     tokenizer_mode: str,
+    reasoning_parser: str | None,
     model_name: str,
 ):
     monkeypatch.setenv("VLLM_USE_V1", "1")
@@ -69,7 +94,9 @@ def test_structured_output(
               enforce_eager=True,
               max_model_len=1024,
               guided_decoding_backend=guided_decoding_backend,
-              tokenizer_mode=tokenizer_mode)
+              tokenizer_mode=tokenizer_mode,
+              enable_reasoning=reasoning_parser is not None,
+              reasoning_parser=reasoning_parser)

     #
     # Test 1: Generate JSON output based on a provided schema
@@ -364,6 +391,40 @@ def test_structured_output(
     output_json = json.loads(generated_text)
     jsonschema.validate(instance=output_json, schema=json_schema)

+    #
+    # Test 11: Generate structured output with reasoning step
+    #
+    if reasoning_parser is not None:
+        reasoning_prompt = "Solve the following math problem step-by-step, then provide the final answer as JSON object with a single key 'result'. Problem: What is 5 * 8 + 2?"  # noqa: E501
+        reasoning_schema = {
+            "type": "object",
+            "properties": {
+                "result": {
+                    "type": "integer"
+                }
+            },
+            "required": ["result"]
+        }
+
+        sampling_params = SamplingParams(
+            temperature=0.1,  # Low temp for deterministic reasoning
+            max_tokens=200,
+            guided_decoding=GuidedDecodingParams(json=reasoning_schema))
+        outputs = llm.generate(prompts=[reasoning_prompt],
+                               sampling_params=sampling_params,
+                               use_tqdm=True)
+
+        assert outputs is not None
+        output = outputs[0]
+        assert output is not None
+        assert isinstance(output, RequestOutput)
+        prompt = output.prompt
+        generated_text = output.outputs[0].text
+        print(f"Prompt: {prompt!r}, Generated text: {generated_text!r}")
+
+        output_json = json.loads(generated_text)
+        jsonschema.validate(instance=output_json, schema=reasoning_schema)
+

 @pytest.mark.skip_global_cleanup
 @pytest.mark.parametrize("model_name, tokenizer_mode",
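For context, a minimal standalone sketch (not part of the diff) of what the new
parametrization exercises: constructing an LLM with the enable_reasoning and
reasoning_parser options added above, then requesting schema-constrained JSON.
The model, backend, and parser names are taken from the new parameter table;
all API calls mirror the test body itself.

    # Sketch only: mirrors the test above, using the deepseek_r1 entry from
    # PARAMS_MODELS_BACKENDS_TOKENIZER_MODE_REASONING_PARSER.
    import json

    from vllm import LLM
    from vllm.sampling_params import GuidedDecodingParams, SamplingParams

    llm = LLM(model="deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B",
              enforce_eager=True,
              max_model_len=1024,
              guided_decoding_backend="xgrammar:disable-any-whitespace",
              tokenizer_mode="auto",
              enable_reasoning=True,  # parser given, so reasoning is enabled
              reasoning_parser="deepseek_r1")

    schema = {
        "type": "object",
        "properties": {"result": {"type": "integer"}},
        "required": ["result"],
    }
    params = SamplingParams(temperature=0.1,
                            max_tokens=200,
                            guided_decoding=GuidedDecodingParams(json=schema))
    outputs = llm.generate(
        prompts=["What is 5 * 8 + 2? Answer as a JSON object with key 'result'."],
        sampling_params=params)

    # As in Test 11, the final generated text is expected to parse as pure
    # JSON once the reasoning step has been handled by the parser.
    print(json.loads(outputs[0].outputs[0].text))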