@@ -19,6 +19,7 @@ vLLM currently supports the following reasoning models:

:::{note}
IBM Granite 3.2 reasoning is disabled by default; to enable it, you must also pass `thinking=True` in your `chat_template_kwargs`.
+ The reasoning feature for the Qwen3 series is enabled by default. To disable it, you must pass `enable_thinking=False` in your `chat_template_kwargs`.
:::

## Quickstart
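To make the note concrete before the quickstart hunks below, here is a minimal sketch of passing `chat_template_kwargs` through the OpenAI-compatible API. The server URL, API key, and served model are assumptions for illustration; the `reasoning_content` field is the one the quickstart reads.

```python
# Minimal sketch of the chat_template_kwargs mechanism described in the note.
# Assumes a vLLM OpenAI-compatible server is already running at this URL.
from openai import OpenAI

client = OpenAI(base_url="http://localhost:8000/v1", api_key="EMPTY")
model = client.models.list().data[0].id

messages = [{"role": "user", "content": "9.11 and 9.8, which is greater?"}]

# Granite 3.2: reasoning is off by default, so opt in with thinking=True.
# Qwen3: reasoning is on by default, so pass
# {"enable_thinking": False} instead if you want it off.
response = client.chat.completions.create(
    model=model,
    messages=messages,
    extra_body={"chat_template_kwargs": {"thinking": True}},
)
print(response.choices[0].message.reasoning_content)
print(response.choices[0].message.content)
```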
@@ -49,6 +50,8 @@ model = models.data[0].id

# Round 1
messages = [{"role": "user", "content": "9.11 and 9.8, which is greater?"}]
# For granite, add: `extra_body={"chat_template_kwargs": {"thinking": True}}`
+ # For the Qwen3 series, to disable its default reasoning behavior, add:
+ # extra_body={"chat_template_kwargs": {"enable_thinking": False}}
response = client.chat.completions.create(model=model, messages=messages)

reasoning_content = response.choices[0].message.reasoning_content
@@ -104,6 +107,8 @@ model = models.data[0].id

messages = [{"role": "user", "content": "9.11 and 9.8, which is greater?"}]
# For granite, add: `extra_body={"chat_template_kwargs": {"thinking": True}}`
+ # For the Qwen3 series, to disable its default reasoning behavior, add:
+ # extra_body={"chat_template_kwargs": {"enable_thinking": False}}
stream = client.chat.completions.create(model=model,
                                        messages=messages,
                                        stream=True)
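The hunk ends at `stream=True`; for completeness, a sketch of consuming the `stream` object it sets up. One assumption worth flagging: that each streaming delta exposes the same `reasoning_content` field the non-streaming `message` object does in the first hunk.

```python
# Sketch of consuming the streamed response created above; assumes each
# delta carries reasoning_content alongside the usual content field.
for chunk in stream:
    delta = chunk.choices[0].delta
    if getattr(delta, "reasoning_content", None):
        # Reasoning tokens arrive first, in their own field.
        print(delta.reasoning_content, end="", flush=True)
    elif delta.content:
        # The final answer streams through the standard content field.
        print(delta.content, end="", flush=True)
```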