[Bugfix] validate grammar and throw 400 error instead of crashing the engine when xgrammar validation fails (#17623)

Jason-CKY · russellb · web-flow · commit 08bf78407809 · 2025-05-12T09:06:10.000+08:00
Signed-off-by: Jason Cheng <jasoncky96@gmail.com>
Co-authored-by: Russell Bryant <rbryant@redhat.com>
diff --git a/tests/v1/entrypoints/openai/test_chat_completion.py b/tests/v1/entrypoints/openai/test_chat_completion.py
@@ -0,0 +1,137 @@
+# SPDX-License-Identifier: Apache-2.0
+
+import openai  # use the official client for correctness check
+import pytest
+import pytest_asyncio
+
+from tests.utils import RemoteOpenAIServer
+
+# any model with a chat template defined in tokenizer_config should work here
+MODEL_NAME = "Qwen/Qwen2.5-1.5B-Instruct"
+
+
+@pytest.fixture(scope="module")
+def default_server_args():
+    return [
+        # small limits and eager mode for speed and memory savings in CI
+        "--max-model-len",
+        "2048",
+        "--max-num-seqs",
+        "128",
+        "--enforce-eager",
+    ]
+
+
+@pytest.fixture(scope="module")
+def server(default_server_args):
+    with RemoteOpenAIServer(MODEL_NAME, default_server_args) as remote_server:
+        yield remote_server
+
+
+@pytest_asyncio.fixture
+async def client(server):
+    async with server.get_async_client() as async_client:
+        yield async_client
+
+
+@pytest.mark.asyncio
+@pytest.mark.parametrize(
+    "model_name",
+    [MODEL_NAME],
+)
+async def test_invalid_json_schema(client: openai.AsyncOpenAI,
+                                   model_name: str) -> None:
+    invalid_json_schema = {
+        "$defs": {
+            "CarType": {
+                "enum": ["sedan", "SUV", "Truck", "Coupe"],
+                "title": "CarType",
+                "type": "string",
+            }
+        },
+        "properties": {
+            "brand": {
+                "title": "Brand",
+                "type": "string"
+            },
+            "model": {
+                "title": "Model",
+                "type": "string"
+            },
+            "car_type": {
+                "$ref": "#/$defs/CarType"
+            },
+            "foo": "bar",
+        },
+        "required": ["brand", "model", "car_type"],
+        "title": "CarDescription",
+        "type": "object",
+    }
+    prompt = ("Generate a JSON with the brand, model and car_type of"
+              "the most iconic car from the 90's")
+    with pytest.raises((openai.BadRequestError, openai.APIError)):
+        await client.chat.completions.create(
+            model=model_name,
+            messages=[{
+                "role": "user",
+                "content": prompt,
+            }],
+            extra_body={"guided_json": invalid_json_schema},
+        )
+
+
+@pytest.mark.asyncio
+@pytest.mark.parametrize(
+    "model_name",
+    [MODEL_NAME],
+)
+async def test_invalid_regex(client: openai.AsyncOpenAI, model_name: str):
+    prompt = ("Generate an email address for Alan Turing, who works in Enigma."
+              "End in .com and new line. Example result:"
+              "alan.turing@enigma.com\n")
+
+    with pytest.raises((openai.BadRequestError, openai.APIError)):
+        await client.chat.completions.create(
+            model=model_name,
+            messages=[{
+                "role": "user",
+                "content": prompt,
+            }],
+            extra_body={
+                "guided_regex": r"[.*",
+                "stop": ["\n"]
+            },
+        )
+
+
+@pytest.mark.asyncio
+@pytest.mark.parametrize(
+    "model_name",
+    [MODEL_NAME],
+)
+async def test_invalid_grammar(client: openai.AsyncOpenAI, model_name: str):
+    invalid_simplified_sql_grammar = """
+        root ::= select_statementinvalidsyntax
+
+        select_statement ::= "SELECT " column " from " table " where " condition
+
+        column ::= "col_1 " | "col_2 "
+
+        table ::= "table_1 " | "table_2 "
+
+        condition ::= column "= " number
+
+        number ::= "1 " | "2 "
+    """
+
+    prompt = ("Generate an SQL query to show the 'username' and 'email'"
+              "from the 'users' table.")
+    with pytest.raises((openai.BadRequestError, openai.APIError)):
+        await client.chat.completions.create(
+            model=model_name,
+            messages=[{
+                "role": "user",
+                "content": prompt,
+            }],
+            extra_body={"guided_grammar": invalid_simplified_sql_grammar},
+        )
diff --git a/tests/v1/entrypoints/openai/test_completion.py b/tests/v1/entrypoints/openai/test_completion.py
@@ -584,3 +584,97 @@ async def test_echo_logprob_completion(client: openai.AsyncOpenAI,
             assert max(logprobs_arg,
                        1) <= len(top_logprobs) <= logprobs_arg + 1
         assert len(logprobs.tokens) > 5
+
+
+@pytest.mark.asyncio
+@pytest.mark.parametrize(
+    "model_name",
+    [MODEL_NAME],
+)
+async def test_invalid_json_schema(client: openai.AsyncOpenAI,
+                                   model_name: str) -> None:
+    invalid_json_schema = {
+        "$defs": {
+            "CarType": {
+                "enum": ["sedan", "SUV", "Truck", "Coupe"],
+                "title": "CarType",
+                "type": "string",
+            }
+        },
+        "properties": {
+            "brand": {
+                "title": "Brand",
+                "type": "string"
+            },
+            "model": {
+                "title": "Model",
+                "type": "string"
+            },
+            "car_type": {
+                "$ref": "#/$defs/CarType"
+            },
+            "foo": "bar",
+        },
+        "required": ["brand", "model", "car_type"],
+        "title": "CarDescription",
+        "type": "object",
+    }
+    prompt = ("Generate a JSON with the brand, model and car_type of"
+              "the most iconic car from the 90's")
+    with pytest.raises((openai.BadRequestError, openai.APIError)):
+        await client.completions.create(
+            model=model_name,
+            prompt=prompt,
+            extra_body={"guided_json": invalid_json_schema},
+        )
+
+
+@pytest.mark.asyncio
+@pytest.mark.parametrize(
+    "model_name",
+    [MODEL_NAME],
+)
+async def test_invalid_regex(client: openai.AsyncOpenAI, model_name: str):
+    prompt = ("Generate an email address for Alan Turing, who works in Enigma."
+              "End in .com and new line. Example result:"
+              "alan.turing@enigma.com\n")
+
+    with pytest.raises((openai.BadRequestError, openai.APIError)):
+        await client.completions.create(
+            model=model_name,
+            prompt=prompt,
+            extra_body={
+                "guided_regex": r"[.*",
+                "stop": ["\n"]
+            },
+        )
+
+
+@pytest.mark.asyncio
+@pytest.mark.parametrize(
+    "model_name",
+    [MODEL_NAME],
+)
+async def test_invalid_grammar(client: openai.AsyncOpenAI, model_name: str):
+    invalid_simplified_sql_grammar = """
+        root ::= select_statementinvalidsyntax
+
+        select_statement ::= "SELECT " column " from " table " where " condition
+
+        column ::= "col_1 " | "col_2 "
+
+        table ::= "table_1 " | "table_2 "
+
+        condition ::= column "= " number
+
+        number ::= "1 " | "2 "
+    """
+
+    prompt = ("Generate an SQL query to show the 'username' and 'email'"
+              "from the 'users' table.")
+    with pytest.raises((openai.BadRequestError, openai.APIError)):
+        await client.completions.create(
+            model=model_name,
+            prompt=prompt,
+            extra_body={"guided_grammar": invalid_simplified_sql_grammar},
+        )
diff --git a/vllm/v1/engine/processor.py b/vllm/v1/engine/processor.py
@@ -188,8 +188,10 @@ def _validate_structured_output(self, params: SamplingParams) -> None:
                 validate_xgrammar_grammar(params)
                 params.guided_decoding.backend = "xgrammar"
             except ValueError:
-                # The request includes some jsonschema feature(s) that
+                # The request either failed validation
+                # or includes some jsonschema feature(s) that
                 # are not supported in xgrammar. Fall back to guidance.
+                validate_guidance_grammar(params, tokenizer=None)
                 params.guided_decoding.backend = "guidance"
             # Remember that this backend was set automatically
             params.guided_decoding.backend_was_auto = True
diff --git a/vllm/v1/structured_output/backend_xgrammar.py b/vllm/v1/structured_output/backend_xgrammar.py
@@ -282,6 +282,12 @@ def validate_xgrammar_grammar(sampling_params: SamplingParams) -> None:
         else:
             schema = gd_params.json
 
+        try:
+            xgr.Grammar.from_json_schema(schema)
+        except Exception as err:
+            raise ValueError("Failed to transform json schema into a grammar: "
+                             f"{err}") from err
+
         if has_xgrammar_unsupported_json_features(schema):
             raise ValueError("The provided JSON schema contains features not "
                              "supported by xgrammar.")