From 4600abdfc3bf11dac21103c77eb4704c8b0c21e3 Mon Sep 17 00:00:00 2001
From: Russell Bryant <rbryant@redhat.com>
Date: Thu, 8 May 2025 09:15:04 -0400
Subject: [PATCH 1/3] [CI] Make JSON output tests less likely to fail

We occasionally see the JSON format structured output tests fail in CI.
PR #17490 included a change to the prompts asking to make the response
as short as possible. This change includes a couple more things to help:

- Increase the output length limit. The failures occur when we cut off
  the output before a JSON object is properly terminated.

- Set `additionalProperties` to `False` in each JSON schema used. This
  should restrict the model from adding properties not specified in the
  schemas, unnecessarily increasing the size of the JSON object output
  and making it more likely to hit the length limit before it finishes.

Signed-off-by: Russell Bryant <rbryant@redhat.com>
---
 tests/v1/entrypoints/conftest.py                     | 12 ++++++++----
 .../entrypoints/llm/test_struct_output_generate.py   |  8 ++++----
 2 files changed, 12 insertions(+), 8 deletions(-)

diff --git a/tests/v1/entrypoints/conftest.py b/tests/v1/entrypoints/conftest.py
index d84b2b22db1..bdee0bb8da7 100644
--- a/tests/v1/entrypoints/conftest.py
+++ b/tests/v1/entrypoints/conftest.py
@@ -72,12 +72,14 @@ def sample_json_schema():
                             "type": "string"
                         }
                     },
-                    "required": ["company", "duration", "position"]
+                    "required": ["company", "duration", "position"],
+                    "additionalProperties": False
                 }
             }
         },
         "required":
-        ["name", "age", "skills", "grade", "email", "work_history"]
+        ["name", "age", "skills", "grade", "email", "work_history"],
+        "additionalProperties": False
     }
 
 
@@ -100,7 +102,8 @@ def unsupported_json_schema():
                 }
             }
         },
-        "required": ["score", "tags"]
+        "required": ["score", "tags"],
+        "additionalProperties": False
     }
 
 
@@ -139,7 +142,8 @@ def sample_definition_json_schema():
         },
         'required': ['steps', 'final_answer'],
         'title': 'MathReasoning',
-        'type': 'object'
+        'type': 'object',
+        "additionalProperties": False
     }
 
 
diff --git a/tests/v1/entrypoints/llm/test_struct_output_generate.py b/tests/v1/entrypoints/llm/test_struct_output_generate.py
index 81601c87ad8..7a8e1c98495 100644
--- a/tests/v1/entrypoints/llm/test_struct_output_generate.py
+++ b/tests/v1/entrypoints/llm/test_struct_output_generate.py
@@ -102,7 +102,7 @@ def test_structured_output(
     #
     sampling_params = SamplingParams(
         temperature=1.0,
-        max_tokens=1000,
+        max_tokens=4096,
         guided_decoding=GuidedDecodingParams(json=sample_json_schema))
     outputs = llm.generate(prompts=[
         (f"Give an example JSON for an employee profile that fits this "
@@ -131,7 +131,7 @@ def test_structured_output(
     #
     sampling_params = SamplingParams(
         temperature=1.0,
-        max_tokens=100,
+        max_tokens=4096,
         n=2,
         guided_decoding=GuidedDecodingParams(json_object=True))
 
@@ -161,7 +161,7 @@ def test_structured_output(
     #
     sampling_params = SamplingParams(
         temperature=1.0,
-        max_tokens=1000,
+        max_tokens=4096,
         guided_decoding=GuidedDecodingParams(json=unsupported_json_schema))
     if guided_decoding_backend.startswith("xgrammar"):
         with pytest.raises(ValueError,
@@ -426,7 +426,7 @@ def test_structured_output(
 
     sampling_params = SamplingParams(
         temperature=0.0,
-        max_tokens=100,
+        max_tokens=4096,
         guided_decoding=GuidedDecodingParams(
             structural_tag=json.dumps(structural_tag_config)))
 

From d820a9ff44ccde3d58eb057c02cef7905a4d33f6 Mon Sep 17 00:00:00 2001
From: Russell Bryant <rbryant@redhat.com>
Date: Thu, 8 May 2025 12:22:46 -0400
Subject: [PATCH 2/3] Fix a few spots I missed - expand output length, disable
 additional properties

Signed-off-by: Russell Bryant <rbryant@redhat.com>
---
 tests/v1/entrypoints/llm/test_struct_output_generate.py | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/tests/v1/entrypoints/llm/test_struct_output_generate.py b/tests/v1/entrypoints/llm/test_struct_output_generate.py
index 7a8e1c98495..c45e84b06b7 100644
--- a/tests/v1/entrypoints/llm/test_struct_output_generate.py
+++ b/tests/v1/entrypoints/llm/test_struct_output_generate.py
@@ -376,12 +376,13 @@ def test_structured_output(
                 "minLength": min_length
             }
         },
-        "required": ["description"]
+        "required": ["description"],
+        "additionalProperties": False
     }
 
     sampling_params = SamplingParams(
         temperature=1.0,
-        max_tokens=1000,
+        max_tokens=4096,
         guided_decoding=GuidedDecodingParams(json=json_schema))
 
     outputs = llm.generate(
@@ -417,7 +418,8 @@ def test_structured_output(
                     "city": {
                         "type": "string"
                     }
-                }
+                },
+                "additionalProperties": False
             },
             "end": "</function>"
         }],

From 57337db0637d82c4f32c1de5e7c3c37192da730a Mon Sep 17 00:00:00 2001
From: Russell Bryant <rbryant@redhat.com>
Date: Fri, 9 May 2025 16:29:25 +0000
Subject: [PATCH 3/3] Strip invalid JSON characters that xgrammar allows

Workaround for https://github.com/mlc-ai/xgrammar/issues/286
to avoid CI failures.

Signed-off-by: Russell Bryant <rbryant@redhat.com>
---
 .../v1/entrypoints/llm/test_struct_output_generate.py  | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/tests/v1/entrypoints/llm/test_struct_output_generate.py b/tests/v1/entrypoints/llm/test_struct_output_generate.py
index c45e84b06b7..5c116598ff3 100644
--- a/tests/v1/entrypoints/llm/test_struct_output_generate.py
+++ b/tests/v1/entrypoints/llm/test_struct_output_generate.py
@@ -62,6 +62,16 @@ class CarDescription(BaseModel):
     car_type: CarType
 
 
+def _load_json(s: str, backend: str) -> str:
+    if backend != "xgrammar":
+        return json.loads(s)
+
+    # xgrammar workaround (https://github.com/mlc-ai/xgrammar/issues/286):
+    # strip U+0000-U+001F and U+007F-U+00FF before calling json.loads.
+    s = re.sub(r'[\x00-\x1F\x7F-\xFF]', '', s)
+    return json.loads(s)
+
+
 @pytest.mark.skip_global_cleanup
 @pytest.mark.parametrize(
     "model_name, guided_decoding_backend, tokenizer_mode, speculative_config",