[CI] Make JSON output tests less likely to fail (#17859)

russellb · web-flow · commit ebab1ac37c8e · 2025-05-12T22:31:54.000Z
Signed-off-by: Russell Bryant <rbryant@redhat.com>
diff --git a/tests/v1/entrypoints/conftest.py b/tests/v1/entrypoints/conftest.py
@@ -72,12 +72,14 @@ def sample_json_schema():
                             "type": "string"
                         }
                     },
-                    "required": ["company", "duration", "position"]
+                    "required": ["company", "duration", "position"],
+                    "additionalProperties": False
                 }
             }
         },
         "required":
-        ["name", "age", "skills", "grade", "email", "work_history"]
+        ["name", "age", "skills", "grade", "email", "work_history"],
+        "additionalProperties": False
     }
 
 
@@ -100,7 +102,8 @@ def unsupported_json_schema():
                 }
             }
         },
-        "required": ["score", "tags"]
+        "required": ["score", "tags"],
+        "additionalProperties": False
     }
 
 
@@ -139,7 +142,8 @@ def sample_definition_json_schema():
         },
         'required': ['steps', 'final_answer'],
         'title': 'MathReasoning',
-        'type': 'object'
+        'type': 'object',
+        "additionalProperties": False
     }
 
 
diff --git a/tests/v1/entrypoints/llm/test_struct_output_generate.py b/tests/v1/entrypoints/llm/test_struct_output_generate.py
@@ -62,6 +62,16 @@ class CarDescription(BaseModel):
     car_type: CarType
 
 
+def _load_json(s: str, backend: str) -> str:
+    if backend != "xgrammar":
+        return json.loads(s)
+
+    # xgrammar specific workarounds
+    # https://github.com/mlc-ai/xgrammar/issues/286
+    s = re.sub(r'[\x00-\x1F\x7F-\xFF]', '', s)
+    return json.loads(s)
+
+
 @pytest.mark.skip_global_cleanup
 @pytest.mark.parametrize(
     "model_name, guided_decoding_backend, tokenizer_mode, speculative_config",
@@ -102,7 +112,7 @@ def test_structured_output(
     #
     sampling_params = SamplingParams(
         temperature=1.0,
-        max_tokens=1000,
+        max_tokens=4096,
         guided_decoding=GuidedDecodingParams(json=sample_json_schema))
     outputs = llm.generate(prompts=[
         (f"Give an example JSON for an employee profile that fits this "
@@ -131,7 +141,7 @@ def test_structured_output(
     #
     sampling_params = SamplingParams(
         temperature=1.0,
-        max_tokens=100,
+        max_tokens=4096,
         n=2,
         guided_decoding=GuidedDecodingParams(json_object=True))
 
@@ -161,7 +171,7 @@ def test_structured_output(
     #
     sampling_params = SamplingParams(
         temperature=1.0,
-        max_tokens=1000,
+        max_tokens=4096,
         guided_decoding=GuidedDecodingParams(json=unsupported_json_schema))
     if guided_decoding_backend.startswith("xgrammar"):
         with pytest.raises(ValueError,
@@ -376,12 +386,13 @@ def test_structured_output(
                 "minLength": min_length
             }
         },
-        "required": ["description"]
+        "required": ["description"],
+        "additionalProperties": False
     }
 
     sampling_params = SamplingParams(
         temperature=1.0,
-        max_tokens=1000,
+        max_tokens=4096,
         guided_decoding=GuidedDecodingParams(json=json_schema))
 
     outputs = llm.generate(
@@ -417,7 +428,8 @@ def test_structured_output(
                     "city": {
                         "type": "string"
                     }
-                }
+                },
+                "additionalProperties": False
             },
             "end": "</function>"
         }],
@@ -426,7 +438,7 @@ def test_structured_output(
 
     sampling_params = SamplingParams(
         temperature=0.0,
-        max_tokens=100,
+        max_tokens=4096,
         guided_decoding=GuidedDecodingParams(
             structural_tag=json.dumps(structural_tag_config)))
 

Original file line number	Diff line number	Diff line change
`@@ -72,12 +72,14 @@ def sample_json_schema():`
`72`	`72`	`"type": "string"`
`73`	`73`	`}`
`74`	`74`	`},`
`75`		`- "required": ["company", "duration", "position"]`
	`75`	`+ "required": ["company", "duration", "position"],`
	`76`	`+ "additionalProperties": False`
`76`	`77`	`}`
`77`	`78`	`}`
`78`	`79`	`},`
`79`	`80`	`"required":`
`80`		`- ["name", "age", "skills", "grade", "email", "work_history"]`
	`81`	`+ ["name", "age", "skills", "grade", "email", "work_history"],`
	`82`	`+ "additionalProperties": False`
`81`	`83`	`}`
`82`	`84`
`83`	`85`
`@@ -100,7 +102,8 @@ def unsupported_json_schema():`
`100`	`102`	`}`
`101`	`103`	`}`
`102`	`104`	`},`
`103`		`- "required": ["score", "tags"]`
	`105`	`+ "required": ["score", "tags"],`
	`106`	`+ "additionalProperties": False`
`104`	`107`	`}`
`105`	`108`
`106`	`109`
`@@ -139,7 +142,8 @@ def sample_definition_json_schema():`
`139`	`142`	`},`
`140`	`143`	`'required': ['steps', 'final_answer'],`
`141`	`144`	`'title': 'MathReasoning',`
`142`		`- 'type': 'object'`
	`145`	`+ 'type': 'object',`
	`146`	`+ "additionalProperties": False`
`143`	`147`	`}`
`144`	`148`
`145`	`149`