vllm-project
diff --git a/tests/models/decoder_only/language/test_big_models.py b/tests/models/decoder_only/language/test_big_models.py
Lines changed: 3 additions & 2 deletions
diff --git a/tests/models/decoder_only/language/test_minicpm3.py b/tests/models/decoder_only/language/test_minicpm3.py
Lines changed: 0 additions & 52 deletions
@@ -17,6 +17,7 @@
     "EleutherAI/gpt-j-6b",
     # "mosaicml/mpt-7b",  # Broken
     # "Qwen/Qwen1.5-0.5B"  # Broken,
+    "openbmb/MiniCPM3-4B",
 ]
 
 #TODO: remove this after CPU float16 support ready
@@ -39,7 +40,7 @@ def test_models(
     with hf_runner(model, dtype=dtype) as hf_model:
         hf_outputs = hf_model.generate_greedy(example_prompts, max_tokens)
 
-    with vllm_runner(model, dtype=dtype) as vllm_model:
+    with vllm_runner(model, dtype=dtype, enforce_eager=True) as vllm_model:
         vllm_outputs = vllm_model.generate_greedy(example_prompts, max_tokens)
 
     check_outputs_equal(
@@ -57,7 +58,7 @@ def test_model_print(
     model: str,
     dtype: str,
 ) -> None:
-    with vllm_runner(model, dtype=dtype) as vllm_model:
+    with vllm_runner(model, dtype=dtype, enforce_eager=True) as vllm_model:
         # This test is for verifying whether the model's extra_repr
         # can be printed correctly.
         print(vllm_model.model.llm_engine.model_executor.driver_worker.