Skip to content

Commit 403aef2

Browse files
committed
Consolidate tests; use eager mode to avoid OOM
1 parent 6fd8c63 commit 403aef2

File tree

2 files changed

+3
-54
lines changed

2 files changed

+3
-54
lines changed

tests/models/decoder_only/language/test_big_models.py

Lines changed: 3 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -17,6 +17,7 @@
 17 |  17 |   | "EleutherAI/gpt-j-6b",
 18 |  18 |   | # "mosaicml/mpt-7b", # Broken
 19 |  19 |   | # "Qwen/Qwen1.5-0.5B" # Broken,
    |  20 | + | "openbmb/MiniCPM3-4B",
 20 |  21 |   | ]
 21 |  22 |   |
 22 |  23 |   | #TODO: remove this after CPU float16 support ready
@@ -39,7 +40,7 @@ def test_models(
 39 |  40 |   | with hf_runner(model, dtype=dtype) as hf_model:
 40 |  41 |   | hf_outputs = hf_model.generate_greedy(example_prompts, max_tokens)
 41 |  42 |   |
 42 |     | - | with vllm_runner(model, dtype=dtype) as vllm_model:
    |  43 | + | with vllm_runner(model, dtype=dtype, enforce_eager=True) as vllm_model:
 43 |  44 |   | vllm_outputs = vllm_model.generate_greedy(example_prompts, max_tokens)
 44 |  45 |   |
 45 |  46 |   | check_outputs_equal(
@@ -57,7 +58,7 @@ def test_model_print(
 57 |  58 |   | model: str,
 58 |  59 |   | dtype: str,
 59 |  60 |   | ) -> None:
 60 |     | - | with vllm_runner(model, dtype=dtype) as vllm_model:
    |  61 | + | with vllm_runner(model, dtype=dtype, enforce_eager=True) as vllm_model:
 61 |  62 |   | # This test is for verifying whether the model's extra_repr
 62 |  63 |   | # can be printed correctly.
 63 |  64 |   | print(vllm_model.model.llm_engine.model_executor.driver_worker.

tests/models/decoder_only/language/test_minicpm3.py

Lines changed: 0 additions & 52 deletions
This file was deleted.

0 commit comments

Comments (0)