+from typing import Dict, List, Optional
+
+import pytest
+
+from vllm.utils import cuda_device_count_stateless
+
+from ..utils import compare_all_settings
+from .utils import TEST_MODELS_SMOKE
+
+
+@pytest.mark.parametrize("model_info", TEST_MODELS_SMOKE)
+@pytest.mark.parametrize("pp_size", [1, 2])
+@pytest.mark.parametrize("tp_size", [1])
+def test_compile_correctness(model_info, pp_size, tp_size):
+    # This test runs under multiple test suites with different GPU counts;
+    # only run it when the device count matches exactly. Don't use "<",
+    # or the same case would run in several suites and duplicate tests.
+    if cuda_device_count_stateless() != pp_size * tp_size:
+        pytest.skip(f"Need exactly {pp_size * tp_size} CUDA devices.")
+    model, model_args = model_info
+    all_args = [["--enforce-eager"] + model_args +
+                ["--max_model_len", "1024", "-pp",
+                 str(pp_size), "-tp", str(tp_size)]] * 3
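+    # Three runs with identical CLI args; only the torch compile level
+    # (0, 1, 2) differs between the env dicts below.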
+    all_envs: List[Optional[Dict[str, str]]] = [{
+        "VLLM_TEST_TORCH_COMPILE_LEVEL": str(i)
+    } for i in range(3)]
+    compare_all_settings(model, all_args, all_envs)
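
For one parametrization, say `pp_size=2` and `tp_size=1`, the test builds three identical server arg lists and pairs them with compile levels 0, 1, and 2; `compare_all_settings` then checks that all three settings produce matching outputs. Below is a minimal expansion of the inputs, assuming a hypothetical `TEST_MODELS_SMOKE` entry `("facebook/opt-125m", [])` (the model name is a placeholder, not necessarily in the real list):

```python
# Expanded inputs for pp_size=2, tp_size=1 and the hypothetical entry
# ("facebook/opt-125m", []).
all_args = [
    ["--enforce-eager", "--max_model_len", "1024", "-pp", "2", "-tp", "1"],
] * 3
# One env dict per run; only the compile level differs between runs.
all_envs = [
    {"VLLM_TEST_TORCH_COMPILE_LEVEL": "0"},
    {"VLLM_TEST_TORCH_COMPILE_LEVEL": "1"},
    {"VLLM_TEST_TORCH_COMPILE_LEVEL": "2"},
]
```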