|
16 | 16 | import pytest
|
17 | 17 |
|
18 | 18 | from tests.conftest import VllmRunner
|
19 |
| -from vllm_ascend.ascend_config import clear_ascend_config, get_ascend_config |
| 19 | +from vllm_ascend.ascend_config import (clear_ascend_config, get_ascend_config, |
| 20 | + init_ascend_config) |
20 | 21 |
|
21 | 22 |
|
22 | 23 | def _clean_up_ascend_config(func):
|
@@ -59,7 +60,25 @@ def test_run_with_ascend_config():
|
59 | 60 | },
|
60 | 61 | "expert_tensor_parallel_size": 1
|
61 | 62 | }
|
| 63 | + |
| 64 | + # check passed with eager mode |
| 65 | + with VllmRunner("facebook/opt-125m", |
| 66 | + additional_config=input_additional_config): |
| 67 | + ascend_config = get_ascend_config() |
| 68 | + |
| 69 | + assert not ascend_config.torchair_graph_config.enabled |
| 70 | + assert ascend_config.torchair_graph_config.use_cached_graph |
| 71 | + assert ascend_config.torchair_graph_config.graph_batch_sizes == [ |
| 72 | + 1, 2, 4, 8 |
| 73 | + ] |
| 74 | + assert not ascend_config.torchair_graph_config.graph_batch_sizes_init |
| 75 | + assert ascend_config.ascend_scheduler_config.enabled |
| 76 | + assert ascend_config.ascend_scheduler_config.enable_chunked_prefill |
| 77 | + assert ascend_config.expert_tensor_parallel_size == 1 |
| 78 | + |
| 79 | + # check passed with aclgraph mode |
62 | 80 | with VllmRunner("facebook/opt-125m",
|
| 81 | + enforce_eager=False, |
63 | 82 | additional_config=input_additional_config):
|
64 | 83 | ascend_config = get_ascend_config()
|
65 | 84 |
|
@@ -117,3 +136,53 @@ def test_ascend_config_load_error():
|
117 | 136 | enforce_eager=False,
|
118 | 137 | additional_config=input_additional_config_fake_2):
|
119 | 138 | pass
|
| 139 | + |
| 140 | + # torchair graph should not be enabled with eager mode |
| 141 | + with pytest.raises(RuntimeError): |
| 142 | + input_additional_config_fake_1 = { |
| 143 | + "torchair_graph_config": { |
| 144 | + "enabled": True, |
| 145 | + }, |
| 146 | + } |
| 147 | + with VllmRunner("facebook/opt-125m", |
| 148 | + enforce_eager=True, |
| 149 | + additional_config=input_additional_config_fake_1): |
| 150 | + pass |
| 151 | + |
| 152 | + |
@_clean_up_ascend_config
def test_ascend_config_refresh():
    """Verify that ``"refresh": True`` in additional_config re-initializes
    an already-initialized ascend config inside ``VllmRunner``."""
    from vllm.config import get_current_vllm_config

    # First initialize the ascend config with an empty additional_config.
    init_ascend_config(get_current_vllm_config())

    refresh_config = {
        "torchair_graph_config": {
            "enabled": False,
            "use_cached_graph": True,
            "graph_batch_sizes": [1, 2, 4, 8],
            "graph_batch_sizes_init": False,
        },
        "ascend_scheduler_config": {
            "enabled": True,
            "enable_chunked_prefill": True,
        },
        "expert_tensor_parallel_size": 1,
        "refresh": True,
    }

    # The runner should replace the earlier (empty) ascend config with
    # the values above rather than keep the stale one.
    with VllmRunner("facebook/opt-125m",
                    additional_config=refresh_config):
        cfg = get_ascend_config()

        torchair = cfg.torchair_graph_config
        assert not torchair.enabled
        assert torchair.use_cached_graph
        assert torchair.graph_batch_sizes == [1, 2, 4, 8]
        assert not torchair.graph_batch_sizes_init

        scheduler = cfg.ascend_scheduler_config
        assert scheduler.enabled
        assert scheduler.enable_chunked_prefill

        assert cfg.expert_tensor_parallel_size == 1
0 commit comments