@@ -16,7 +16,8 @@
 import pytest
 
 from tests.conftest import VllmRunner
-from vllm_ascend.ascend_config import clear_ascend_config, get_ascend_config
+from vllm_ascend.ascend_config import (clear_ascend_config, get_ascend_config,
+                                       init_ascend_config)
 
 
 def _clean_up_ascend_config(func):
@@ -59,6 +60,8 @@ def test_run_with_ascend_config():
         },
         "expert_tensor_parallel_size": 1
     }
+
+    # check passed with eager mode
     with VllmRunner("facebook/opt-125m",
                     additional_config=input_additional_config):
         ascend_config = get_ascend_config()
@@ -73,6 +76,22 @@ def test_run_with_ascend_config():
         assert ascend_config.ascend_scheduler_config.enable_chunked_prefill
         assert ascend_config.expert_tensor_parallel_size == 1
 
+    # check passed with aclgraph mode
+    with VllmRunner("facebook/opt-125m",
+                    enforce_eager=False,
+                    additional_config=input_additional_config):
+        ascend_config = get_ascend_config()
+
+        assert not ascend_config.torchair_graph_config.enabled
+        assert ascend_config.torchair_graph_config.use_cached_graph
+        assert ascend_config.torchair_graph_config.graph_batch_sizes == [
+            1, 2, 4, 8
+        ]
+        assert not ascend_config.torchair_graph_config.graph_batch_sizes_init
+        assert ascend_config.ascend_scheduler_config.enabled
+        assert ascend_config.ascend_scheduler_config.enable_chunked_prefill
+        assert ascend_config.expert_tensor_parallel_size == 1
+
 
 @_clean_up_ascend_config
 def test_ascend_config_init_error():
@@ -117,3 +136,53 @@ def test_ascend_config_load_error():
                         enforce_eager=False,
                         additional_config=input_additional_config_fake_2):
             pass
+
+    # torchair graph should not be enabled with eager mode
+    with pytest.raises(RuntimeError):
+        input_additional_config_fake_3 = {
+            "torchair_graph_config": {
+                "enabled": True,
+            },
+        }
+        with VllmRunner("facebook/opt-125m",
+                        enforce_eager=True,
+                        additional_config=input_additional_config_fake_3):
+            pass
+
+
+@_clean_up_ascend_config
+def test_ascend_config_refresh():
+    from vllm.config import get_current_vllm_config
+    vllm_config = get_current_vllm_config()
+    # set additional_config with none
+    init_ascend_config(vllm_config)
+
+    input_additional_config = {
+        "torchair_graph_config": {
+            "enabled": False,
+            "use_cached_graph": True,
+            "graph_batch_sizes": [1, 2, 4, 8],
+            "graph_batch_sizes_init": False,
+        },
+        "ascend_scheduler_config": {
+            "enabled": True,
+            "enable_chunked_prefill": True,
+        },
+        "expert_tensor_parallel_size": 1,
+        "refresh": True,
+    }
+
+    # refresh ascend config
+    with VllmRunner("facebook/opt-125m",
+                    additional_config=input_additional_config):
+        ascend_config = get_ascend_config()
+
+        assert not ascend_config.torchair_graph_config.enabled
+        assert ascend_config.torchair_graph_config.use_cached_graph
+        assert ascend_config.torchair_graph_config.graph_batch_sizes == [
+            1, 2, 4, 8
+        ]
+        assert not ascend_config.torchair_graph_config.graph_batch_sizes_init
+        assert ascend_config.ascend_scheduler_config.enabled
+        assert ascend_config.ascend_scheduler_config.enable_chunked_prefill
+        assert ascend_config.expert_tensor_parallel_size == 1
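Note: both new tests depend on the `_clean_up_ascend_config` decorator defined near the top of this file, whose body lies outside these hunks. A minimal sketch of what such a decorator presumably does, assuming `clear_ascend_config()` simply drops the cached global config (the body below is an assumption, not the actual implementation):

    import functools

    def _clean_up_ascend_config(func):
        # Hypothetical sketch: reset the global Ascend config before and
        # after the wrapped test so one test's additional_config cannot
        # leak into the next.
        @functools.wraps(func)
        def wrapper(*args, **kwargs):
            clear_ascend_config()
            try:
                return func(*args, **kwargs)
            finally:
                clear_ascend_config()
        return wrapper

The new `test_ascend_config_refresh` also suggests that passing `"refresh": True` in `additional_config` lets `init_ascend_config` rebuild an already-initialized config instead of returning the cached one; that behavior is inferred from the test, not shown in this diff.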