Skip to content

Commit 324099c

Browse files
committed
[BugFix] Fix ascend config check
Signed-off-by: wangxiyuan <wangxiyuan1007@gmail.com>
1 parent 78431b3 commit 324099c

File tree

4 files changed

+80
-6
lines changed

4 files changed

+80
-6
lines changed

docs/source/user_guide/additional_config.md

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@ The following table lists the additional configuration options available in vLLM
2929
| `torchair_graph_config` | dict | `{}` | The config options for torchair graph mode |
3030
| `ascend_scheduler_config` | dict | `{}` | The config options for ascend scheduler |
3131
| `expert_tensor_parallel_size` | str | `1` | Expert tensor parallel size for the model to use. |
32+
| `refresh` | bool | `false` | Whether to refresh the global ascend config content. This is typically used in RLHF scenarios. |
3233

3334
The details of each config option are as follows:
3435

@@ -59,12 +60,13 @@ A full example of additional configuration is as follows:
5960
"enabled": true,
6061
"use_cached_graph": true,
6162
"graph_batch_sizes": [1, 2, 4, 8],
62-
"graph_batch_sizes_init": true
63+
"graph_batch_sizes_init": false
6364
},
6465
"ascend_scheduler_config": {
6566
"enabled": true,
6667
"enable_chunked_prefill": true
6768
},
68-
"expert_tensor_parallel_size": 1
69+
"expert_tensor_parallel_size": 1,
70+
"refresh": false
6971
}
7072
```

tests/singlecard/test_ascend_config.py

Lines changed: 70 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,8 @@
1616
import pytest
1717

1818
from tests.conftest import VllmRunner
19-
from vllm_ascend.ascend_config import clear_ascend_config, get_ascend_config
19+
from vllm_ascend.ascend_config import (clear_ascend_config, get_ascend_config,
20+
init_ascend_config)
2021

2122

2223
def _clean_up_ascend_config(func):
@@ -59,7 +60,25 @@ def test_run_with_ascend_config():
5960
},
6061
"expert_tensor_parallel_size": 1
6162
}
63+
64+
# check passed with eager mode
65+
with VllmRunner("facebook/opt-125m",
66+
additional_config=input_additional_config):
67+
ascend_config = get_ascend_config()
68+
69+
assert not ascend_config.torchair_graph_config.enabled
70+
assert ascend_config.torchair_graph_config.use_cached_graph
71+
assert ascend_config.torchair_graph_config.graph_batch_sizes == [
72+
1, 2, 4, 8
73+
]
74+
assert not ascend_config.torchair_graph_config.graph_batch_sizes_init
75+
assert ascend_config.ascend_scheduler_config.enabled
76+
assert ascend_config.ascend_scheduler_config.enable_chunked_prefill
77+
assert ascend_config.expert_tensor_parallel_size == 1
78+
79+
# check passed with aclgraph mode
6280
with VllmRunner("facebook/opt-125m",
81+
enforce_eager=False,
6382
additional_config=input_additional_config):
6483
ascend_config = get_ascend_config()
6584

@@ -117,3 +136,53 @@ def test_ascend_config_load_error():
117136
enforce_eager=False,
118137
additional_config=input_additional_config_fake_2):
119138
pass
139+
140+
# torchair graph should not be enabled with eager mode
141+
with pytest.raises(RuntimeError):
142+
input_additional_config_fake_1 = {
143+
"torchair_graph_config": {
144+
"enabled": True,
145+
},
146+
}
147+
with VllmRunner("facebook/opt-125m",
148+
enforce_eager=True,
149+
additional_config=input_additional_config_fake_1):
150+
pass
151+
152+
153+
@_clean_up_ascend_config
154+
def test_ascend_config_refresh():
155+
from vllm.config import get_current_vllm_config
156+
vllm_config = get_current_vllm_config()
157+
# set additional_config with none
158+
init_ascend_config(vllm_config)
159+
160+
input_additional_config = {
161+
"torchair_graph_config": {
162+
"enabled": False,
163+
"use_cached_graph": True,
164+
"graph_batch_sizes": [1, 2, 4, 8],
165+
"graph_batch_sizes_init": False,
166+
},
167+
"ascend_scheduler_config": {
168+
"enabled": True,
169+
"enable_chunked_prefill": True,
170+
},
171+
"expert_tensor_parallel_size": 1,
172+
"refresh": True,
173+
}
174+
175+
# refresh ascend config
176+
with VllmRunner("facebook/opt-125m",
177+
additional_config=input_additional_config):
178+
ascend_config = get_ascend_config()
179+
180+
assert not ascend_config.torchair_graph_config.enabled
181+
assert ascend_config.torchair_graph_config.use_cached_graph
182+
assert ascend_config.torchair_graph_config.graph_batch_sizes == [
183+
1, 2, 4, 8
184+
]
185+
assert not ascend_config.torchair_graph_config.graph_batch_sizes_init
186+
assert ascend_config.ascend_scheduler_config.enabled
187+
assert ascend_config.ascend_scheduler_config.enable_chunked_prefill
188+
assert ascend_config.expert_tensor_parallel_size == 1

vllm_ascend/ascend_config.py

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -82,8 +82,11 @@ def __init__(self, ascend_scheduler_config: dict):
8282

8383

8484
def init_ascend_config(vllm_config):
85+
additional_config = vllm_config.additional_config if vllm_config.additional_config is not None else {}
86+
refresh = additional_config.get("refresh",
87+
False) if additional_config else False
8588
global _ASCEND_CONFIG
86-
if _ASCEND_CONFIG is not None:
89+
if _ASCEND_CONFIG is not None and not refresh:
8790
return _ASCEND_CONFIG
8891
_ASCEND_CONFIG = AscendConfig(vllm_config)
8992
return _ASCEND_CONFIG
@@ -126,7 +129,7 @@ def check_ascend_config(vllm_config, enforce_eager):
126129
"Torchair graph mode only works with deepseek model.")
127130

128131
# for V1 Engine, aclgraph doesn't work with deepseek model and only qwen model is well tested.
129-
if envs.VLLM_USE_V1 and vllm_config.model_config is not None and not enforce_eager:
132+
if envs.VLLM_USE_V1 and vllm_config.model_config is not None and not enforce_eager and not ascend_config.torchair_graph_config.enabled:
130133
model_type = vllm_config.model_config.hf_config.model_type
131134
if "deepseek" in model_type:
132135
raise NotImplementedError(

vllm_ascend/worker/model_runner_v1.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -323,7 +323,7 @@ def __init__(self, vllm_config: VllmConfig, device: torch.device):
323323

324324
ascend_config = get_ascend_config()
325325
self.torchair_graph_enabled = ascend_config.torchair_graph_config.enabled and self.vllm_config.model_config.use_mla
326-
self.torchair_graph_use_cached_npu_graph = ascend_config.torchair_graph_config.use_cached_graph
326+
self.use_cached_npu_graph = ascend_config.torchair_graph_config.use_cached_graph
327327
self.torchair_graph_batch_sizes = ascend_config.torchair_graph_config.graph_batch_sizes
328328

329329
if ascend_config.torchair_graph_config.graph_batch_sizes_init:

0 commit comments

Comments
 (0)