@@ -16,7 +16,8 @@
 import pytest
 
 from tests.conftest import VllmRunner
-from vllm_ascend.ascend_config import clear_ascend_config, get_ascend_config
+from vllm_ascend.ascend_config import (clear_ascend_config, get_ascend_config,
+                                       init_ascend_config)
 
 
 def _clean_up_ascend_config(func):
@@ -59,6 +60,8 @@ def test_run_with_ascend_config():
         },
         "expert_tensor_parallel_size": 1
     }
+
+    # check passed with eager mode
     with VllmRunner("facebook/opt-125m",
                     additional_config=input_additional_config):
         ascend_config = get_ascend_config()
@@ -73,6 +76,22 @@ def test_run_with_ascend_config():
         assert ascend_config.ascend_scheduler_config.enable_chunked_prefill
         assert ascend_config.expert_tensor_parallel_size == 1
 
+    # check passed with aclgraph mode
+    with VllmRunner("facebook/opt-125m",
+                    enforce_eager=False,
+                    additional_config=input_additional_config):
+        ascend_config = get_ascend_config()
+
+        assert not ascend_config.torchair_graph_config.enabled
+        assert ascend_config.torchair_graph_config.use_cached_graph
+        assert ascend_config.torchair_graph_config.graph_batch_sizes == [
+            1, 2, 4, 8
+        ]
+        assert not ascend_config.torchair_graph_config.graph_batch_sizes_init
+        assert ascend_config.ascend_scheduler_config.enabled
+        assert ascend_config.ascend_scheduler_config.enable_chunked_prefill
+        assert ascend_config.expert_tensor_parallel_size == 1
+
 
 @_clean_up_ascend_config
 def test_ascend_config_init_error():
@@ -117,3 +136,53 @@ def test_ascend_config_load_error():
                         enforce_eager=False,
                         additional_config=input_additional_config_fake_2):
             pass
+
+    # torchair graph should not be enabled with eager mode
+    with pytest.raises(RuntimeError):
+        input_additional_config_fake_3 = {
+            "torchair_graph_config": {
+                "enabled": True,
+            },
+        }
+        with VllmRunner("facebook/opt-125m",
+                        enforce_eager=True,
+                        additional_config=input_additional_config_fake_3):
+            pass
+
+
+@_clean_up_ascend_config
+def test_ascend_config_refresh():
+    from vllm.config import get_current_vllm_config
+    vllm_config = get_current_vllm_config()
+    # set additional_config with none
+    init_ascend_config(vllm_config)
+
+    input_additional_config = {
+        "torchair_graph_config": {
+            "enabled": False,
+            "use_cached_graph": True,
+            "graph_batch_sizes": [1, 2, 4, 8],
+            "graph_batch_sizes_init": False,
+        },
+        "ascend_scheduler_config": {
+            "enabled": True,
+            "enable_chunked_prefill": True,
+        },
+        "expert_tensor_parallel_size": 1,
+        "refresh": True,
+    }
+
+    # refresh ascend config
+    with VllmRunner("facebook/opt-125m",
+                    additional_config=input_additional_config):
+        ascend_config = get_ascend_config()
+
+        assert not ascend_config.torchair_graph_config.enabled
+        assert ascend_config.torchair_graph_config.use_cached_graph
+        assert ascend_config.torchair_graph_config.graph_batch_sizes == [
+            1, 2, 4, 8
+        ]
+        assert not ascend_config.torchair_graph_config.graph_batch_sizes_init
+        assert ascend_config.ascend_scheduler_config.enabled
+        assert ascend_config.ascend_scheduler_config.enable_chunked_prefill
+        assert ascend_config.expert_tensor_parallel_size == 1
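Note: both new tests depend on the `_clean_up_ascend_config` decorator defined near the top of this file, whose body lies outside these hunks. A minimal sketch of what such a decorator presumably does, assuming `clear_ascend_config()` simply drops the cached global config (the body below is an assumption, not the actual implementation):

    import functools

    def _clean_up_ascend_config(func):
        # Hypothetical sketch: reset the global Ascend config before and
        # after the wrapped test so one test's additional_config cannot
        # leak into the next.
        @functools.wraps(func)
        def wrapper(*args, **kwargs):
            clear_ascend_config()
            try:
                return func(*args, **kwargs)
            finally:
                clear_ascend_config()
        return wrapper

The new `test_ascend_config_refresh` also suggests that passing `"refresh": True` in `additional_config` lets `init_ascend_config` rebuild an already-initialized config instead of returning the cached one; that behavior is inferred from the test, not shown in this diff.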