[Feature] Add CustomQwen3MoeForCausalLM model (#925)

yiz-liu · web-flow · commit 17f05b10893b · 2025-05-23T15:50:48.000+08:00
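Add CustomQwen3MoeForCausalLM, a subclass of Qwen3MoeForCausalLM whose packed_modules_mapping is tweaked to support W8A8 weights, and register it under the "Qwen3MoeForCausalLM" architecture name.  ### What this PR does / why we need it?  ### Does this PR introduce _any_ user-facing change?  ### How was this patch tested?  Signed-off-by: Yizhou Liu <liu_yizhou@outlook.com>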
diff --git a/vllm_ascend/models/__init__.py b/vllm_ascend/models/__init__.py
@@ -29,3 +29,7 @@ def register_model():
     ModelRegistry.register_model(
         "DeepseekV3ForCausalLM",
         "vllm_ascend.models.deepseek_v2:CustomDeepseekV3ForCausalLM")
+
+    ModelRegistry.register_model(
+        "Qwen3MoeForCausalLM",
+        "vllm_ascend.models.qwen3_moe:CustomQwen3MoeForCausalLM")
diff --git a/vllm_ascend/models/qwen3_moe.py b/vllm_ascend/models/qwen3_moe.py
@@ -0,0 +1,35 @@
+# Copyright (c) 2025 Huawei Technologies Co., Ltd. All Rights Reserved.
+# Copyright 2023 The vLLM team.
+#
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# Adapted from vllm/model_executor/models/qwen3_moe.py
+# This file is a part of the vllm-ascend project.
+
+from vllm.model_executor.models.qwen3_moe import Qwen3MoeForCausalLM
+
+
+class CustomQwen3MoeForCausalLM(Qwen3MoeForCausalLM):
+    # Class-level packed_modules_mapping: each key maps to the list of module
+    # names grouped under it. The commit message describes this mapping as a
+    # tweak to support W8A8 weights (presumably overriding upstream; confirm).
+    packed_modules_mapping = dict(
+        qkv_proj=["q_proj", "k_proj", "v_proj"],
+        gate_up_proj=["gate_proj", "up_proj"],
+        # The expert entries are spelled with the literal "experts.0." prefix.
+        experts=[
+            "experts.0.gate_proj",
+            "experts.0.up_proj",
+            "experts.0.down_proj",
+        ],
+    )