15
15
# This file is a part of the vllm-ascend project.
16
16
#
17
17
18
+ # This test will be triggered:
19
+ # 1. PR labeled with: '*accuracy-test' (ONLY 1 label valid) & 'ready-for-test'
20
+ # 2. workflow_dispatch with models input
21
+ # See the detailed rules in the strategy.matrix note
18
22
name : Benchmarks / accuracy
19
23
20
24
on :
26
30
description : ' vllm version:'
27
31
required : true
28
32
type : choice
33
+ # Please also update this when bumping the matched version
34
+ # Current supported vLLM versions
29
35
options :
30
36
- main
31
37
- v0.9.0.1
46
52
- all
47
53
- Qwen/Qwen2.5-7B-Instruct
48
54
- Qwen/Qwen2.5-VL-7B-Instruct
55
+ - Qwen/Qwen3-8B-Base
49
56
default : ' all'
50
57
51
58
# Bash shells do not use ~/.profile or ~/.bashrc so these shells need to be explicitly
@@ -56,16 +63,16 @@ defaults:
56
63
shell : bash -el {0}
57
64
58
65
jobs :
59
- model_tests :
60
- # test will be triggered when tag '*-accuracy-test' & 'ready-for-test' or schedule job
66
+ accuracy_tests :
67
+ # test will be triggered by a PR labeled '*accuracy-test' & 'ready-for-test', or by a workflow_dispatch job
61
68
if : >-
62
69
${{
63
- (contains(github.event.pull_request.labels.*.name, 'vl-accuracy-test') ||
70
+ (contains(github.event.pull_request.labels.*.name, 'accuracy-test') ||
71
+ contains(github.event.pull_request.labels.*.name, 'vl-accuracy-test') ||
64
72
contains(github.event.pull_request.labels.*.name, 'dense-accuracy-test')) &&
65
73
contains(github.event.pull_request.labels.*.name, 'ready-for-test') ||
66
74
github.event_name == 'workflow_dispatch'
67
75
}}
68
- name : Model Test - ${{ matrix.model_name }}
69
76
runs-on : ' linux-arm64-npu-2'
70
77
strategy :
71
78
matrix :
@@ -74,24 +81,35 @@ jobs:
74
81
# - all: Qwen/Qwen2.5-7B-Instruct, Qwen/Qwen2.5-VL-7B-Instruct, Qwen/Qwen3-8B-Base
75
82
# - specified but not all: Qwen/Qwen2.5-7B-Instruct, Qwen/Qwen2.5-VL-7B-Instruct, Qwen/Qwen3-8B-Base
76
83
# 2. PR labeled with "*-accuracy-test"
84
+ # - accuracy-test: Qwen/Qwen2.5-7B-Instruct, Qwen/Qwen2.5-VL-7B-Instruct
77
85
# - dense-accuracy-test: Qwen/Qwen2.5-7B-Instruct
78
86
# - vl-accuracy-test: Qwen/Qwen2.5-VL-7B-Instruct
79
87
include : ${{ fromJSON(
80
- (github.event.inputs.models == 'all' && '[{"model_name":"Qwen/Qwen2.5-7B-Instruct"},{"model_name":"meta-llama/Llama-3.1-8B-Instruct"},{"model_name":" Qwen/Qwen2.5-VL-7B-Instruct"}, {"model_name":"Qwen/Qwen3-8B-Base"}]') ||
88
+ (github.event.inputs.models == 'all' && '[{"model_name":"Qwen/Qwen2.5-7B-Instruct"},{"model_name":"Qwen/Qwen2.5-VL-7B-Instruct"},{"model_name":"Qwen/Qwen3-8B-Base"}]') ||
81
89
(github.event.inputs.models == 'Qwen/Qwen2.5-7B-Instruct' && '[{"model_name":"Qwen/Qwen2.5-7B-Instruct"}]') ||
82
90
(github.event.inputs.models == 'Qwen/Qwen2.5-VL-7B-Instruct' && '[{"model_name":"Qwen/Qwen2.5-VL-7B-Instruct"}]') ||
91
+ (github.event.inputs.models == 'Qwen/Qwen3-8B-Base' && '[{"model_name":"Qwen/Qwen3-8B-Base"}]') ||
92
+ contains(github.event.pull_request.labels.*.name, 'accuracy-test') && '[{"model_name":"Qwen/Qwen2.5-7B-Instruct"},{"model_name":"Qwen/Qwen2.5-VL-7B-Instruct"}]' ||
83
93
contains(github.event.pull_request.labels.*.name, 'dense-accuracy-test') && '[{"model_name":"Qwen/Qwen2.5-7B-Instruct"}]' ||
84
94
contains(github.event.pull_request.labels.*.name, 'vl-accuracy-test') && '[{"model_name":"Qwen/Qwen2.5-VL-7B-Instruct"}]'
85
95
) }}
86
96
fail-fast : false
87
-
97
+ name : ${{ matrix.model_name }} accuracy
88
98
container :
89
99
image : m.daocloud.io/quay.io/ascend/cann:8.1.rc1-910b-ubuntu22.04-py3.10
90
100
env :
91
101
HF_ENDPOINT : https://hf-mirror.com
92
102
HF_TOKEN : ${{ secrets.HF_TOKEN }}
93
103
DATASET_SOURCE : ModelScope
94
104
VLLM_USE_MODELSCOPE : True
105
+ # Please also update this when bumping the matched version
106
+ GHA_VLLM_VERSION : ${{ github.event.inputs.vllm-version || 'v0.9.0' }}
107
+ # 1. If version specified (workflow_dispatch), do specified branch accuracy test
108
+ # 2. If no version (labeled PR), do accuracy test by default ref:
109
+ # The branch, tag or SHA to checkout. When checking out the repository that
110
+ # triggered a workflow, this defaults to the reference or SHA for that event.
111
+ # Otherwise, uses the default branch.
112
+ GHA_VLLM_ASCEND_VERSION : ${{ github.event.inputs.vllm-ascend-version }}
95
113
96
114
steps :
97
115
- name : Checkout repository
@@ -120,8 +138,7 @@ jobs:
120
138
with :
121
139
repository : vllm-project/vllm
122
140
path : ./vllm-empty
123
- # Please also change this when bump matched version
124
- ref : ${{ github.event.inputs.vllm-version || 'v0.9.0' }}
141
+ ref : ${{ env.GHA_VLLM_VERSION }}
125
142
126
143
- name : Install vllm-project/vllm from source
127
144
working-directory : ./vllm-empty
@@ -132,9 +149,7 @@ jobs:
132
149
with :
133
150
repository : vllm-project/vllm-ascend
134
151
path : ./vllm-ascend
135
- # 1. If version specified (work_dispatch), do specified branch test
136
- # 2. If no version (labeled PR), do PR accuracy test
137
- ref : ${{ github.event.inputs.vllm-ascend-version }}
152
+ ref : ${{ env.GHA_VLLM_ASCEND_VERSION }}
138
153
fetch-depth : 0
139
154
140
155
- name : Install vllm-project/vllm-ascend
@@ -157,23 +172,24 @@ jobs:
157
172
fi
158
173
done
159
174
INFO_FILE="/usr/local/Ascend/ascend-toolkit/${TOOLKIT_DIR}/$(uname -i)-linux/ascend_toolkit_install.info"
160
- CANN_VERSION =$(grep "version=" "$INFO_FILE" \
175
+ GHA_CANN_VERSION =$(grep "version=" "$INFO_FILE" \
161
176
| head -n1 \
162
177
| cut -d'=' -f2 \
163
178
| tr -d '"')
164
179
{
165
- echo "CANN_VERSION=$CANN_VERSION "
166
- pip show torch | grep "Version:" | awk '{print "TORCH_VERSION ="$2}'
167
- pip show torch_npu | grep "Version:" | awk '{print "TORCH_NPU_VERSION ="$2}'
168
- pip show vllm | grep "Version:" | awk '{print "VLLM_VERSION ="$2}' | sed 's/+.*//'
180
+ echo "GHA_CANN_VERSION=$GHA_CANN_VERSION "
181
+ pip show torch | grep "Version:" | awk '{print "GHA_TORCH_VERSION ="$2}'
182
+ pip show torch_npu | grep "Version:" | awk '{print "GHA_TORCH_NPU_VERSION ="$2}'
183
+ pip show vllm | grep "Version:" | awk '{print "GHA_VLLM_VERSION ="$2}' | sed 's/+.*//'
169
184
} >> "$GITHUB_ENV"
170
185
171
186
- name : Print versions
172
187
run : |
173
- echo "CANN: ${{ env.CANN_VERSION }}"
174
- echo "Torch NPU: ${{ env.TORCH_NPU_VERSION }}"
175
- echo "Torch: ${{ env.TORCH_VERSION }}"
176
- echo "vLLM: ${{ env.VLLM_VERSION }}"
188
+ echo "CANN: ${{ env.GHA_CANN_VERSION }}"
189
+ echo "Torch NPU: ${{ env.GHA_TORCH_NPU_VERSION }}"
190
+ echo "Torch: ${{ env.GHA_TORCH_VERSION }}"
191
+ echo "vLLM: ${{ env.GHA_VLLM_VERSION }}"
192
+ echo "vLLM Ascend: ${{ env.GHA_VLLM_ASCEND_VERSION }}"
177
193
178
194
- name : Run Accuracy Test for V0
179
195
id : report
@@ -186,21 +202,22 @@ jobs:
186
202
echo "model_base_name=$model_base_name"
187
203
echo "model_base_name=$model_base_name" >> $GITHUB_OUTPUT
188
204
mkdir -p ./accuracy/V0
189
- echo "test111" > ./accuracy/V0/${model_base_name}.md
205
+
190
206
python ./scripts/run_accuracy.py \
191
207
--model "${{ matrix.model_name }}" \
192
208
--output "./accuracy/V0/${model_base_name}.md" \
193
- --vllm_ascend_version "${{ github.event.inputs.vllm-ascend-version || 'current' }}" \
194
- --cann_version "${{ env.CANN_VERSION }}" \
195
- --torch_npu_version "${{ env.TORCH_NPU_VERSION }}" \
196
- --torch_version "${{ env.TORCH_VERSION }}" \
197
- --vllm_version "${{ env.VLLM_VERSION }}"
198
- cat ./accuracy/V0/${model_base_name}.md
209
+ --vllm_ascend_version "${{ env.GHA_VLLM_ASCEND_VERSION || github.ref }}" \
210
+ --cann_version "${{ env.GHA_CANN_VERSION }}" \
211
+ --torch_npu_version "${{ env.GHA_TORCH_NPU_VERSION }}" \
212
+ --torch_version "${{ env.GHA_TORCH_VERSION }}" \
213
+ --vllm_version "${{ env.GHA_VLLM_VERSION }}"
214
+
215
+ cat ./accuracy/V0/${model_base_name}.md >> $GITHUB_STEP_SUMMARY
199
216
200
217
- name : Upload Report for V0
201
218
uses : actions/upload-artifact@v4
202
219
with :
203
- name : " ${{ github.event.inputs.vllm-ascend-version }}-${{ steps.report.outputs.model_base_name }}-V0-report"
220
+ name : " ${{ env.GHA_VLLM_ASCEND_VERSION }}-${{ steps.report.outputs.model_base_name }}-V0-report"
204
221
path : ./benchmarks/accuracy/V0/${{ steps.report.outputs.model_base_name }}.md
205
222
if-no-files-found : warn
206
223
retention-days : 90
0 commit comments