# This file is a part of the vllm-ascend project.
#
# This test will be triggered:
# 1. PR labeled with: '*accuracy-test' (ONLY 1 label valid) & 'ready-for-test'
# 2. workflow_dispatch with models input
# See detail rule in strategy.matrix note
name: Benchmarks / accuracy

on:
  pull_request:
    types: [labeled]
  workflow_dispatch:
    inputs:
      vllm-version:
        description: 'vllm version:'
        required: true
        type: choice
        # Please also update this when bump matched version
        # Current supported vLLM versions
        options:
          - main
          - v0.9.0.1
          - v0.9.0
          - v0.7.3
      vllm-ascend-version:
        description: 'vllm-ascend version:'
        required: true
        type: choice
        options:
          - main
          - v0.7.3-dev
      models:
        description: 'model:'
        required: true
        type: choice
        options:
          - all
          - Qwen/Qwen2.5-7B-Instruct
          - Qwen/Qwen2.5-VL-7B-Instruct
          - Qwen/Qwen3-8B-Base
        default: 'all'
# NOTE(review): the 'defaults:' key is shown by the hunk context above this
# chunk; only its tail is visible here.
defaults:
  run:
    shell: bash -el {0}

# Cancel superseded runs of the same PR.
# BUGFIX(review): for workflow_dispatch events `github.event.pull_request.number`
# is empty, so every dispatch run would share the constant group 'pr-' and
# cancel the others; fall back to the unique run id in that case.
concurrency:
  group: pr-${{ github.event.pull_request.number || github.run_id }}
  cancel-in-progress: true

jobs:
  accuracy_tests:
    # test will be triggered when tag '*-accuracy-test' & 'ready-for-test' or workflow_dispatch job
    if: >-
      ${{
      (contains(github.event.pull_request.labels.*.name, 'accuracy-test') ||
       contains(github.event.pull_request.labels.*.name, 'vl-accuracy-test') ||
       contains(github.event.pull_request.labels.*.name, 'dense-accuracy-test')) &&
      contains(github.event.pull_request.labels.*.name, 'ready-for-test') ||
      github.event_name == 'workflow_dispatch'
      }}
    # The VL model needs a 4-NPU runner; every other model runs on 2 NPUs.
    runs-on: >-
      ${{
      (matrix.model_name == 'Qwen/Qwen2.5-VL-7B-Instruct' && 'linux-arm64-npu-4') ||
      'linux-arm64-npu-2'
      }}
    strategy:
      matrix:
        vllm_use_version: [0, 1]
        # the accuracy test will run:
        # 1. workflow_dispatch with models input
        #    - all: Qwen/Qwen2.5-7B-Instruct, Qwen/Qwen2.5-VL-7B-Instruct, Qwen/Qwen3-8B-Base
        #    - a specific model: only that model
        # 2. PR labeled with "*-accuracy-test"
        #    - accuracy-test: Qwen/Qwen2.5-7B-Instruct, Qwen/Qwen2.5-VL-7B-Instruct
        #    - dense-accuracy-test: Qwen/Qwen2.5-7B-Instruct
        #    - vl-accuracy-test: Qwen/Qwen2.5-VL-7B-Instruct
        # BUGFIX(review): the 'all' branch previously contained a stray
        # '"model_name":' inside the JSON array, which is invalid JSON and
        # would make fromJSON() fail when models == 'all'.
        model_name: ${{ fromJSON(
          (github.event.inputs.models == 'all' &&
            '["Qwen/Qwen2.5-7B-Instruct","Qwen/Qwen2.5-VL-7B-Instruct","Qwen/Qwen3-8B-Base"]') ||
          (github.event.inputs.models == 'Qwen/Qwen2.5-7B-Instruct' &&
            '["Qwen/Qwen2.5-7B-Instruct"]') ||
          (github.event.inputs.models == 'Qwen/Qwen2.5-VL-7B-Instruct' &&
            '["Qwen/Qwen2.5-VL-7B-Instruct"]') ||
          (github.event.inputs.models == 'Qwen/Qwen3-8B-Base' &&
            '["Qwen/Qwen3-8B-Base"]') ||
          contains(github.event.pull_request.labels.*.name, 'accuracy-test') &&
            '["Qwen/Qwen2.5-7B-Instruct","Qwen/Qwen2.5-VL-7B-Instruct"]' ||
          contains(github.event.pull_request.labels.*.name, 'dense-accuracy-test') &&
            '["Qwen/Qwen2.5-7B-Instruct"]' ||
          contains(github.event.pull_request.labels.*.name, 'vl-accuracy-test') &&
            '["Qwen/Qwen2.5-VL-7B-Instruct"]'
          ) }}
      # Remove exclude after https://github.com/vllm-project/vllm-ascend/issues/1044 resolved
      exclude:
        - model_name: Qwen/Qwen2.5-VL-7B-Instruct
          vllm_use_version: 1

      fail-fast: false
    name: ${{ matrix.model_name }} accuracy V${{ matrix.vllm_use_version }}
    container:
      image: m.daocloud.io/quay.io/ascend/cann:8.1.rc1-910b-ubuntu22.04-py3.10
      env:
        HF_ENDPOINT: https://hf-mirror.com
        HF_TOKEN: ${{ secrets.HF_TOKEN }}
        DATASET_SOURCE: ModelScope
        # Quoted so the consumer receives the literal string "True" rather
        # than a YAML boolean re-serialized as "true".
        VLLM_USE_MODELSCOPE: "True"
        # 1. If version specified (workflow_dispatch), do specified branch accuracy test
        # 2. If no version (labeled PR), do accuracy test by default ref:
        #    The branch, tag or SHA to checkout. When checking out the repository that
        #    triggered a workflow, this defaults to the reference or SHA for that event.
        #    Otherwise, uses the default branch.
        GHA_VLLM_ASCEND_VERSION: ${{ github.event.inputs.vllm-ascend-version }}

    steps:
      - name: Checkout repository
# NOTE(review): this step's body continues in a diff hunk not visible here.
# NOTE(review): this chunk starts mid-step (the `- name:`/`uses:` lines of the
# vllm checkout step are in an unseen hunk above); indentation continues the
# job's `steps:` list.
        with:
          repository: vllm-project/vllm
          path: ./vllm-empty
          # Please also update this when bump matched version
          ref: ${{ github.event.inputs.vllm-version || 'v0.9.0' }}

      - name: Install vllm-project/vllm from source
        working-directory: ./vllm-empty
        run: VLLM_TARGET_DEVICE=empty pip install -e .

      - name: Checkout vllm-project/vllm-ascend repo
        uses: actions/checkout@v4
        with:
          repository: vllm-project/vllm-ascend
          path: ./vllm-ascend
          # Empty when triggered by a labeled PR, so checkout falls back to
          # the event's own ref (see GHA_VLLM_ASCEND_VERSION note above).
          ref: ${{ env.GHA_VLLM_ASCEND_VERSION }}

      - name: Install vllm-project/vllm-ascend
        working-directory: ./vllm-ascend
        run: |
          pip install -r requirements-dev.txt
          pip install -e .

      - name: Install lm-eval, ray, and datasets
        run: |
          pip install lm-eval

      - name: Collect version info
        run: |
          for dir in /usr/local/Ascend/ascend-toolkit/*; do
# NOTE(review): this run block continues in a diff hunk not visible here.
# NOTE(review): this chunk starts inside the "Collect version info" run block
# (its loop header is in an unseen hunk above); indentation continues the
# job's `steps:` list.
            fi
          done
          INFO_FILE="/usr/local/Ascend/ascend-toolkit/${TOOLKIT_DIR}/$(uname -i)-linux/ascend_toolkit_install.info"
          GHA_CANN_VERSION=$(grep "version=" "$INFO_FILE" \
            | head -n1 \
            | cut -d'=' -f2 \
            | tr -d '"')
          # Export collected versions for later steps via GITHUB_ENV.
          {
            echo "GHA_CANN_VERSION=$GHA_CANN_VERSION"
            pip show torch | grep "Version:" | awk '{print "GHA_TORCH_VERSION="$2}'
            pip show torch_npu | grep "Version:" | awk '{print "GHA_TORCH_NPU_VERSION="$2}'
            pip show vllm | grep "Version:" | awk '{print "GHA_VLLM_VERSION="$2}' | sed 's/+.*//'
          } >> "$GITHUB_ENV"

      - name: Print versions
        run: |
          echo "CANN: ${{ env.GHA_CANN_VERSION }}"
          echo "Torch NPU: ${{ env.GHA_TORCH_NPU_VERSION }}"
          echo "Torch: ${{ env.GHA_TORCH_VERSION }}"
          echo "vLLM: ${{ env.GHA_VLLM_VERSION }}"
          echo "vLLM Ascend: ${{ env.GHA_VLLM_ASCEND_VERSION || github.ref }}"

      - name: Run Accuracy Test for V${{ matrix.vllm_use_version }}
        id: report
        working-directory: ./benchmarks
        env:
          PYTORCH_NPU_ALLOC_CONF: max_split_size_mb:256
          VLLM_USE_V1: ${{ matrix.vllm_use_version }}
        run: |
          # e.g. Qwen/Qwen2.5-7B-Instruct -> Qwen2.5-7B-Instruct-V0
          model_base_name=$(basename ${{ matrix.model_name }})
          markdown_name="${model_base_name}-V${{ matrix.vllm_use_version }}"
          echo "markdown_name=$markdown_name"
          echo "markdown_name=$markdown_name" >> $GITHUB_OUTPUT
          mkdir -p ./accuracy

          python ./scripts/run_accuracy.py \
            --model "${{ matrix.model_name }}" \
            --output "./accuracy/${markdown_name}.md" \
            --vllm_ascend_version "${{ env.GHA_VLLM_ASCEND_VERSION || github.ref }}" \
            --cann_version "${{ env.GHA_CANN_VERSION }}" \
            --torch_npu_version "${{ env.GHA_TORCH_NPU_VERSION }}" \
            --torch_version "${{ env.GHA_TORCH_VERSION }}" \
            --vllm_version "${{ env.GHA_VLLM_VERSION }}"

      - name: Generate step summary
        # Publish the report even when the accuracy step failed.
        if: ${{ always() }}
        run: |
          cat ./benchmarks/accuracy/${{ steps.report.outputs.markdown_name }}.md >> $GITHUB_STEP_SUMMARY

      - name: Upload Report for V${{ matrix.vllm_use_version }}
        uses: actions/upload-artifact@v4
        with:
          name: "${{ env.GHA_VLLM_ASCEND_VERSION }}-${{ steps.report.outputs.markdown_name }}-report"
          path: ./benchmarks/accuracy/${{ steps.report.outputs.markdown_name }}.md
          if-no-files-found: warn
          retention-days: 90
          overwrite: true
0 commit comments