15
15
# This file is a part of the vllm-ascend project.
16
16
#
17
17
18
- name : Accuracy Tests
18
+ name : Benchmarks / accuracy
19
19
20
20
on :
21
+ pull_request :
22
+ types : [ labeled ]
21
23
workflow_dispatch :
22
24
inputs :
23
25
vllm-version :
26
28
type : choice
27
29
options :
28
30
- main
31
+ - v0.9.0.1
29
32
- v0.9.0
30
- - v0.8.5.post1
31
- - v0.8.5
32
- - v0.8.4
33
33
- v0.7.3
34
34
vllm-ascend-version :
35
35
description : ' vllm-ascend version:'
38
38
options :
39
39
- main
40
40
- v0.7.3-dev
41
- - v0.7.3
42
- - v0.8.5rc1
43
- - v0.8.4rc2
44
- - v0.8.4rc1
45
- - v0.7.3rc2
46
- - v0.7.3rc1
47
41
models :
48
42
description : ' model:'
49
43
required : true
50
44
type : choice
51
45
options :
52
46
- all
53
47
- Qwen/Qwen2.5-7B-Instruct
54
- - meta-llama/Llama-3.1-8B-Instruct
55
48
- Qwen/Qwen2.5-VL-7B-Instruct
56
- - Qwen/Qwen3-8B-Base
57
49
default : 'all'
58
50
59
51
# Bash shells do not use ~/.profile or ~/.bashrc so these shells need to be explicitly
@@ -65,16 +57,31 @@ defaults:
65
57
66
58
jobs :
67
59
model_tests :
60
+ # Runs when a PR carries a '*-accuracy-test' label together with 'ready-for-test', or when triggered via workflow_dispatch
61
+ if : >-
62
+ ${{
63
+ (contains(github.event.pull_request.labels.*.name, 'vl-accuracy-test') ||
64
+ contains(github.event.pull_request.labels.*.name, 'dense-accuracy-test')) &&
65
+ contains(github.event.pull_request.labels.*.name, 'ready-for-test') ||
66
+ github.event_name == 'workflow_dispatch'
67
+ }}
68
68
name : Model Test - ${{ matrix.model_name }}
69
69
runs-on : 'linux-arm64-npu-2'
70
70
strategy :
71
71
matrix :
72
+ # the accuracy test will run:
73
+ # 1. workflow_dispatch with models input
74
+ # - all: Qwen/Qwen2.5-7B-Instruct, Qwen/Qwen2.5-VL-7B-Instruct, Qwen/Qwen3-8B-Base
75
+ # - specified but not all: only the selected model, one of Qwen/Qwen2.5-7B-Instruct, Qwen/Qwen2.5-VL-7B-Instruct, Qwen/Qwen3-8B-Base
76
+ # 2. PR labeled with "*-accuracy-test"
77
+ # - dense-accuracy-test: Qwen/Qwen2.5-7B-Instruct
78
+ # - vl-accuracy-test: Qwen/Qwen2.5-VL-7B-Instruct
72
79
include : ${{ fromJSON(
73
- (github.event.inputs.models == 'all' && '[{"model_name":"Qwen/Qwen2.5-7B-Instruct","output_file":"Qwen2.5-7B-Instruct" },{"model_name":"meta-llama/Llama-3.1-8B-Instruct","output_file":"Llama-3.1-8B-Instruct" },{"model_name":"Qwen/Qwen2.5-VL-7B-Instruct","output_file":"Qwen2.5-VL-7B-Instruct" }, {"model_name":"Qwen/Qwen3-8B-Base","output_file":" Qwen3-8B-Base"}]') ||
74
- (github.event.inputs.models == 'Qwen/Qwen2.5-7B-Instruct' && '[{"model_name":"Qwen/Qwen2.5-7B-Instruct","output_file":"Qwen2.5-7B-Instruct" }]') ||
75
- (github.event.inputs.models == 'meta-llama/Llama-3.1-8B- Instruct' && '[{"model_name":"meta-llama/Llama-3.1-8B-Instruct","output_file":"Llama-3.1-8B -Instruct"}]') ||
76
- (github.event.inputs.models == 'Qwen/Qwen2.5-VL-7B-Instruct' && '[{"model_name":"Qwen/Qwen2.5-VL- 7B-Instruct","output_file":"Qwen2.5-VL-7B-Instruct" }]') ||
77
- (github.event.inputs.models == 'Qwen/Qwen3-8B-Base' && '[{"model_name":"Qwen/Qwen3-8B-Base","output_file":"Qwen3-8B-Base "}]')
80
+ (github.event.inputs.models == 'all' && '[{"model_name":"Qwen/Qwen2.5-7B-Instruct"},{"model_name":"meta-llama/Llama-3.1-8B-Instruct"},{"model_name":"Qwen/Qwen2.5-VL-7B-Instruct"}, {"model_name":"Qwen/Qwen3-8B-Base"}]') ||
81
+ (github.event.inputs.models == 'Qwen/Qwen2.5-7B-Instruct' && '[{"model_name":"Qwen/Qwen2.5-7B-Instruct"}]') ||
82
+ (github.event.inputs.models == 'Qwen/Qwen2.5-VL-7B-Instruct' && '[{"model_name":"Qwen/Qwen2.5-VL-7B-Instruct"}]') ||
83
+ contains(github.event.pull_request.labels.*.name, 'dense-accuracy-test') && '[{"model_name":"Qwen/Qwen2.5-7B-Instruct"}]' ||
84
+ contains(github.event.pull_request.labels.*.name, 'vl-accuracy-test') && '[{"model_name":"Qwen/Qwen2.5-VL-7B-Instruct"}]'
78
85
) }}
79
86
fail-fast : false
80
87
84
91
HF_ENDPOINT : https://hf-mirror.com
85
92
HF_TOKEN : ${{ secrets.HF_TOKEN }}
86
93
DATASET_SOURCE : ModelScope
94
+ VLLM_USE_MODELSCOPE : True
87
95
88
96
steps :
89
97
- name : Checkout repository
@@ -112,18 +120,20 @@ jobs:
112
120
with :
113
121
repository : vllm-project/vllm
114
122
path : ./vllm-empty
115
- ref : ${{ github.event.inputs.vllm-version }}
123
+ # Please also update this default when bumping the matched vLLM version
124
+ ref : ${{ github.event.inputs.vllm-version || 'v0.9.0' }}
116
125
117
126
- name : Install vllm-project/vllm from source
118
127
working-directory : ./vllm-empty
119
128
run : VLLM_TARGET_DEVICE=empty pip install -e .
120
-
121
129
122
130
- name : Checkout vllm-project/vllm-ascend repo
123
131
uses : actions/checkout@v4
124
132
with :
125
133
repository : vllm-project/vllm-ascend
126
134
path : ./vllm-ascend
135
+ # 1. If a version is specified (workflow_dispatch), test the specified branch
136
+ # 2. If no version (labeled PR), do PR accuracy test
127
137
ref : ${{ github.event.inputs.vllm-ascend-version }}
128
138
fetch-depth : 0
129
139
@@ -133,7 +143,7 @@ jobs:
133
143
pip install -r requirements-dev.txt
134
144
pip install -e .
135
145
136
- - name : Install EleutherAI/ lm-evaluation-harness
146
+ - name : Install lm-eval, ray, and datasets
137
147
run : |
138
148
pip install lm-eval ray datasets==2.16.0
139
149
@@ -166,26 +176,32 @@ jobs:
166
176
echo "vLLM: ${{ env.VLLM_VERSION }}"
167
177
168
178
- name : Run Accuracy Test for V0
179
+ id : report
169
180
working-directory : ./benchmarks
170
181
env :
171
182
VLLM_USE_V1 : 0
172
183
PYTORCH_NPU_ALLOC_CONF : max_split_size_mb:256
173
184
run : |
185
+ model_base_name=$(basename ${{ matrix.model_name }})
186
+ echo "model_base_name=$model_base_name"
187
+ echo "model_base_name=$model_base_name" >> $GITHUB_OUTPUT
174
188
mkdir -p ./accuracy/V0
189
+ echo "test111" > ./accuracy/V0/${model_base_name}.md
175
190
python ./scripts/run_accuracy.py \
176
191
--model "${{ matrix.model_name }}" \
177
- --output "./accuracy/V0/${{ matrix.output_file } }.md" \
178
- --vllm_ascend_version "${{ github.event.inputs.vllm-ascend-version }}" \
192
+ --output "./accuracy/V0/${model_base_name}.md" \
193
+ --vllm_ascend_version "${{ github.event.inputs.vllm-ascend-version || 'current' }}" \
179
194
--cann_version "${{ env.CANN_VERSION }}" \
180
195
--torch_npu_version "${{ env.TORCH_NPU_VERSION }}" \
181
196
--torch_version "${{ env.TORCH_VERSION }}" \
182
197
--vllm_version "${{ env.VLLM_VERSION }}"
198
+ cat ./accuracy/V0/${model_base_name}.md
183
199
184
200
- name : Upload Report for V0
185
201
uses : actions/upload-artifact@v4
186
202
with :
187
- name : " ${{ github.event.inputs.vllm-ascend-version }}-${{ matrix.output_file }}-V0-report"
188
- path : ./benchmarks/accuracy/V0/${{ matrix.output_file }}.md
203
+ name : "${{ github.event.inputs.vllm-ascend-version }}-${{ steps.report.outputs.model_base_name }}-V0-report"
204
+ path : ./benchmarks/accuracy/V0/${{ steps.report.outputs.model_base_name }}.md
189
205
if-no-files-found : warn
190
206
retention-days : 90
191
207
overwrite : true
0 commit comments