Skip to content

Commit cd4885d

Browse files
committed
Merge branch 'main' of github.com:vllm-project/vllm-ascend
Signed-off-by: depeng1994 <depengzhang@foxmail.com>
2 parents 5f1f78d + 543380c commit cd4885d

25 files changed

+638
-403
lines changed

.github/workflows/accuracy_report.yaml

Lines changed: 5 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -60,16 +60,6 @@ jobs:
6060
env:
6161
GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
6262

63-
- name: Query artifact run id for Llama-3.1-8B-Instruct V0 latest artifact
64-
id: get_Llama_3_1_8B_Instruct_latest_run_id_V0
65-
run: |
66-
ARTIFACT_JSON=$(gh api "repos/${{ github.repository }}/actions/artifacts")
67-
RUN_ID=$(echo "$ARTIFACT_JSON" | \
68-
jq -r '[.artifacts[] | select(.name=="${{ github.event.inputs.vllm-ascend-version }}-Llama-3.1-8B-Instruct-V0-report")] | sort_by(.created_at) | last | .workflow_run.id')
69-
echo "runid=$RUN_ID" >> "$GITHUB_OUTPUT"
70-
env:
71-
GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
72-
7363
- name: Query artifact run id for Qwen3-8B-Base V0 latest artifact
7464
id: get_Qwen3_8B_Base_latest_run_id_V0
7565
run: |
@@ -98,15 +88,6 @@ jobs:
9888
repository: vllm-project/vllm-ascend
9989
run-id: ${{ steps.get_Qwen2_5_7B_Instruct_latest_run_id_V0.outputs.runid }}
10090

101-
- name: Download meta-llama/Llama-3.1-8B-Instruct Artifact
102-
uses: actions/download-artifact@v4
103-
with:
104-
name: ${{ github.event.inputs.vllm-ascend-version }}-Llama-3.1-8B-Instruct-V0-report
105-
path: ./docs/source/developer_guide/evaluation/accuracy_report
106-
github-token: ${{ secrets.GITHUB_TOKEN }}
107-
repository: vllm-project/vllm-ascend
108-
run-id: ${{ steps.get_Llama_3_1_8B_Instruct_latest_run_id_V0.outputs.runid }}
109-
11091
- name: Download Qwen/Qwen3-8B-Base Artifact
11192
uses: actions/download-artifact@v4
11293
with:
@@ -120,15 +101,14 @@ jobs:
120101
working-directory: ./docs/source/developer_guide/evaluation/accuracy_report
121102
run: |
122103
cat ./Qwen2.5-VL-7B-Instruct.md
123-
cat ./Llama-3.1-8B-Instruct.md
124104
cat ./Qwen2.5-7B-Instruct.md
125105
cat ./Qwen3-8B-Base.md
126106
127107
- name: Create Pull Request for markdown update
128108
uses: peter-evans/create-pull-request@v7
129109
with:
130110
token: ${{ secrets.PR_TOKEN }}
131-
base: ${{ github.ref_name }}
111+
base: ${{ github.event.inputs.branch }}
132112
branch: auto-pr/accuracy-test
133113
commit-message: "Update accuracy report for ${{ github.event.inputs.branch }}"
134114
add-paths: ./docs/source/developer_guide/evaluation/accuracy_report/*.md
@@ -139,12 +119,10 @@ jobs:
139119
140120
- [Workflow run][1]
141121
- [Qwen2.5-7B-Instruct accuracy report][2]
142-
- [Llama-3.1-8B-Instruct accuracy report][3]
143-
- [Qwen2.5-VL-7B-Instruct accuracy report][4]
144-
- [Qwen3-8B-Base accuracy report][5]
122+
- [Qwen2.5-VL-7B-Instruct accuracy report][3]
123+
- [Qwen3-8B-Base accuracy report][4]
145124
146125
[1]: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}
147126
[2]: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ steps.get_Qwen2_5_7B_Instruct_latest_run_id_V0.outputs.runid }}
148-
[3]: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ steps.get_Llama_3_1_8B_Instruct_latest_run_id_V0.outputs.runid }}
149-
[4]: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ steps.get_Qwen2_5_VL_7B_Instruct_latest_run_id_V0.outputs.runid }}
150-
[5]: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ steps.get_Qwen3_8B_Base_latest_run_id_V0.outputs.runid }}
127+
[3]: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ steps.get_Qwen2_5_VL_7B_Instruct_latest_run_id_V0.outputs.runid }}
128+
[4]: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ steps.get_Qwen3_8B_Base_latest_run_id_V0.outputs.runid }}

.github/workflows/accuracy_test.yaml

Lines changed: 119 additions & 69 deletions
Original file line numberDiff line numberDiff line change
@@ -15,27 +15,42 @@
1515
# This file is a part of the vllm-ascend project.
1616
#
1717

18-
name: Accuracy Tests
18+
# This test will be triggered:
19+
# 1. PR labeled with: '*accuracy-test' (ONLY 1 label valid) & 'ready-for-test'
20+
# 2. workflow_dispatch with models input
21+
# See detail rule in strategy.matrix note
22+
name: Benchmarks / accuracy
1923

2024
on:
25+
pull_request:
26+
types: [ labeled ]
2127
workflow_dispatch:
2228
inputs:
2329
vllm-version:
24-
description: 'what vllm version to accuracy test?'
30+
description: 'vllm version:'
2531
required: true
26-
type: string
32+
type: choice
33+
# Please also update this when bump matched version
34+
# Current supported vLLM versions
35+
options:
36+
- main
37+
- v0.9.0.1
38+
- v0.9.0
39+
- v0.7.3
2740
vllm-ascend-version:
28-
description: 'what vllm-ascend version to accuracy test?'
41+
description: 'vllm-ascend version:'
2942
required: true
30-
type: string
43+
type: choice
44+
options:
45+
- main
46+
- v0.7.3-dev
3147
models:
32-
description: 'choose model(all/Qwen2.5-7B-Instruct/Llama-3.1-8B-Instruct/Qwen2.5-VL-7B-Instruct/Qwen3-8B-Base)'
48+
description: 'model:'
3349
required: true
3450
type: choice
3551
options:
3652
- all
3753
- Qwen/Qwen2.5-7B-Instruct
38-
- meta-llama/Llama-3.1-8B-Instruct
3954
- Qwen/Qwen2.5-VL-7B-Instruct
4055
- Qwen/Qwen3-8B-Base
4156
default: 'all'
@@ -47,27 +62,73 @@ defaults:
4762
run:
4863
shell: bash -el {0}
4964

65+
concurrency:
66+
group: pr-${{ github.event.pull_request.number }}
67+
cancel-in-progress: true
68+
5069
jobs:
51-
model_tests:
52-
name: Model Test - ${{ matrix.model_name }}
53-
runs-on: 'linux-arm64-npu-2'
70+
accuracy_tests:
71+
# test will be triggered when tag '*accuracy-test' & 'ready-for-test' or workflow_dispatch job
72+
if: >-
73+
${{
74+
(contains(github.event.pull_request.labels.*.name, 'accuracy-test') ||
75+
contains(github.event.pull_request.labels.*.name, 'vl-accuracy-test') ||
76+
contains(github.event.pull_request.labels.*.name, 'dense-accuracy-test')) &&
77+
contains(github.event.pull_request.labels.*.name, 'ready-for-test') ||
78+
github.event_name == 'workflow_dispatch'
79+
}}
80+
runs-on: >-
81+
${{
82+
(matrix.model_name == 'Qwen/Qwen2.5-VL-7B-Instruct' && 'linux-arm64-npu-4') ||
83+
'linux-arm64-npu-2'
84+
}}
5485
strategy:
5586
matrix:
56-
include: ${{ fromJSON(
57-
(github.event.inputs.models == 'all' && '[{"model_name":"Qwen/Qwen2.5-7B-Instruct","output_file":"Qwen2.5-7B-Instruct"},{"model_name":"meta-llama/Llama-3.1-8B-Instruct","output_file":"Llama-3.1-8B-Instruct"},{"model_name":"Qwen/Qwen2.5-VL-7B-Instruct","output_file":"Qwen2.5-VL-7B-Instruct"}, {"model_name":"Qwen/Qwen3-8B-Base","output_file":"Qwen3-8B-Base"}]') ||
58-
(github.event.inputs.models == 'Qwen/Qwen2.5-7B-Instruct' && '[{"model_name":"Qwen/Qwen2.5-7B-Instruct","output_file":"Qwen2.5-7B-Instruct"}]') ||
59-
(github.event.inputs.models == 'meta-llama/Llama-3.1-8B-Instruct' && '[{"model_name":"meta-llama/Llama-3.1-8B-Instruct","output_file":"Llama-3.1-8B-Instruct"}]') ||
60-
(github.event.inputs.models == 'Qwen/Qwen2.5-VL-7B-Instruct' && '[{"model_name":"Qwen/Qwen2.5-VL-7B-Instruct","output_file":"Qwen2.5-VL-7B-Instruct"}]') ||
61-
(github.event.inputs.models == 'Qwen/Qwen3-8B-Base' && '[{"model_name":"Qwen/Qwen3-8B-Base","output_file":"Qwen3-8B-Base"}]')
87+
vllm_use_version: [0, 1]
88+
# the accuracy test will run:
89+
# 1. workflow_dispatch with models input
90+
# - all: Qwen/Qwen2.5-7B-Instruct, Qwen/Qwen2.5-VL-7B-Instruct, Qwen/Qwen3-8B-Base
91+
# - specified but not all: only the single selected model
92+
# 2. PR labeled with "*-accuracy-test"
93+
# - accuracy-test: Qwen/Qwen2.5-7B-Instruct, Qwen/Qwen2.5-VL-7B-Instruct
94+
# - dense-accuracy-test: Qwen/Qwen2.5-7B-Instruct
95+
# - vl-accuracy-test: Qwen/Qwen2.5-VL-7B-Instruct
96+
model_name: ${{ fromJSON(
97+
(github.event.inputs.models == 'all' &&
98+
'["Qwen/Qwen2.5-7B-Instruct","Qwen/Qwen2.5-VL-7B-Instruct","Qwen/Qwen3-8B-Base"]') ||
99+
(github.event.inputs.models == 'Qwen/Qwen2.5-7B-Instruct' &&
100+
'["Qwen/Qwen2.5-7B-Instruct"]') ||
101+
(github.event.inputs.models == 'Qwen/Qwen2.5-VL-7B-Instruct' &&
102+
'["Qwen/Qwen2.5-VL-7B-Instruct"]') ||
103+
(github.event.inputs.models == 'Qwen/Qwen3-8B-Base' &&
104+
'["Qwen/Qwen3-8B-Base"]') ||
105+
contains(github.event.pull_request.labels.*.name, 'accuracy-test') &&
106+
'["Qwen/Qwen2.5-7B-Instruct","Qwen/Qwen2.5-VL-7B-Instruct"]' ||
107+
contains(github.event.pull_request.labels.*.name, 'dense-accuracy-test') &&
108+
'["Qwen/Qwen2.5-7B-Instruct"]' ||
109+
contains(github.event.pull_request.labels.*.name, 'vl-accuracy-test') &&
110+
'["Qwen/Qwen2.5-VL-7B-Instruct"]'
62111
) }}
63-
fail-fast: false
112+
# Remove exclude after https://github.com/vllm-project/vllm-ascend/issues/1044 resolved
113+
exclude:
114+
- model_name: Qwen/Qwen2.5-VL-7B-Instruct
115+
vllm_use_version: 1
64116

117+
fail-fast: false
118+
name: ${{ matrix.model_name }} accuracy V${{ matrix.vllm_use_version }}
65119
container:
66120
image: m.daocloud.io/quay.io/ascend/cann:8.1.rc1-910b-ubuntu22.04-py3.10
67121
env:
68122
HF_ENDPOINT: https://hf-mirror.com
69123
HF_TOKEN: ${{ secrets.HF_TOKEN }}
70124
DATASET_SOURCE: ModelScope
125+
VLLM_USE_MODELSCOPE: True
126+
# 1. If version specified (work_dispatch), do specified branch accuracy test
127+
# 2. If no version (labeled PR), do accuracy test by default ref:
128+
# The branch, tag or SHA to checkout. When checking out the repository that
129+
# triggered a workflow, this defaults to the reference or SHA for that event.
130+
# Otherwise, uses the default branch.
131+
GHA_VLLM_ASCEND_VERSION: ${{ github.event.inputs.vllm-ascend-version }}
71132

72133
steps:
73134
- name: Checkout repository
@@ -96,53 +157,30 @@ jobs:
96157
with:
97158
repository: vllm-project/vllm
98159
path: ./vllm-empty
99-
ref: ${{ github.event.inputs.vllm-version }}
160+
# Please also update this when bump matched version
161+
ref: ${{ github.event.inputs.vllm-version || 'v0.9.0' }}
100162

101163
- name: Install vllm-project/vllm from source
102164
working-directory: ./vllm-empty
103165
run: VLLM_TARGET_DEVICE=empty pip install -e .
104-
105166

106167
- name: Checkout vllm-project/vllm-ascend repo
107168
uses: actions/checkout@v4
108169
with:
109170
repository: vllm-project/vllm-ascend
110171
path: ./vllm-ascend
111-
ref: ${{ github.event.inputs.vllm-ascend-version }}
112-
fetch-depth: 0
113-
114-
- name: Install pta
115-
run: |
116-
if [ ! -d /root/.cache/pta ]; then
117-
mkdir -p /root/.cache/pta
118-
fi
119-
if [ ! -f /root/.cache/pta/torch_npu-2.5.1.dev20250320-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl ]; then
120-
cd /root/.cache/pta
121-
rm -rf pytorch_v2.5.1_py310*
122-
wget https://pytorch-package.obs.cn-north-4.myhuaweicloud.com/pta/Daily/v2.5.1/20250320.3/pytorch_v2.5.1_py310.tar.gz
123-
tar -zxvf pytorch_v2.5.1_py310.tar.gz
124-
fi
125-
pip install /root/.cache/pta/torch_npu-2.5.1.dev20250320-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl
172+
ref: ${{ env.GHA_VLLM_ASCEND_VERSION }}
126173

127174
- name: Install vllm-project/vllm-ascend
128175
working-directory: ./vllm-ascend
129176
run: |
130177
pip install -r requirements-dev.txt
131178
pip install -e .
132-
133-
- name: Checkout EleutherAI/lm-evaluation-harness repo
134-
uses: actions/checkout@v4
135-
with:
136-
repository: EleutherAI/lm-evaluation-harness
137-
path: ./lm-eval
138-
fetch-depth: 0
139179
140-
- name: Install EleutherAI/lm-evaluation-harness
141-
working-directory: ./lm-eval
180+
- name: Install lm-eval, ray, and datasets
142181
run: |
143-
pip install -e .
144-
pip install ray datasets==2.16.0
145-
182+
pip install lm-eval
183+
146184
- name: Collect version info
147185
run: |
148186
for dir in /usr/local/Ascend/ascend-toolkit/*; do
@@ -153,45 +191,57 @@ jobs:
153191
fi
154192
done
155193
INFO_FILE="/usr/local/Ascend/ascend-toolkit/${TOOLKIT_DIR}/$(uname -i)-linux/ascend_toolkit_install.info"
156-
CANN_VERSION=$(grep "version=" "$INFO_FILE" \
194+
GHA_CANN_VERSION=$(grep "version=" "$INFO_FILE" \
157195
| head -n1 \
158196
| cut -d'=' -f2 \
159197
| tr -d '"')
160198
{
161-
echo "CANN_VERSION=$CANN_VERSION"
162-
pip show torch | grep "Version:" | awk '{print "TORCH_VERSION="$2}'
163-
pip show torch_npu | grep "Version:" | awk '{print "TORCH_NPU_VERSION="$2}'
164-
pip show vllm | grep "Version:" | awk '{print "VLLM_VERSION="$2}' | sed 's/+.*//'
199+
echo "GHA_CANN_VERSION=$GHA_CANN_VERSION"
200+
pip show torch | grep "Version:" | awk '{print "GHA_TORCH_VERSION="$2}'
201+
pip show torch_npu | grep "Version:" | awk '{print "GHA_TORCH_NPU_VERSION="$2}'
202+
pip show vllm | grep "Version:" | awk '{print "GHA_VLLM_VERSION="$2}' | sed 's/+.*//'
165203
} >> "$GITHUB_ENV"
166204
167205
- name: Print versions
168206
run: |
169-
echo "CANN: ${{ env.CANN_VERSION }}"
170-
echo "Torch NPU: ${{ env.TORCH_NPU_VERSION }}"
171-
echo "Torch: ${{ env.TORCH_VERSION }}"
172-
echo "vLLM: ${{ env.VLLM_VERSION }}"
173-
174-
- name: Run Accuracy Test for V0
207+
echo "CANN: ${{ env.GHA_CANN_VERSION }}"
208+
echo "Torch NPU: ${{ env.GHA_TORCH_NPU_VERSION }}"
209+
echo "Torch: ${{ env.GHA_TORCH_VERSION }}"
210+
echo "vLLM: ${{ env.GHA_VLLM_VERSION }}"
211+
echo "vLLM Ascend: ${{ env.GHA_VLLM_ASCEND_VERSION || github.ref }}"
212+
213+
- name: Run Accuracy Test for V${{ matrix.vllm_use_version }}
214+
id: report
175215
working-directory: ./benchmarks
176216
env:
177-
VLLM_USE_V1: 0
178217
PYTORCH_NPU_ALLOC_CONF: max_split_size_mb:256
218+
VLLM_USE_V1: ${{ matrix.vllm_use_version }}
179219
run: |
180-
mkdir -p ./accuracy/V0
220+
model_base_name=$(basename ${{ matrix.model_name }})
221+
markdown_name="${model_base_name}-V${{ matrix.vllm_use_version }}"
222+
echo "markdown_name=$markdown_name"
223+
echo "markdown_name=$markdown_name" >> $GITHUB_OUTPUT
224+
mkdir -p ./accuracy
225+
181226
python ./scripts/run_accuracy.py \
182227
--model "${{ matrix.model_name }}" \
183-
--output "./accuracy/V0/${{ matrix.output_file }}.md" \
184-
--vllm_ascend_version "${{ github.event.inputs.vllm-ascend-version }}" \
185-
--cann_version "${{ env.CANN_VERSION }}" \
186-
--torch_npu_version "${{ env.TORCH_NPU_VERSION }}" \
187-
--torch_version "${{ env.TORCH_VERSION }}" \
188-
--vllm_version "${{ env.VLLM_VERSION }}"
189-
190-
- name: Upload Report for V0
228+
--output "./accuracy/${markdown_name}.md" \
229+
--vllm_ascend_version "${{ env.GHA_VLLM_ASCEND_VERSION || github.ref }}" \
230+
--cann_version "${{ env.GHA_CANN_VERSION }}" \
231+
--torch_npu_version "${{ env.GHA_TORCH_NPU_VERSION }}" \
232+
--torch_version "${{ env.GHA_TORCH_VERSION }}" \
233+
--vllm_version "${{ env.GHA_VLLM_VERSION }}"
234+
235+
- name: Generate step summary
236+
if: ${{ always() }}
237+
run: |
238+
cat ./benchmarks/accuracy/${{ steps.report.outputs.markdown_name }}.md >> $GITHUB_STEP_SUMMARY
239+
240+
- name: Upload Report for V${{ matrix.vllm_use_version }}
191241
uses: actions/upload-artifact@v4
192242
with:
193-
name: "${{ github.event.inputs.vllm-ascend-version }}-${{ matrix.output_file }}-V0-report"
194-
path: ./benchmarks/accuracy/V0/${{ matrix.output_file }}.md
243+
name: "${{ env.GHA_VLLM_ASCEND_VERSION }}-${{ steps.report.outputs.markdown_name }}-report"
244+
path: ./benchmarks/accuracy/${{ steps.report.outputs.markdown_name }}.md
195245
if-no-files-found: warn
196246
retention-days: 90
197247
overwrite: true
Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
name: "Merge Conflict Labeler"
2+
on:
3+
# So that PRs touching the same files as the push are updated
4+
push:
5+
# So that the `dirtyLabel` is removed if conflicts are resolved
6+
# We recommend `pull_request_target` so that github secrets are available.
7+
# In `pull_request` we wouldn't be able to change labels of fork PRs
8+
pull_request_target:
9+
types: [synchronize]
10+
11+
jobs:
12+
main:
13+
runs-on: ubuntu-latest
14+
steps:
15+
- name: check if prs are dirty
16+
uses: eps1lon/actions-label-merge-conflict@v3
17+
with:
18+
dirtyLabel: "merge-conflicts"
19+
removeOnDirtyLabel: "ready"
20+
repoToken: "${{ secrets.GITHUB_TOKEN }}"
21+
commentOnDirty: "This pull request has conflicts, please resolve those before we can evaluate the pull request."

0 commit comments

Comments
 (0)