fix

Yikun · Yikun · commit 6a1c5102fded · 2025-06-01T00:47:34.000+08:00
Signed-off-by: Yikun Jiang <yikunkero@gmail.com>
diff --git a/.github/workflows/accuracy_test.yaml b/.github/workflows/accuracy_test.yaml
@@ -15,9 +15,11 @@
 # This file is a part of the vllm-ascend project.
 #
 
-name: Accuracy Tests
+name: Benchmarks / accuracy
 
 on:
+  pull_request:
+    types: [ labeled ]
   workflow_dispatch:
     inputs:
       vllm-version:
@@ -26,10 +28,8 @@ on:
         type: choice
         options:
           - main
+          - v0.9.0.1
           - v0.9.0
-          - v0.8.5.post1
-          - v0.8.5
-          - v0.8.4
           - v0.7.3
       vllm-ascend-version:
         description: 'vllm-ascend version:'
@@ -38,22 +38,14 @@ on:
         options:
           - main
           - v0.7.3-dev
-          - v0.7.3
-          - v0.8.5rc1
-          - v0.8.4rc2
-          - v0.8.4rc1
-          - v0.7.3rc2
-          - v0.7.3rc1
       models:
         description: 'model:'
         required: true
         type: choice
         options:
           - all
           - Qwen/Qwen2.5-7B-Instruct
-          - meta-llama/Llama-3.1-8B-Instruct
           - Qwen/Qwen2.5-VL-7B-Instruct
-          - Qwen/Qwen3-8B-Base
         default: 'all'
 
 # Bash shells do not use ~/.profile or ~/.bashrc so these shells need to be explicitly
@@ -65,16 +57,31 @@ defaults:
 
 jobs:
   model_tests:
+    # test will be triggered when a PR is labeled with '*-accuracy-test' and 'ready-for-test', or by workflow_dispatch
+    if:  >-
+      ${{
+      (contains(github.event.pull_request.labels.*.name, 'vl-accuracy-test') ||
+      contains(github.event.pull_request.labels.*.name, 'dense-accuracy-test')) &&
+      contains(github.event.pull_request.labels.*.name, 'ready-for-test') ||
+      github.event_name == 'workflow_dispatch'
+      }}
     name: Model Test - ${{ matrix.model_name }}
     runs-on: 'linux-arm64-npu-2'
     strategy:
       matrix:
+        # the accuracy test will run:
+        # 1. workflow_dispatch with models input
+        #   - all: Qwen/Qwen2.5-7B-Instruct, meta-llama/Llama-3.1-8B-Instruct, Qwen/Qwen2.5-VL-7B-Instruct, Qwen/Qwen3-8B-Base
+        #   - specified but not all: only the selected model (Qwen/Qwen2.5-7B-Instruct or Qwen/Qwen2.5-VL-7B-Instruct)
+        # 2. PR labeled with "*-accuracy-test"
+        #   - dense-accuracy-test: Qwen/Qwen2.5-7B-Instruct
+        #   - vl-accuracy-test: Qwen/Qwen2.5-VL-7B-Instruct
         include: ${{ fromJSON(
-          (github.event.inputs.models == 'all' && '[{"model_name":"Qwen/Qwen2.5-7B-Instruct","output_file":"Qwen2.5-7B-Instruct"},{"model_name":"meta-llama/Llama-3.1-8B-Instruct","output_file":"Llama-3.1-8B-Instruct"},{"model_name":"Qwen/Qwen2.5-VL-7B-Instruct","output_file":"Qwen2.5-VL-7B-Instruct"}, {"model_name":"Qwen/Qwen3-8B-Base","output_file":"Qwen3-8B-Base"}]') ||
-          (github.event.inputs.models == 'Qwen/Qwen2.5-7B-Instruct' && '[{"model_name":"Qwen/Qwen2.5-7B-Instruct","output_file":"Qwen2.5-7B-Instruct"}]') ||
-          (github.event.inputs.models == 'meta-llama/Llama-3.1-8B-Instruct' && '[{"model_name":"meta-llama/Llama-3.1-8B-Instruct","output_file":"Llama-3.1-8B-Instruct"}]') ||
-          (github.event.inputs.models == 'Qwen/Qwen2.5-VL-7B-Instruct' && '[{"model_name":"Qwen/Qwen2.5-VL-7B-Instruct","output_file":"Qwen2.5-VL-7B-Instruct"}]') ||
-          (github.event.inputs.models == 'Qwen/Qwen3-8B-Base' && '[{"model_name":"Qwen/Qwen3-8B-Base","output_file":"Qwen3-8B-Base"}]')
+          (github.event.inputs.models == 'all' && '[{"model_name":"Qwen/Qwen2.5-7B-Instruct"},{"model_name":"meta-llama/Llama-3.1-8B-Instruct"},{"model_name":"Qwen/Qwen2.5-VL-7B-Instruct"}, {"model_name":"Qwen/Qwen3-8B-Base"}]') ||
+          (github.event.inputs.models == 'Qwen/Qwen2.5-7B-Instruct' && '[{"model_name":"Qwen/Qwen2.5-7B-Instruct"}]') ||
+          (github.event.inputs.models == 'Qwen/Qwen2.5-VL-7B-Instruct' && '[{"model_name":"Qwen/Qwen2.5-VL-7B-Instruct"}]') ||
+          contains(github.event.pull_request.labels.*.name, 'dense-accuracy-test') && '[{"model_name":"Qwen/Qwen2.5-7B-Instruct"}]' ||
+          contains(github.event.pull_request.labels.*.name, 'vl-accuracy-test') && '[{"model_name":"Qwen/Qwen2.5-VL-7B-Instruct"}]'
          ) }}
       fail-fast: false
 
@@ -84,6 +91,7 @@ jobs:
         HF_ENDPOINT: https://hf-mirror.com
         HF_TOKEN: ${{ secrets.HF_TOKEN }}
         DATASET_SOURCE: ModelScope
+        VLLM_USE_MODELSCOPE: True
 
     steps:
       - name: Checkout repository
@@ -112,18 +120,20 @@ jobs:
         with:
           repository: vllm-project/vllm
           path: ./vllm-empty
-          ref: ${{ github.event.inputs.vllm-version }}
+          # Please also change this when bump matched version
+          ref: ${{ github.event.inputs.vllm-version  || 'v0.9.0' }}
 
       - name: Install vllm-project/vllm from source
         working-directory: ./vllm-empty
         run: VLLM_TARGET_DEVICE=empty pip install -e .
-      
 
       - name: Checkout vllm-project/vllm-ascend repo
         uses: actions/checkout@v4
         with:
           repository: vllm-project/vllm-ascend
           path: ./vllm-ascend
+          # 1. If version specified (workflow_dispatch), do specified branch test
+          # 2. If no version (labeled PR), do PR accuracy test
           ref: ${{ github.event.inputs.vllm-ascend-version }}
           fetch-depth: 0
 
@@ -133,7 +143,7 @@ jobs:
           pip install -r requirements-dev.txt
           pip install -e .
           
-      - name: Install EleutherAI/lm-evaluation-harness
+      - name: Install lm-eval, ray, and datasets
         run: |
             pip install lm-eval ray datasets==2.16.0
           
@@ -166,26 +176,32 @@ jobs:
           echo "vLLM: ${{ env.VLLM_VERSION }}"
 
       - name: Run Accuracy Test for V0
+        id: report
         working-directory: ./benchmarks
         env:
           VLLM_USE_V1: 0
           PYTORCH_NPU_ALLOC_CONF: max_split_size_mb:256
         run: |
+          model_base_name=$(basename "${{ matrix.model_name }}")
+          echo "model_base_name=$model_base_name"
+          echo "model_base_name=$model_base_name" >> "$GITHUB_OUTPUT"
           mkdir -p ./accuracy/V0
+          # NOTE(review): run_accuracy.py is expected to create the --output report itself; no placeholder is pre-seeded
           python ./scripts/run_accuracy.py \
             --model "${{ matrix.model_name }}" \
-            --output "./accuracy/V0/${{ matrix.output_file }}.md" \
-            --vllm_ascend_version "${{ github.event.inputs.vllm-ascend-version }}" \
+            --output "./accuracy/V0/${model_base_name}.md" \
+            --vllm_ascend_version "${{ github.event.inputs.vllm-ascend-version || 'current' }}" \
             --cann_version "${{ env.CANN_VERSION }}" \
             --torch_npu_version "${{ env.TORCH_NPU_VERSION }}" \
             --torch_version "${{ env.TORCH_VERSION }}" \
             --vllm_version "${{ env.VLLM_VERSION }}"
+          cat "./accuracy/V0/${model_base_name}.md"
 
       - name: Upload Report for V0
         uses: actions/upload-artifact@v4
         with:
-          name: "${{ github.event.inputs.vllm-ascend-version }}-${{ matrix.output_file }}-V0-report"
-          path: ./benchmarks/accuracy/V0/${{ matrix.output_file }}.md
+          name: "${{ github.event.inputs.vllm-ascend-version || 'current' }}-${{ steps.report.outputs.model_base_name }}-V0-report"  # 'current' when no dispatch input exists (labeled PR)
+          path: ./benchmarks/accuracy/V0/${{ steps.report.outputs.model_base_name }}.md
           if-no-files-found: warn
           retention-days: 90
           overwrite: true
diff --git a/.github/workflows/vllm_ascend_test.yaml b/.github/workflows/vllm_ascend_test.yaml
@@ -27,9 +27,9 @@ on:
     paths:
       - '*.txt'
       - '**/*.py'
-      - '.github/workflows/vllm_ascend_test.yaml'
       - '!docs/**'
       - 'pytest.ini'
+      - '!benchmarks/**'
 # Bash shells do not use ~/.profile or ~/.bashrc so these shells need to be explicitly
 # declared as "shell: bash -el {0}" on steps that need to be properly activated.
 # It's used to activate ascend-toolkit environment variables.