[CI] Refactor CI #952
.github/workflows/vllm_ascend_test.yaml

@@ -30,32 +30,27 @@ on:
      - '.github/workflows/vllm_ascend_test.yaml'
      - '!docs/**'
      - 'pytest.ini'

# Bash shells do not use ~/.profile or ~/.bashrc so these shells need to be explicitly
# declared as "shell: bash -el {0}" on steps that need to be properly activated.
# It's used to activate ascend-toolkit environment variables.
defaults:
  run:
    shell: bash -el {0}
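> Note: `bash -el {0}` combines `-e` (abort on the first failing command), `-l` (login shell, which sources `/etc/profile` and the profile.d scripts, the mechanism that exports the ascend-toolkit variables), and `{0}`, GitHub's placeholder for the generated step script. A per-step sketch of the same thing (the `set_env.sh` path is an assumption about the CANN install layout, not taken from this diff):

```yaml
# Sketch only: opting a single step into the login shell instead of the
# workflow-level default above.
- name: Step that needs the ascend-toolkit environment
  shell: bash -el {0}   # -e: errexit, -l: login shell, {0}: generated script path
  run: |
    # A login shell normally does this via /etc/profile; the path is an assumption.
    source /usr/local/Ascend/ascend-toolkit/set_env.sh
    npu-smi info
```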

concurrency:
  group: pr-${{ github.event.pull_request.number }}
  cancel-in-progress: true

jobs:
  test:
    strategy:
      max-parallel: 2
      matrix:
        os: [linux-arm64-npu-1, linux-arm64-npu-4]
-       vllm_verison: [main, v0.8.5.post1]
+       vllm_version: [main, v0.8.5.post1]
    concurrency:
      group: >
        ${{
        matrix.os == 'linux-arm64-npu-4'
        && github.event.pull_request.number
        && format('pr-{0}-limit-npu-4', github.event.pull_request.number)
-       || format('job-{0}-{1}-{2}', matrix.os, matrix.vllm_verison, github.event.pull_request.number)
+       || format('job-{0}-{1}-{2}', matrix.os, matrix.vllm_version, github.event.pull_request.number)
        }}
      cancel-in-progress: false
    name: vLLM Ascend test
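> Note: the `&&`/`||` chain above behaves like a ternary, because GitHub expressions short-circuit and yield the last evaluated operand. Roughly (PR number 123 is illustrative):

```yaml
# Illustrative resolution of the concurrency group:
#   os == 'linux-arm64-npu-4' and a PR number exists
#     -> 'pr-123-limit-npu-4'              # all npu-4 jobs of one PR share a group
#   any other matrix entry
#     -> 'job-linux-arm64-npu-1-main-123'  # one group per (os, vllm_version, PR)
# cancel-in-progress: false makes same-group jobs queue instead of cancelling
# each other, which serializes work on the scarce npu-4 runners.
```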

@@ -66,6 +61,7 @@ jobs:
      env:
        HF_ENDPOINT: https://hf-mirror.com
        HF_TOKEN: ${{ secrets.HF_TOKEN }}
+       VLLM_LOGGING_LEVEL: ERROR
      steps:
        - name: Check npu and CANN info
          run: |

@@ -92,7 +88,7 @@ jobs:
        uses: actions/checkout@v4
        with:
          repository: vllm-project/vllm
-         ref: ${{ matrix.vllm_verison }}
+         ref: ${{ matrix.vllm_version }}
          path: ./vllm-empty

      - name: Install vllm-project/vllm from source

@@ -111,64 +107,32 @@ jobs:
        VLLM_WORKER_MULTIPROC_METHOD: spawn
      run: |
        if [[ "${{ matrix.os }}" == "linux-arm64-npu-1" ]]; then
-         pytest -sv tests/singlecard/test_offline_inference.py
-         pytest -sv tests/singlecard/test_ilama_lora.py
-         pytest -sv tests/ops
-         pytest -sv tests/compile
+         VLLM_USE_MODELSCOPE=True pytest -sv tests/singlecard/test_offline_inference.py
+         # AscendScheduler doesn't work, fix it later
+         # pytest -sv tests/singlecard/tets_schedule.py
+         # guided decoding doesn't work, fix it later

> Review comment: this should be fixed by @shen-shanshan in #969

+         # pytest -sv tests/singlecard/test_guided_decoding.py.py
+         pytest -sv tests/singlecard/ --ignore=tests/singlecard/test_offline_inference.py --ignore=tests/singlecard/test_scheduler.py --ignore=tests/singlecard/test_guided_decoding.py
        else
-         pytest -sv -k "QwQ" tests/multicard/test_offline_inference_distributed.py
-         pytest -sv tests/multicard/test_ilama_lora_tp2.py
-         pytest -sv tests/ops
-         pytest -sv tests/compile
+         VLLM_USE_MODELSCOPE=True pytest -sv tests/multicard/ --ignore=tests/multicard/test_ilama_lora_tp2.py
        fi
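> Note: the `VAR=value command` prefix sets `VLLM_USE_MODELSCOPE` for that one `pytest` invocation only, while `--ignore` carves the suites that already ran (or are known broken) out of the directory-wide pass. A sketch of the idiom (paths reused from the diff, step name illustrative):

```yaml
# Sketch: step-wide env vs. per-command env in a run block.
- name: Example test step
  env:
    VLLM_USE_V1: 1          # visible to every command in this step
  run: |
    VLLM_USE_MODELSCOPE=True pytest -sv tests/singlecard/test_offline_inference.py   # set for this command only
    pytest -sv tests/singlecard/ --ignore=tests/singlecard/test_offline_inference.py # skip what already ran
```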
    - name: Run vllm-project/vllm-ascend test on V0 engine
      env:
        VLLM_USE_V1: 0
      run: |
        if [[ "${{ matrix.os }}" == "linux-arm64-npu-1" ]]; then
-         pytest -sv tests/singlecard/test_ilama_lora.py
-         pytest -sv tests/singlecard/test_offline_inference.py
-         pytest -sv tests/ops
+         VLLM_USE_MODELSCOPE=True pytest -sv tests/singlecard/test_offline_inference.py
+         # AscendScheduler doesn't work, fix it later

> Review comment: ditto @zzzzwwjj

+         # pytest -sv tests/singlecard/tets_schedule.py
+         # guided decoding doesn't work, fix it later

> Review comment: ditto @shen-shanshan

+         # pytest -sv tests/singlecard/test_guided_decoding.py.py
+         pytest -sv tests/singlecard/ --ignore=tests/singlecard/test_offline_inference.py --ignore=tests/singlecard/test_scheduler.py --ignore=tests/singlecard/test_guided_decoding.py
        else
-         pytest -sv tests/multicard/test_ilama_lora_tp2.py
-         pytest -sv -k "QwQ" tests/multicard/test_offline_inference_distributed.py
-         pytest -sv -k "DeepSeek" tests/multicard/test_offline_inference_distributed.py
-         pytest -sv tests/ops
        fi
-   # only run test on spec decode when the related code changed
-   - name: Check for changes in Speculative Decode
-     if: github.event_name != 'schedule'
-     id: filter_spec_decode
-     uses: dorny/paths-filter@v3
-     with:
-       filters: |
-         speculative_tests_changed:
-           - ".github/workflows/vllm_ascend_test.yaml"
-           - "tests/singlecard/spec_decode/**"
-           - "tests/multicard/spec_decode_e2e/**"
-           - "vllm_ascend/worker/worker.py"
-           - "vllm_ascend/worker/model_runner.py"
-           - "vllm_ascend/worker/multi_step_runner.py"
-           - "vllm_ascend/worker/multi_step_worker.py"
-           - "vllm_ascend/worker/draft_model_runner.py"
-           - "vllm_ascend/patch/worker/patch_common/patch_metrics.py"
-           - "vllm_ascend/patch/worker/patch_common/patch_spec_decode_worker.py"
-           - "vllm_ascend/patch/worker/patch_common/patch_multi_step_worker.py"
-   - name: Run vllm-project/vllm-ascend Speculative Decode test
-     if: steps.filter_spec_decode.outputs.speculative_tests_changed == 'true' || github.event_name == 'schedule'
-     run: |
-       if [[ "${{ matrix.os }}" == "linux-arm64-npu-1" ]]; then
-         VLLM_USE_MODELSCOPE=true pytest -sv tests/singlecard/spec_decode/e2e/test_v1_spec_decode.py
-         pytest -sv tests/singlecard/spec_decode/e2e/test_mtp_correctness.py # it needs a clean process
-         pytest -sv tests/singlecard/spec_decode --ignore=tests/singlecard/spec_decode/e2e/test_mtp_correctness.py --ignore=tests/singlecard/spec_decode/e2e/test_v1_spec_decode.py
+         # Fixme: run VLLM_USE_MODELSCOPE=True pytest -sv tests/multicard/test_offline_inference_distributed.py will raise error.
+         VLLM_USE_MODELSCOPE=True pytest -sv tests/multicard/test_offline_inference_distributed.py::test_models_distributed_QwQ
+         VLLM_USE_MODELSCOPE=True pytest -sv tests/multicard/test_offline_inference_distributed.py::test_models_distributed_DeepSeek
+         VLLM_USE_MODELSCOPE=True pytest -sv tests/multicard/ --ignore=tests/multicard/test_ilama_lora_tp2.py --ignore=tests/multicard/test_offline_inference_distributed.py
        fi
    - name: Run vllm-project/vllm test for V0 Engine
      env:
        VLLM_USE_V1: 0
        PYTORCH_NPU_ALLOC_CONF: max_split_size_mb:256
      run: |
        pytest -sv
New file: workflow 'e2e test / long-term-test'

@@ -0,0 +1,98 @@
#
# Copyright (c) 2025 Huawei Technologies Co., Ltd. All Rights Reserved.
# This file is a part of the vllm-ascend project.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
name: 'e2e test / long-term-test'

on:
  schedule:
    # Runs at 23:00 UTC (7:00 AM Beijing) every day
    - cron: '0 23 * * *'
  pull_request:
    types: [ labeled ]

# Bash shells do not use ~/.profile or ~/.bashrc so these shells need to be explicitly
# declared as "shell: bash -el {0}" on steps that need to be properly activated.
# It's used to activate ascend-toolkit environment variables.
defaults:
  run:
    shell: bash -el {0}

concurrency:
  group: pr-${{ github.event.pull_request.number }}
  cancel-in-progress: true

jobs:
  long-term-test:
    # long-term-test runs when both the 'long-term-test' and 'ready-for-test' labels are set, or on the schedule job
    if: ${{ contains(github.event.pull_request.labels.*.name, 'long-term-test') && contains(github.event.pull_request.labels.*.name, 'ready-for-test') || github.event_name == 'schedule' }}
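> Note: `&&` binds tighter than `||` in GitHub expressions, so the condition reads as "(both labels present) or (scheduled run)". Illustrative outcomes:

```yaml
# Illustrative resolution of the job-level `if` above:
#   labels: [long-term-test, ready-for-test], event: pull_request -> run
#   labels: [long-term-test],                 event: pull_request -> skip
#   labels: [],                               event: schedule     -> run (nightly)
```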
    strategy:
      max-parallel: 2
      matrix:
        vllm_version: [main, v0.8.5.post1]
    name: vLLM Ascend long term test
    runs-on: linux-arm64-npu-1
    container:
      # TODO(yikun): Remove m.daocloud.io prefix when infra proxy ready
      image: m.daocloud.io/quay.io/ascend/cann:8.1.rc1-910b-ubuntu22.04-py3.10
      env:
        HF_ENDPOINT: https://hf-mirror.com
        HF_TOKEN: ${{ secrets.HF_TOKEN }}
        VLLM_LOGGING_LEVEL: ERROR
    steps:
      - name: Check npu and CANN info
        run: |
          npu-smi info
          cat /usr/local/Ascend/ascend-toolkit/latest/"$(uname -i)"-linux/ascend_toolkit_install.info

      - name: Config mirrors
        run: |
          sed -i 's|ports.ubuntu.com|mirrors.tuna.tsinghua.edu.cn|g' /etc/apt/sources.list
          pip config set global.index-url https://mirrors.tuna.tsinghua.edu.cn/pypi/web/simple
          apt-get update -y
          apt install git -y
          git config --global url."https://gh-proxy.test.osinfra.cn/https://github.com/".insteadOf https://github.com/
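> Note: the `insteadOf` rule makes every subsequent `git` fetch of a `https://github.com/...` URL go through the gh-proxy mirror transparently, including the checkout steps below; for example (sketch, repo choice illustrative):

```yaml
# After the rewrite above, this clone is actually fetched via
# https://gh-proxy.test.osinfra.cn/https://github.com/vllm-project/vllm.git
- run: git clone https://github.com/vllm-project/vllm.git
```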
      - name: Checkout vllm-project/vllm-ascend repo
        uses: actions/checkout@v4

      - name: Install system dependencies
        run: |
          apt-get -y install `cat packages.txt`
          apt-get -y install gcc g++ cmake libnuma-dev

      - name: Checkout vllm-project/vllm repo
        uses: actions/checkout@v4
        with:
          repository: vllm-project/vllm
          ref: ${{ matrix.vllm_version }}
          path: ./vllm-empty

      - name: Install vllm-project/vllm from source
        working-directory: ./vllm-empty
        run: |
          VLLM_TARGET_DEVICE=empty pip install -e .
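> Note: `VLLM_TARGET_DEVICE=empty` builds vLLM without any device-specific kernels, leaving a Python-only install whose hardware backend is supplied by the out-of-tree plugin installed next. In sketch form:

```yaml
# Sketch: device-less vLLM install, then the NPU plugin on top of it.
- run: |
    VLLM_TARGET_DEVICE=empty pip install -e ./vllm-empty  # no CUDA/ROCm kernels are built
    pip install -v -e .                                   # vllm-ascend provides the Ascend backend
```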
      - name: Install vllm-project/vllm-ascend
        run: |
          pip install -r requirements-dev.txt
          pip install -v -e .

      - name: Run vllm-project/vllm-ascend long term test
        run: |
          # spec decode test
          VLLM_USE_MODELSCOPE=true pytest -sv tests/long_term/spec_decode/e2e/test_v1_spec_decode.py
          VLLM_USE_MODELSCOPE=True pytest -sv tests/long_term/spec_decode/e2e/test_mtp_correctness.py # it needs a clean process
          pytest -sv tests/long_term/spec_decode --ignore=tests/long_term/spec_decode/e2e/test_mtp_correctness.py --ignore=tests/long_term/spec_decode/e2e/test_v1_spec_decode.py
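> Note: the spec decode suites dropped from the main workflow reappear here under `tests/long_term/`, now gated by labels or the nightly schedule instead of changed paths. One observation (mine, not raised in the diff): on `schedule` events `github.event.pull_request.number` is empty, so the concurrency group collapses to the literal `pr-`; a possible guard, as a sketch:

```yaml
# Sketch: fall back to the run id for non-PR events so nightly runs
# don't all land in the same literal group 'pr-'.
concurrency:
  group: pr-${{ github.event.pull_request.number || github.run_id }}
  cancel-in-progress: true
```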
Review comments:

- also cc @mengwei805: the spec decode tests will be triggered manually after this PR. See the commit message:
- I consider it a wonderful refactor.
- this should be fixed by @zzzzwwjj in #939