[perf]: support dual-batch overlap(dbo) for deepseek #2828

Workflow file for this run

.github/workflows/vllm_ascend_test.yaml at cfe6a5a

	#
	# Copyright (c) 2025 Huawei Technologies Co., Ltd. All Rights Reserved.
	#
	# Licensed under the Apache License, Version 2.0 (the "License");
	# you may not use this file except in compliance with the License.
	# You may obtain a copy of the License at
	#
	# http://www.apache.org/licenses/LICENSE-2.0
	#
	# Unless required by applicable law or agreed to in writing, software
	# distributed under the License is distributed on an "AS IS" BASIS,
	# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	# See the License for the specific language governing permissions and
	# limitations under the License.
	# This file is a part of the vllm-ascend project.
	#

	name: 'e2e test'

	# Triggers:
	#   * schedule     - cron '0 23 * * *', i.e. once a day at 23:00
	#                    (GitHub Actions evaluates cron expressions in UTC).
	#   * pull_request - only PRs targeting 'main' or a '*-dev' branch, and only
	#                    when at least one changed file matches the path filters.
	on:
	  schedule:
	    - cron: '0 23 * * *'
	  pull_request:
	    branches:
	      - 'main'
	      - '*-dev'
	    paths:
	      # '*.txt' matches top-level .txt files only ('*' does not cross '/').
	      - '*.txt'
	      # '**/*.py' matches Python files at any depth. The previous '*/.py'
	      # only matched a file literally named '.py' one directory down.
	      - '**/*.py'
	      - '.github/workflows/vllm_ascend_test.yaml'
	      # Negated pattern: changes under docs/ do not trigger the workflow.
	      - '!docs/**'
	      - 'pytest.ini'

	# Non-login, non-interactive bash does not read ~/.profile or ~/.bashrc, so
	# every `run` step is started as a login shell ("bash -el {0}"):
	#   -l  login shell, so the profile scripts are sourced
	#   -e  abort the step on the first failing command
	# This is what activates the ascend-toolkit environment variables.
	defaults:
	  run:
	    shell: bash -el {0}

	# Workflow-level concurrency: one group per pull request, and a newer run
	# cancels the run of the same group that is still in progress.
	# NOTE(review): on `schedule` events there is no pull_request payload, so
	# the group presumably collapses to the literal 'pr-' - confirm this is the
	# intended behaviour for nightly runs.
	concurrency:
	  group: pr-${{ github.event.pull_request.number }}
	  cancel-in-progress: true

	jobs:
	  # Single job, fanned out over (runner label x vLLM ref). Each instance
	  # installs vLLM and vllm-ascend from source inside a CANN container and
	  # then runs the pytest suites selected for that runner label.
	  test:
	    strategy:
	      max-parallel: 2
	      matrix:
	        os: [linux-arm64-npu-1, linux-arm64-npu-4]
	        # NOTE(review): 'vllm_verison' is a misspelling of 'vllm_version'.
	        # It is kept because every reference below uses the same spelling.
	        vllm_verison: [main, v0.8.5.post1]
	    # Job-level concurrency group:
	    #   * npu-4 runner AND a PR number is present -> 'pr-<N>-limit-npu-4',
	    #     shared by every npu-4 matrix entry of that PR, so those entries
	    #     do not run at the same time;
	    #   * otherwise -> 'job-<os>-<vllm ref>-<N>', unique per matrix entry.
	    # Runs in the same group are never cancelled (cancel-in-progress: false).
	    concurrency:
	      group: >
	        ${{
	        matrix.os == 'linux-arm64-npu-4'
	        && github.event.pull_request.number
	        && format('pr-{0}-limit-npu-4', github.event.pull_request.number)
	        || format('job-{0}-{1}-{2}', matrix.os, matrix.vllm_verison, github.event.pull_request.number)
	        }}
	      cancel-in-progress: false
	    name: vLLM Ascend test
	    runs-on: ${{ matrix.os }}
	    container:
	      # TODO(yikun): Remove m.daocloud.io prefix when infra proxy ready
	      image: m.daocloud.io/quay.io/ascend/cann:8.1.rc1-910b-ubuntu22.04-py3.10
	      # NOTE(review): nesting was reconstructed from a flattened listing;
	      # `env` is assumed to belong to `container` - confirm.
	      env:
	        HF_ENDPOINT: https://hf-mirror.com
	        HF_TOKEN: ${{ secrets.HF_TOKEN }}
	    steps:
	      - name: Check npu and CANN info
	        run: |
	          npu-smi info
	          cat /usr/local/Ascend/ascend-toolkit/latest/"$(uname -i)"-linux/ascend_toolkit_install.info

	      # Point apt, pip and git at mirrors / a GitHub proxy. git is installed
	      # here, before the first checkout step runs.
	      - name: Config mirrors
	        run: |
	          sed -i 's|ports.ubuntu.com|mirrors.tuna.tsinghua.edu.cn|g' /etc/apt/sources.list
	          pip config set global.index-url https://mirrors.tuna.tsinghua.edu.cn/pypi/web/simple
	          apt-get update -y
	          apt install git -y
	          git config --global url."https://gh-proxy.test.osinfra.cn/https://github.com/".insteadOf https://github.com/

	      - name: Checkout vllm-project/vllm-ascend repo
	        uses: actions/checkout@v4

	      - name: Install system dependencies
	        run: |
	          # packages.txt is read from the checkout made by the previous step.
	          apt-get -y install $(cat packages.txt)
	          apt-get -y install gcc g++ cmake libnuma-dev

	      # Upstream vLLM is checked out next to the workspace at the ref chosen
	      # by the matrix.
	      - name: Checkout vllm-project/vllm repo
	        uses: actions/checkout@v4
	        with:
	          repository: vllm-project/vllm
	          ref: ${{ matrix.vllm_verison }}
	          path: ./vllm-empty

	      - name: Install vllm-project/vllm from source
	        working-directory: ./vllm-empty
	        run: |
	          VLLM_TARGET_DEVICE=empty pip install -e .

	      - name: Install vllm-project/vllm-ascend
	        run: |
	          pip install -r requirements-dev.txt
	          pip install -v -e .

	      # V1 engine: the single-card runner runs the single-card offline
	      # inference test; the 4-card runner runs the "QwQ" distributed cases.
	      # Both also run tests/ops and tests/compile.
	      - name: Run vllm-project/vllm-ascend test for V1 Engine
	        env:
	          VLLM_USE_V1: 1
	          VLLM_WORKER_MULTIPROC_METHOD: spawn
	        run: |
	          if [[ "${{ matrix.os }}" == "linux-arm64-npu-1" ]]; then
	            pytest -sv tests/singlecard/test_offline_inference.py
	            pytest -sv tests/ops
	            pytest -sv tests/compile
	          else
	            pytest -sv -k "QwQ" tests/multicard/test_offline_inference_distributed.py
	            pytest -sv tests/ops
	            pytest -sv tests/compile
	          fi

	      # V0 engine: same split by runner, without tests/compile; the 4-card
	      # runner additionally runs the "DeepSeek" distributed cases.
	      - name: Run vllm-project/vllm-ascend test on V0 engine
	        env:
	          VLLM_USE_V1: 0
	        run: |
	          if [[ "${{ matrix.os }}" == "linux-arm64-npu-1" ]]; then
	            pytest -sv tests/singlecard/test_offline_inference.py
	            pytest -sv tests/ops
	          else
	            pytest -sv -k "QwQ" tests/multicard/test_offline_inference_distributed.py
	            pytest -sv -k "DeepSeek" tests/multicard/test_offline_inference_distributed.py
	            pytest -sv tests/ops
	          fi

	      # only run test on spec decode when the related code changed
	      # (skipped on scheduled runs, where the tests below always run).
	      - name: Check for changes in Speculative Decode
	        if: github.event_name != 'schedule'
	        id: filter_spec_decode
	        uses: dorny/paths-filter@v3
	        with:
	          filters: |
	            speculative_tests_changed:
	              - ".github/workflows/vllm_ascend_test.yaml"
	              - "tests/singlecard/spec_decode/**"
	              - "tests/multicard/spec_decode_e2e/**"
	              - "vllm_ascend/worker/worker.py"
	              - "vllm_ascend/worker/model_runner.py"
	              - "vllm_ascend/worker/multi_step_runner.py"
	              - "vllm_ascend/worker/multi_step_worker.py"
	              - "vllm_ascend/worker/draft_model_runner.py"
	              - "vllm_ascend/patch/worker/patch_common/patch_metrics.py"
	              - "vllm_ascend/patch/worker/patch_common/patch_spec_decode_worker.py"
	              - "vllm_ascend/patch/worker/patch_common/patch_multi_step_worker.py"

	      # Runs when the filter above matched, or unconditionally on scheduled
	      # runs. Only the single-card runner executes anything here.
	      - name: Run vllm-project/vllm-ascend Speculative Decode test
	        if: steps.filter_spec_decode.outputs.speculative_tests_changed == 'true' || github.event_name == 'schedule'
	        run: |
	          if [[ "${{ matrix.os }}" == "linux-arm64-npu-1" ]]; then
	            pytest -sv tests/singlecard/spec_decode/e2e/test_mtp_correctness.py # it needs a clean process
	            pytest -sv tests/singlecard/spec_decode --ignore=tests/singlecard/spec_decode/e2e/test_mtp_correctness.py
	          fi

	      # NOTE(review): pytest is invoked without a path, so what gets
	      # collected is presumably decided by pytest.ini - confirm.
	      - name: Run vllm-project/vllm test for V0 Engine
	        env:
	          VLLM_USE_V1: 0
	          PYTORCH_NPU_ALLOC_CONF: max_split_size_mb:256
	        run: |
	          pytest -sv

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Uh oh!

[perf]: support dual-batch overlap(dbo) for deepseek #2828

Workflow file

[perf]: support dual-batch overlap(dbo) for deepseek #2828

Uh oh!

Jobs

Run details

Workflow file for this run