Skip to content

Commit 9d47288

Browse files
authored
Merge branch 'vllm-project:main' into ovis
2 parents fb035dd + a810b5b commit 9d47288

File tree

160 files changed

+7733
-2215
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

160 files changed

+7733
-2215
lines changed
Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
# bash .buildkite/lm-eval-harness/run-lm-eval-gsm-vllm-baseline.sh -m RedHatAI/Llama-3.2-1B-Instruct-FP8 -b "auto" -l 1319 -f 5 -t 1
2+
model_name: "RedHatAI/Llama-3.2-1B-Instruct-FP8"
3+
tasks:
4+
- name: "gsm8k"
5+
metrics:
6+
- name: "exact_match,strict-match"
7+
value: 0.335
8+
- name: "exact_match,flexible-extract"
9+
value: 0.323
10+
limit: 1319
11+
num_fewshot: 5
Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
# bash .buildkite/lm-eval-harness/run-lm-eval-gsm-vllm-baseline.sh -m Qwen/Qwen2.5-1.5B-Instruct -b auto -l 1319 -f 5 -t 1
2+
model_name: "Qwen/Qwen2.5-1.5B-Instruct"
3+
tasks:
4+
- name: "gsm8k"
5+
metrics:
6+
- name: "exact_match,strict-match"
7+
value: 0.54
8+
- name: "exact_match,flexible-extract"
9+
value: 0.59
10+
limit: 1319
11+
num_fewshot: 5
Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
# bash .buildkite/lm-eval-harness/run-lm-eval-gsm-vllm-baseline.sh -m RedHatAI/Qwen2.5-VL-3B-Instruct-FP8-Dynamic -b auto -l 1319 -f 5 -t 1
2+
model_name: "RedHatAI/Qwen2.5-VL-3B-Instruct-FP8-Dynamic"
3+
tasks:
4+
- name: "gsm8k"
5+
metrics:
6+
- name: "exact_match,strict-match"
7+
value: 0.47
8+
- name: "exact_match,flexible-extract"
9+
value: 0.64
10+
limit: 1319
11+
num_fewshot: 5

.buildkite/lm-eval-harness/configs/models-large.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,3 +3,4 @@ Meta-Llama-3-70B-Instruct.yaml
33
Mixtral-8x7B-Instruct-v0.1.yaml
44
Qwen2-57B-A14-Instruct.yaml
55
DeepSeek-V2-Lite-Chat.yaml
6+
Meta-Llama-3-8B-QQQ.yaml
Lines changed: 2 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,6 @@
1-
Meta-Llama-3-8B-Instruct.yaml
2-
Meta-Llama-3-8B-Instruct-FP8-compressed-tensors.yaml
1+
Qwen2.5-1.5B-Instruct.yaml
32
Meta-Llama-3.2-1B-Instruct-INT8-compressed-tensors.yaml
43
Meta-Llama-3-8B-Instruct-INT8-compressed-tensors-asym.yaml
54
Meta-Llama-3-8B-Instruct-nonuniform-compressed-tensors.yaml
6-
Meta-Llama-3-8B-Instruct-Channelwise-compressed-tensors.yaml
5+
Qwen2.5-VL-3B-Instruct-FP8-dynamic.yaml
76
Qwen1.5-MoE-W4A16-compressed-tensors.yaml
8-
Qwen2-1.5B-Instruct-INT8-compressed-tensors.yaml
9-
Qwen2-1.5B-Instruct-FP8W8.yaml
10-
Meta-Llama-3-8B-QQQ.yaml

.buildkite/scripts/hardware_ci/run-amd-test.sh

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,9 @@
33
# This script runs test inside the corresponding ROCm docker container.
44
set -o pipefail
55

6+
# Export Python path
7+
export PYTHONPATH=".."
8+
69
# Print ROCm version
710
echo "--- Confirming Clean Initial State"
811
while true; do
@@ -74,6 +77,15 @@ HF_MOUNT="/root/.cache/huggingface"
7477

7578
commands=$@
7679
echo "Commands:$commands"
80+
81+
if [[ $commands == *"pytest -v -s basic_correctness/test_basic_correctness.py"* ]]; then
82+
commands=${commands//"pytest -v -s basic_correctness/test_basic_correctness.py"/"VLLM_USE_TRITON_FLASH_ATTN=0 pytest -v -s basic_correctness/test_basic_correctness.py"}
83+
fi
84+
85+
if [[ $commands == *"pytest -v -s compile/test_basic_correctness.py"* ]]; then
86+
commands=${commands//"pytest -v -s compile/test_basic_correctness.py"/"VLLM_USE_TRITON_FLASH_ATTN=0 pytest -v -s compile/test_basic_correctness.py"}
87+
fi
88+
7789
#ignore certain kernels tests
7890
if [[ $commands == *" kernels/core"* ]]; then
7991
commands="${commands} \
@@ -161,6 +173,8 @@ fi
161173

162174

163175
PARALLEL_JOB_COUNT=8
176+
MYPYTHONPATH=".."
177+
164178
# check if the command contains shard flag, we will run all shards in parallel because the host have 8 GPUs.
165179
if [[ $commands == *"--shard-id="* ]]; then
166180
# assign job count as the number of shards used
@@ -181,6 +195,7 @@ if [[ $commands == *"--shard-id="* ]]; then
181195
-e AWS_SECRET_ACCESS_KEY \
182196
-v "${HF_CACHE}:${HF_MOUNT}" \
183197
-e "HF_HOME=${HF_MOUNT}" \
198+
-e "PYTHONPATH=${MYPYTHONPATH}" \
184199
--name "${container_name}_${GPU}" \
185200
"${image_name}" \
186201
/bin/bash -c "${commands_gpu}" \
@@ -211,6 +226,7 @@ else
211226
-e AWS_SECRET_ACCESS_KEY \
212227
-v "${HF_CACHE}:${HF_MOUNT}" \
213228
-e "HF_HOME=${HF_MOUNT}" \
229+
-e "PYTHONPATH=${MYPYTHONPATH}" \
214230
--name "${container_name}" \
215231
"${image_name}" \
216232
/bin/bash -c "${commands}"
Lines changed: 74 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
#!/bin/bash
22

3-
set -xue
3+
set -xu
44

55
# Build the docker image.
66
docker build -f docker/Dockerfile.tpu -t vllm-tpu .
@@ -24,36 +24,80 @@ docker run --privileged --net host --shm-size=16G -it \
2424
&& export VLLM_XLA_CHECK_RECOMPILATION=1 \
2525
&& echo HARDWARE \
2626
&& tpu-info \
27-
&& echo TEST_0 \
28-
&& pytest -v -s /workspace/vllm/tests/v1/tpu/test_perf.py \
29-
&& echo TEST_1 \
30-
&& pytest -v -s /workspace/vllm/tests/tpu/test_compilation.py \
31-
&& echo TEST_2 \
32-
&& pytest -v -s /workspace/vllm/tests/v1/tpu/test_basic.py \
33-
&& echo TEST_3 \
34-
&& pytest -v -s /workspace/vllm/tests/entrypoints/llm/test_accuracy.py::test_lm_eval_accuracy_v1_engine \
35-
&& echo TEST_4 \
36-
&& pytest -s -v /workspace/vllm/tests/tpu/test_quantization_accuracy.py \
37-
&& echo TEST_5 \
38-
&& python3 /workspace/vllm/examples/offline_inference/tpu.py \
39-
&& echo TEST_6 \
40-
&& pytest -s -v /workspace/vllm/tests/v1/tpu/worker/test_tpu_model_runner.py \
41-
&& echo TEST_7 \
42-
&& pytest -s -v /workspace/vllm/tests/v1/tpu/test_sampler.py \
43-
&& echo TEST_8 \
44-
&& pytest -s -v /workspace/vllm/tests/v1/tpu/test_topk_topp_sampler.py \
45-
&& echo TEST_9 \
46-
&& pytest -s -v /workspace/vllm/tests/v1/tpu/test_multimodal.py \
47-
&& echo TEST_10 \
48-
&& pytest -s -v /workspace/vllm/tests/v1/tpu/test_pallas.py \
49-
&& echo TEST_11 \
50-
&& pytest -s -v /workspace/vllm/tests/v1/entrypoints/llm/test_struct_output_generate.py \
51-
&& echo TEST_12 \
52-
&& pytest -s -v /workspace/vllm/tests/tpu/test_moe_pallas.py" \
27+
&& { \
28+
echo TEST_0: Running test_perf.py; \
29+
pytest -s -v /workspace/vllm/tests/tpu/test_perf.py; \
30+
echo TEST_0_EXIT_CODE: \$?; \
31+
} & \
32+
&& { \
33+
echo TEST_1: Running test_compilation.py; \
34+
pytest -s -v /workspace/vllm/tests/tpu/test_compilation.py; \
35+
echo TEST_1_EXIT_CODE: \$?; \
36+
} & \
37+
{ \
38+
echo TEST_2: Running test_basic.py; \
39+
pytest -s -v /workspace/vllm/tests/v1/tpu/test_basic.py; \
40+
echo TEST_2_EXIT_CODE: \$?; \
41+
} & \
42+
{ \
43+
echo TEST_3: Running test_accuracy.py::test_lm_eval_accuracy_v1_engine; \
44+
pytest -s -v /workspace/vllm/tests/entrypoints/llm/test_accuracy.py::test_lm_eval_accuracy_v1_engine; \
45+
echo TEST_3_EXIT_CODE: \$?; \
46+
} & \
47+
{ \
48+
echo TEST_4: Running test_quantization_accuracy.py; \
49+
pytest -s -v /workspace/vllm/tests/tpu/test_quantization_accuracy.py; \
50+
echo TEST_4_EXIT_CODE: \$?; \
51+
} & \
52+
{ \
53+
echo TEST_5: Running examples/offline_inference/tpu.py; \
54+
python3 /workspace/vllm/examples/offline_inference/tpu.py; \
55+
echo TEST_5_EXIT_CODE: \$?; \
56+
} & \
57+
{ \
58+
echo TEST_6: Running test_tpu_model_runner.py; \
59+
pytest -s -v /workspace/vllm/tests/tpu/worker/test_tpu_model_runner.py; \
60+
echo TEST_6_EXIT_CODE: \$?; \
61+
} & \
62+
&& { \
63+
echo TEST_7: Running test_sampler.py; \
64+
pytest -s -v /workspace/vllm/tests/v1/tpu/test_sampler.py; \
65+
echo TEST_7_EXIT_CODE: \$?; \
66+
} & \
67+
&& { \
68+
echo TEST_8: Running test_topk_topp_sampler.py; \
69+
pytest -s -v /workspace/vllm/tests/v1/tpu/test_topk_topp_sampler.py; \
70+
echo TEST_8_EXIT_CODE: \$?; \
71+
} & \
72+
&& { \
73+
echo TEST_9: Running test_multimodal.py; \
74+
pytest -s -v /workspace/vllm/tests/v1/tpu/test_multimodal.py; \
75+
echo TEST_9_EXIT_CODE: \$?; \
76+
} & \
77+
&& { \
78+
echo TEST_10: Running test_pallas.py; \
79+
pytest -s -v /workspace/vllm/tests/v1/tpu/test_pallas.py; \
80+
echo TEST_10_EXIT_CODE: \$?; \
81+
} & \
82+
&& { \
83+
echo TEST_11: Running test_struct_output_generate.py; \
84+
pytest -s -v /workspace/vllm/tests/v1/entrypoints/llm/test_struct_output_generate.py; \
85+
echo TEST_11_EXIT_CODE: \$?; \
86+
} & \
87+
&& { \
88+
echo TEST_12: Running test_moe_pallas.py; \
89+
pytest -s -v /workspace/vllm/tests/tpu/test_moe_pallas.py; \
90+
echo TEST_12_EXIT_CODE: \$?; \
91+
} & \
5392
# Disable the TPU LoRA tests until the feature is activated
54-
# && echo TEST_13 \
55-
# && pytest -s -v /workspace/vllm/tests/tpu/lora/" \
56-
93+
# && { \
94+
# echo TEST_13: Running TPU LoRA tests (tests/tpu/lora/); \
95+
# pytest -s -v /workspace/vllm/tests/tpu/lora/; \
96+
# echo TEST_13_EXIT_CODE: \$?; \
97+
# } & \
98+
wait \
99+
&& echo 'All tests have attempted to run. Check logs for individual test statuses and exit codes.' \
100+
"
57101

58102
# TODO: This test fails because it uses RANDOM_SEED sampling
59103
# && VLLM_USE_V1=1 pytest -v -s /workspace/vllm/tests/tpu/test_custom_dispatcher.py \

0 commit comments

Comments
 (0)