Commit d1dec64

[CI/Build][ROCm] Enabling LoRA tests on ROCm (#7369)
Co-authored-by: Simon Mo <simon.mo@hey.com>
1 parent 2ad2e56 commit d1dec64

4 files changed: +64 −14 lines

.buildkite/run-amd-test.sh

File mode changed: 100644 → 100755
Lines changed: 41 additions & 6 deletions
@@ -1,5 +1,5 @@
 # This script runs test inside the corresponding ROCm docker container.
-set -ex
+set -o pipefail
 
 # Print ROCm version
 echo "--- Confirming Clean Initial State"
@@ -70,16 +70,51 @@ HF_CACHE="$(realpath ~)/huggingface"
 mkdir -p ${HF_CACHE}
 HF_MOUNT="/root/.cache/huggingface"
 
-docker run \
+commands=$@
+PARALLEL_JOB_COUNT=8
+# check if the command contains shard flag, we will run all shards in parallel because the host have 8 GPUs.
+if [[ $commands == *"--shard-id="* ]]; then
+  for GPU in $(seq 0 $(($PARALLEL_JOB_COUNT-1))); do
+    #replace shard arguments
+    commands=${@//"--shard-id= "/"--shard-id=${GPU} "}
+    commands=${commands//"--num-shards= "/"--num-shards=${PARALLEL_JOB_COUNT} "}
+    docker run \
         --device /dev/kfd --device /dev/dri \
         --network host \
         --shm-size=16gb \
         --rm \
-        -e HIP_VISIBLE_DEVICES=0 \
+        -e HIP_VISIBLE_DEVICES=${GPU} \
         -e HF_TOKEN \
         -v ${HF_CACHE}:${HF_MOUNT} \
         -e HF_HOME=${HF_MOUNT} \
-        --name ${container_name} \
+        --name ${container_name}_${GPU} \
         ${image_name} \
-        /bin/bash -c "${@}"
-
+        /bin/bash -c "${commands}" \
+        |& while read -r line; do echo ">>Shard $GPU: $line"; done &
+    PIDS+=($!)
+  done
+  #wait for all processes to finish and collect exit codes
+  for pid in ${PIDS[@]}; do
+    wait ${pid}
+    STATUS+=($?)
+  done
+  for st in ${STATUS[@]}; do
+    if [[ ${st} -ne 0 ]]; then
+      echo "One of the processes failed with $st"
+      exit ${st}
+    fi
+  done
+else
+  docker run \
+        --device /dev/kfd --device /dev/dri \
+        --network host \
+        --shm-size=16gb \
+        --rm \
+        -e HIP_VISIBLE_DEVICES=0 \
+        -e HF_TOKEN \
+        -v ${HF_CACHE}:${HF_MOUNT} \
+        -e HF_HOME=${HF_MOUNT} \
+        --name ${container_name} \
+        ${image_name} \
+        /bin/bash -c "${commands}"
+fi
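
For reference, a minimal sketch of the fan-out pattern used by the sharded branch above (not part of the commit; the pytest command and the two-job count are illustrative stand-ins for the real values):

commands="pytest -v -s lora --shard-id= --num-shards= "  # placeholder flags to be filled per job
PARALLEL_JOB_COUNT=2                                     # the script itself uses 8
PIDS=()
for GPU in $(seq 0 $(($PARALLEL_JOB_COUNT-1))); do
  # rewrite the empty placeholder flags with per-job values
  cmd=${commands//"--shard-id= "/"--shard-id=${GPU} "}
  cmd=${cmd//"--num-shards= "/"--num-shards=${PARALLEL_JOB_COUNT} "}
  # stand-in for the per-GPU docker run; each job runs in the background
  bash -c "echo shard ${GPU} would run: ${cmd}" &
  PIDS+=($!)
done
# wait for every background job and propagate the first non-zero exit code
for pid in "${PIDS[@]}"; do
  wait "${pid}" || exit $?
done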

.buildkite/test-pipeline.yaml

Lines changed: 1 addition & 2 deletions
@@ -218,9 +218,9 @@ steps:
   - pytest -v -s spec_decode
 
 - label: LoRA Test %N # 30min each
+  mirror_hardwares: [amd]
   source_file_dependencies:
   - vllm/lora
-  - csrc/punica
   - tests/lora
   command: pytest -v -s lora --shard-id=$$BUILDKITE_PARALLEL_JOB --num-shards=$$BUILDKITE_PARALLEL_JOB_COUNT --ignore=lora/test_long_context.py
   parallelism: 4
@@ -360,7 +360,6 @@ steps:
   num_gpus: 4
   source_file_dependencies:
   - vllm/lora
-  - csrc/punica
   - tests/lora/test_long_context
   commands:
   # FIXIT: find out which code initialize cuda before running the test
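
A note on the LoRA Test step above: with parallelism: 4, Buildkite runs four copies of the step and exports a per-job index and total count, and the doubled $$ prevents the variables from being interpolated when the pipeline is uploaded, so they expand inside each job. An illustrative expansion for the second job, assuming Buildkite's standard parallel-job environment variables:

export BUILDKITE_PARALLEL_JOB=1
export BUILDKITE_PARALLEL_JOB_COUNT=4
# the step command above then resolves to:
pytest -v -s lora --shard-id=1 --num-shards=4 --ignore=lora/test_long_context.py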

tests/lora/test_gemma.py

Lines changed: 4 additions & 0 deletions
@@ -1,7 +1,10 @@
 from typing import List
 
+import pytest
+
 import vllm
 from vllm.lora.request import LoRARequest
+from vllm.utils import is_hip
 
 MODEL_PATH = "google/gemma-7b"
 
@@ -28,6 +31,7 @@ def do_sample(llm: vllm.LLM, lora_path: str, lora_id: int) -> List[str]:
     return generated_texts
 
 
+@pytest.mark.xfail(is_hip(), reason="There can be output mismatch on ROCm")
 def test_gemma_lora(gemma_lora_files):
     llm = vllm.LLM(MODEL_PATH,
                    max_model_len=1024,

tests/lora/test_quant_model.py

Lines changed: 18 additions & 6 deletions
@@ -7,6 +7,7 @@
 
 import vllm
 from vllm.lora.request import LoRARequest
+from vllm.utils import is_hip
 
 from .conftest import cleanup
 
@@ -17,12 +18,23 @@ class ModelWithQuantization:
     quantization: str
 
 
-MODELS: List[ModelWithQuantization] = [
-    ModelWithQuantization(model_path="TheBloke/TinyLlama-1.1B-Chat-v0.3-AWQ",
-                          quantization="AWQ"),
-    ModelWithQuantization(model_path="TheBloke/TinyLlama-1.1B-Chat-v0.3-GPTQ",
-                          quantization="GPTQ"),
-]
+MODELS: List[ModelWithQuantization]
+#AWQ quantization is currently not supported in ROCm.
+if is_hip():
+    MODELS = [
+        ModelWithQuantization(
+            model_path="TheBloke/TinyLlama-1.1B-Chat-v0.3-GPTQ",
+            quantization="GPTQ"),
+    ]
+else:
+    MODELS = [
+        ModelWithQuantization(
+            model_path="TheBloke/TinyLlama-1.1B-Chat-v0.3-AWQ",
+            quantization="AWQ"),
+        ModelWithQuantization(
+            model_path="TheBloke/TinyLlama-1.1B-Chat-v0.3-GPTQ",
+            quantization="GPTQ"),
+    ]
 
 
 def do_sample(llm: vllm.LLM,
