@@ -9,6 +9,7 @@
# label(str): the name of the test. emoji allowed.
# fast_check(bool): whether to run this on each commit on fastcheck pipeline.
# fast_check_only(bool): run this test on fastcheck pipeline only
+ # optional(bool): never run this test by default (i.e. need to unblock manually)
# command(str): the single command to run for tests. incompatible with commands.
# commands(list): the list of commands to run for test. incompatible with command.
# mirror_hardwares(list): the list of hardwares to run the test on as well. currently only supports [amd]
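
Taken together, these fields describe one entry in the pipeline's step list. For orientation, a minimal step using them might look like the sketch below; the label, dependency paths, and pytest target are hypothetical placeholders, not entries from this pipeline:

    # illustrative step, not part of this diff
    - label: Example Feature Test # 10min
      fast_check: true
      optional: false
      mirror_hardwares: [amd]
      source_file_dependencies:
      - vllm/
      - tests/example_feature
      commands:
      - pytest -v -s example_feature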
@@ -39,7 +40,7 @@
  # Check API reference (if it fails, you may have missing mock imports)
  - grep "sig sig-object py" build/html/dev/sampling_params.html

- - label: Async Engine, Inputs, Utils, Worker Test # 15min
+ - label: Async Engine, Inputs, Utils, Worker Test # 24min
  fast_check: true
  source_file_dependencies:
  - vllm/
@@ -81,7 +82,7 @@
  commands:
  - pytest -v -s core

- - label: Entrypoints Test # 20min
+ - label: Entrypoints Test # 40min
  working_dir: "/vllm-workspace/tests"
  fast_check: true
  mirror_hardwares: [amd]
@@ -95,7 +96,8 @@
  - pytest -v -s entrypoints/llm/test_generate.py # it needs a clean process
  - pytest -v -s entrypoints/llm/test_generate_multiple_loras.py # it needs a clean process
  - pytest -v -s entrypoints/llm/test_guided_generate.py # it needs a clean process
-   - pytest -v -s entrypoints/openai
+   - pytest -v -s entrypoints/openai --ignore=entrypoints/openai/test_oot_registration.py
+   - pytest -v -s entrypoints/openai/test_oot_registration.py # it needs a clean process
  - pytest -v -s entrypoints/test_chat_utils.py
  - pytest -v -s entrypoints/offline_mode # Needs to avoid interference with other tests

@@ -150,7 +152,7 @@ steps:
  # OOM in the CI unless we run this separately
  - pytest -v -s tokenization

- - label: Examples Test # 12min
+ - label: Examples Test # 15min
  working_dir: "/vllm-workspace/examples"
  # mirror_hardwares: [amd]
  source_file_dependencies:
@@ -168,15 +170,15 @@ steps:
  - python3 tensorize_vllm_model.py --model facebook/opt-125m serialize --serialized-directory /tmp/ --suffix v1 && python3 tensorize_vllm_model.py --model facebook/opt-125m deserialize --path-to-tensors /tmp/vllm/facebook/opt-125m/v1/model.tensors
  - python3 offline_inference_encoder_decoder.py

- - label: Prefix Caching Test # 7min
+ - label: Prefix Caching Test # 9min
  # mirror_hardwares: [amd]
  source_file_dependencies:
  - vllm/
  - tests/prefix_caching
  commands:
  - pytest -v -s prefix_caching

- - label: Samplers Test # 18min
+ - label: Samplers Test # 36min
  source_file_dependencies:
  - vllm/model_executor/layers
  - vllm/sampling_metadata.py
@@ -192,7 +194,7 @@ steps:
  - tests/test_logits_processor
  command: pytest -v -s test_logits_processor.py

- - label: Speculative decoding tests # 22min
+ - label: Speculative decoding tests # 30min
  source_file_dependencies:
  - vllm/spec_decode
  - tests/spec_decode
@@ -202,30 +204,30 @@ steps:
  - pytest -v -s spec_decode/e2e/test_multistep_correctness.py
  - pytest -v -s spec_decode --ignore=spec_decode/e2e/test_multistep_correctness.py

- - label: LoRA Test %N # 30min each
+ - label: LoRA Test %N # 15min each
  mirror_hardwares: [amd]
  source_file_dependencies:
  - vllm/lora
  - tests/lora
  command: pytest -v -s lora --shard-id=$$BUILDKITE_PARALLEL_JOB --num-shards=$$BUILDKITE_PARALLEL_JOB_COUNT --ignore=lora/test_long_context.py
  parallelism: 4

- - label: "PyTorch Fullgraph Smoke Test"
+ - label: "PyTorch Fullgraph Smoke Test" # 9min
  fast_check: true
  source_file_dependencies:
  - vllm/
  - tests/compile
  commands:
  - pytest -v -s compile/test_full_graph_smoke.py

- - label: "PyTorch Fullgraph Test"
+ - label: "PyTorch Fullgraph Test" # 18min
  source_file_dependencies:
  - vllm/
  - tests/compile
  commands:
  - pytest -v -s compile/test_full_graph.py

- - label: Kernels Test %N # 30min each
+ - label: Kernels Test %N # 1h each
  mirror_hardwares: [amd]
  source_file_dependencies:
  - csrc/
@@ -255,7 +257,7 @@ steps:
  - pip install aiohttp
  - bash run-benchmarks.sh

- - label: Quantization Test # 15min
+ - label: Quantization Test # 33min
  source_file_dependencies:
  - csrc/
  - vllm/model_executor/layers/quantization
@@ -299,15 +301,15 @@ steps:
  - pytest -v -s models/test_oot_registration.py # it needs a clean process
  - pytest -v -s models/*.py --ignore=models/test_oot_registration.py

- - label: Decoder-only Language Models Test # 1h3min
+ - label: Decoder-only Language Models Test # 1h36min
  # mirror_hardwares: [amd]
  source_file_dependencies:
  - vllm/
  - tests/models/decoder_only/language
  commands:
  - pytest -v -s models/decoder_only/language

- - label: Decoder-only Multi-Modal Models Test # 56min
+ - label: Decoder-only Multi-Modal Models Test # 1h31min
  # mirror_hardwares: [amd]
  source_file_dependencies:
  - vllm/
@@ -317,15 +319,25 @@ steps:
  - pytest -v -s models/decoder_only/audio_language
  - pytest -v -s models/decoder_only/vision_language

- - label: Other Models Test # 5min
+ - label: Other Models Test # 6min
  # mirror_hardwares: [amd]
  source_file_dependencies:
  - vllm/
  - tests/models/embedding/language
  - tests/models/encoder_decoder/language
+   - tests/models/encoder_decoder/vision_language
  commands:
  - pytest -v -s models/embedding/language
  - pytest -v -s models/encoder_decoder/language
+   - pytest -v -s models/encoder_decoder/vision_language
+
+ - label: Custom Models Test
+   # mirror_hardwares: [amd]
+   optional: true
+   commands:
+   # PR authors can temporarily add commands below to test individual models
+   # e.g. pytest -v -s models/encoder_decoder/vision_language/test_mllama.py
+   # *To avoid merge conflicts, remember to REMOVE (not just comment out) them before merging the PR*

##### 1 GPU test #####
##### multi gpus test #####
@@ -358,7 +370,7 @@ steps:
  - # the following commands are for the second node, with ip 192.168.10.11 (ray environment already set up)
  - VLLM_TEST_SAME_HOST=0 torchrun --nnodes 2 --nproc-per-node=2 --rdzv_backend=c10d --rdzv_endpoint=192.168.10.10 distributed/test_same_node.py | grep -q 'Same node test passed'

- - label: Distributed Tests (2 GPUs) # 28min
+ - label: Distributed Tests (2 GPUs) # 40min
  # mirror_hardwares: [amd]
  working_dir: "/vllm-workspace/tests"
  num_gpus: 2
@@ -375,14 +387,16 @@ steps:
  - VLLM_TEST_SAME_HOST=1 torchrun --nproc-per-node=4 distributed/test_same_node.py | grep -q 'Same node test passed'
  - TARGET_TEST_SUITE=L4 pytest basic_correctness/ -v -s -m distributed_2_gpus
  # Avoid importing model tests that cause CUDA reinitialization error
-   - pytest models/encoder_decoder/language/test_bart.py models/decoder_only/vision_language/test_broadcast.py -v -s -m distributed_2_gpus
+   - pytest models/encoder_decoder/language/test_bart.py -v -s -m distributed_2_gpus
+   - pytest models/encoder_decoder/vision_language/test_broadcast.py -v -s -m distributed_2_gpus
+   - pytest models/decoder_only/vision_language/test_broadcast.py -v -s -m distributed_2_gpus
  - pytest -v -s spec_decode/e2e/test_integration_dist_tp2.py
  - pip install -e ./plugins/vllm_add_dummy_model
  - pytest -v -s distributed/test_distributed_oot.py
  - CUDA_VISIBLE_DEVICES=0,1 pytest -v -s test_sharded_state_loader.py
  - CUDA_VISIBLE_DEVICES=0,1 pytest -v -s distributed/test_utils.py

- - label: Multi-step Tests (4 GPUs) # 21min
+ - label: Multi-step Tests (4 GPUs) # 36min
  working_dir: "/vllm-workspace/tests"
  num_gpus: 4
  source_file_dependencies:
@@ -400,7 +414,7 @@ steps:
  - pytest -v -s multi_step/test_correctness_async_llm.py
  - pytest -v -s multi_step/test_correctness_llm.py

- - label: Pipeline Parallelism Test # 23min
+ - label: Pipeline Parallelism Test # 45min
  working_dir: "/vllm-workspace/tests"
  num_gpus: 4
  source_file_dependencies:
@@ -426,7 +440,7 @@ steps:
  - export VLLM_WORKER_MULTIPROC_METHOD=spawn
  - pytest -v -s -x lora/test_long_context.py

- - label: Weight Loading Multiple GPU Test
+ - label: Weight Loading Multiple GPU Test # 33min
  working_dir: "/vllm-workspace/tests"
  num_gpus: 2
  source_file_dependencies:
@@ -459,7 +473,7 @@ steps:
  # NOTE: don't test llama model here, it seems hf implementation is buggy
  # see https://github.com/vllm-project/vllm/pull/5689 for details
  - pytest -v -s distributed/test_custom_all_reduce.py
-   - TARGET_TEST_SUITE=A100 pytest -v -s distributed/test_basic_distributed_correctness.py
+   - TARGET_TEST_SUITE=A100 pytest basic_correctness/ -v -s -m distributed_2_gpus
  - pytest -v -s -x lora/test_mixtral.py

- label: LM Eval Large Models # optional