@@ -9,6 +9,7 @@
# label(str): the name of the test. emoji allowed.
# fast_check(bool): whether to run this on each commit on fastcheck pipeline.
# fast_check_only(bool): run this test on fastcheck pipeline only
+ # optional(bool): never run this test by default (i.e. need to unblock manually)
# command(str): the single command to run for tests. incompatible with commands.
# commands(list): the list of commands to run for tests. incompatible with command.
# mirror_hardwares(list): the list of additional hardware to run the test on. currently only supports [amd]
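#
# A minimal sketch of how these keys combine in a step entry (hypothetical label,
# paths, and timing; not a real step from this pipeline):
#
# - label: Example Test # 5min
#   fast_check: true
#   mirror_hardwares: [amd]
#   source_file_dependencies:
#   - vllm/
#   - tests/example
#   commands:
#   - pytest -v -s example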
@@ -39,7 +40,7 @@ steps:
  # Check API reference (if it fails, you may have missing mock imports)
  - grep \"sig sig-object py\" build/html/dev/sampling_params.html

- - label: Async Engine, Inputs, Utils, Worker Test # 15min
+ - label: Async Engine, Inputs, Utils, Worker Test # 24min
  fast_check: true
  source_file_dependencies:
  - vllm/
@@ -81,7 +82,7 @@ steps:
  commands:
  - pytest -v -s core

- - label: Entrypoints Test # 20min
+ - label: Entrypoints Test # 40min
  working_dir: "/vllm-workspace/tests"
  fast_check: true
  mirror_hardwares: [amd]
@@ -151,7 +152,7 @@ steps:
  # OOM in the CI unless we run this separately
  - pytest -v -s tokenization

- - label: Examples Test # 12min
+ - label: Examples Test # 15min
  working_dir: "/vllm-workspace/examples"
  # mirror_hardwares: [amd]
  source_file_dependencies:
@@ -169,15 +170,15 @@ steps:
  - python3 tensorize_vllm_model.py --model facebook/opt-125m serialize --serialized-directory /tmp/ --suffix v1 && python3 tensorize_vllm_model.py --model facebook/opt-125m deserialize --path-to-tensors /tmp/vllm/facebook/opt-125m/v1/model.tensors
  - python3 offline_inference_encoder_decoder.py

- - label: Prefix Caching Test # 7min
+ - label: Prefix Caching Test # 9min
  # mirror_hardwares: [amd]
  source_file_dependencies:
  - vllm/
  - tests/prefix_caching
  commands:
  - pytest -v -s prefix_caching

- - label: Samplers Test # 18min
+ - label: Samplers Test # 36min
  source_file_dependencies:
  - vllm/model_executor/layers
  - vllm/sampling_metadata.py
@@ -193,7 +194,7 @@ steps:
  - tests/test_logits_processor
  command: pytest -v -s test_logits_processor.py

- - label: Speculative decoding tests # 22min
+ - label: Speculative decoding tests # 30min
  source_file_dependencies:
  - vllm/spec_decode
  - tests/spec_decode
@@ -203,30 +204,30 @@ steps:
  - pytest -v -s spec_decode/e2e/test_multistep_correctness.py
  - pytest -v -s spec_decode --ignore=spec_decode/e2e/test_multistep_correctness.py

- - label: LoRA Test %N # 30min each
+ - label: LoRA Test %N # 15min each
  mirror_hardwares: [amd]
  source_file_dependencies:
  - vllm/lora
  - tests/lora
  command: pytest -v -s lora --shard-id=$$BUILDKITE_PARALLEL_JOB --num-shards=$$BUILDKITE_PARALLEL_JOB_COUNT --ignore=lora/test_long_context.py
  parallelism: 4
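  # %N labels are sharded: Buildkite runs `parallelism` copies of this step, and the
  # escaped $$BUILDKITE_PARALLEL_JOB / $$BUILDKITE_PARALLEL_JOB_COUNT resolve at runtime
  # to this job's shard index and the total shard count.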

- - label: "PyTorch Fullgraph Smoke Test"
+ - label: "PyTorch Fullgraph Smoke Test" # 9min
  fast_check: true
  source_file_dependencies:
  - vllm/
  - tests/compile
  commands:
  - pytest -v -s compile/test_full_graph_smoke.py

- - label: "PyTorch Fullgraph Test"
+ - label: "PyTorch Fullgraph Test" # 18min
  source_file_dependencies:
  - vllm/
  - tests/compile
  commands:
  - pytest -v -s compile/test_full_graph.py

- - label: Kernels Test %N # 30min each
+ - label: Kernels Test %N # 1h each
  mirror_hardwares: [amd]
  source_file_dependencies:
  - csrc/
@@ -256,7 +257,7 @@ steps:
  - pip install aiohttp
  - bash run-benchmarks.sh

- - label: Quantization Test # 15min
+ - label: Quantization Test # 33min
  source_file_dependencies:
  - csrc/
  - vllm/model_executor/layers/quantization
@@ -300,15 +301,15 @@ steps:
  - pytest -v -s models/test_oot_registration.py # it needs a clean process
  - pytest -v -s models/*.py --ignore=models/test_oot_registration.py

- - label: Decoder-only Language Models Test # 1h3min
+ - label: Decoder-only Language Models Test # 1h36min
  # mirror_hardwares: [amd]
  source_file_dependencies:
  - vllm/
  - tests/models/decoder_only/language
  commands:
  - pytest -v -s models/decoder_only/language

- - label: Decoder-only Multi-Modal Models Test # 56min
+ - label: Decoder-only Multi-Modal Models Test # 1h31min
  # mirror_hardwares: [amd]
  source_file_dependencies:
  - vllm/
@@ -318,15 +319,25 @@ steps:
  - pytest -v -s models/decoder_only/audio_language
  - pytest -v -s models/decoder_only/vision_language

- - label: Other Models Test # 5min
+ - label: Other Models Test # 6min
  # mirror_hardwares: [amd]
  source_file_dependencies:
  - vllm/
  - tests/models/embedding/language
  - tests/models/encoder_decoder/language
+   - tests/models/encoder_decoder/vision_language
  commands:
  - pytest -v -s models/embedding/language
  - pytest -v -s models/encoder_decoder/language
+   - pytest -v -s models/encoder_decoder/vision_language
+
+ - label: Custom Models Test
+   # mirror_hardwares: [amd]
+   optional: true
+   commands:
+   # PR authors can temporarily add commands below to test individual models
+   # e.g. pytest -v -s models/encoder_decoder/vision_language/test_mllama.py
+   # *To avoid merge conflicts, remember to REMOVE (not just comment out) them before merging the PR*

##### 1 GPU test #####
##### multi gpus test #####
@@ -359,7 +370,7 @@ steps:
  - # the following commands are for the second node, with ip 192.168.10.11 (ray environment already set up)
  - VLLM_TEST_SAME_HOST=0 torchrun --nnodes 2 --nproc-per-node=2 --rdzv_backend=c10d --rdzv_endpoint=192.168.10.10 distributed/test_same_node.py | grep -q 'Same node test passed'
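  # Both nodes rendezvous at the first node's c10d endpoint above; piping through
  # `grep -q` makes the step fail unless the test prints 'Same node test passed'.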

- - label: Distributed Tests (2 GPUs) # 28min
+ - label: Distributed Tests (2 GPUs) # 40min
  # mirror_hardwares: [amd]
  working_dir: "/vllm-workspace/tests"
  num_gpus: 2
@@ -376,14 +387,16 @@ steps:
  - VLLM_TEST_SAME_HOST=1 torchrun --nproc-per-node=4 distributed/test_same_node.py | grep -q 'Same node test passed'
  - TARGET_TEST_SUITE=L4 pytest basic_correctness/ -v -s -m distributed_2_gpus
  # Avoid importing model tests that cause CUDA reinitialization error
-   - pytest models/encoder_decoder/language/test_bart.py models/decoder_only/vision_language/test_broadcast.py -v -s -m distributed_2_gpus
+   - pytest models/encoder_decoder/language/test_bart.py -v -s -m distributed_2_gpus
+   - pytest models/encoder_decoder/vision_language/test_broadcast.py -v -s -m distributed_2_gpus
+   - pytest models/decoder_only/vision_language/test_broadcast.py -v -s -m distributed_2_gpus
  - pytest -v -s spec_decode/e2e/test_integration_dist_tp2.py
  - pip install -e ./plugins/vllm_add_dummy_model
  - pytest -v -s distributed/test_distributed_oot.py
  - CUDA_VISIBLE_DEVICES=0,1 pytest -v -s test_sharded_state_loader.py
  - CUDA_VISIBLE_DEVICES=0,1 pytest -v -s distributed/test_utils.py

- - label: Multi-step Tests (4 GPUs) # 21min
+ - label: Multi-step Tests (4 GPUs) # 36min
  working_dir: "/vllm-workspace/tests"
  num_gpus: 4
  source_file_dependencies:
@@ -401,7 +414,7 @@ steps:
  - pytest -v -s multi_step/test_correctness_async_llm.py
  - pytest -v -s multi_step/test_correctness_llm.py

- - label: Pipeline Parallelism Test # 23min
+ - label: Pipeline Parallelism Test # 45min
  working_dir: "/vllm-workspace/tests"
  num_gpus: 4
  source_file_dependencies:
@@ -427,7 +440,7 @@ steps:
  - export VLLM_WORKER_MULTIPROC_METHOD=spawn
  - pytest -v -s -x lora/test_long_context.py

- - label: Weight Loading Multiple GPU Test
+ - label: Weight Loading Multiple GPU Test # 33min
  working_dir: "/vllm-workspace/tests"
  num_gpus: 2
  source_file_dependencies: