@@ -261,7 +261,8 @@ class ModelConfig:
     - "float" is shorthand for FP32 precision.\n
     - "float32" for FP32 precision."""
     seed: Optional[int] = None
-    """Random seed for reproducibility."""
+    """Random seed for reproducibility. Initialized to None in V0, but
+    initialized to 0 in V1."""
     hf_config_path: Optional[str] = None
     """Name or path of the Hugging Face config to use. If unspecified, model
     name or path will be used."""
@@ -441,6 +442,24 @@ def compute_hash(self) -> str:
         return hashlib.sha256(str(factors).encode()).hexdigest()
 
     def __post_init__(self) -> None:
+        # Set the default seed to 0 in V1.
+        # NOTE(woosuk): In V0, we set the default seed to None because the
+        # driver worker shares the same process as the user process, and thus
+        # setting a seed affects the user process as well.
+        # In V1, we use separate processes for workers (unless
+        # VLLM_ENABLE_V1_MULTIPROCESSING=0), so setting a seed here
+        # doesn't affect the user process. However, without a consistent seed,
+        # different tensor parallel workers would sample different tokens,
+        # leading to inconsistent results.
+        if envs.VLLM_USE_V1 and self.seed is None:
+            self.seed = 0
+            if not envs.VLLM_ENABLE_V1_MULTIPROCESSING:
+                logger.warning(
+                    "The global random seed is set to %d. Since "
+                    "VLLM_ENABLE_V1_MULTIPROCESSING is set to False, this may "
+                    "affect the random state of the Python process that "
+                    "launched vLLM.", self.seed)
+
         self.model = maybe_model_redirect(self.model)
         # The tokenizer is consistent with the model by default.
         if self.tokenizer is None:
0 commit comments