ray-project · kouroshHakha · May 23, 2025 · May 21, 2025 · May 21, 2025
@@ -919,6 +919,9 @@ def _parse_sampling_params(
                 frequency_penalty=sampling_params.frequency_penalty
                 if sampling_params.frequency_penalty is not None
                 else 0.0,
+                repetition_penalty=sampling_params.repetition_penalty
+                if sampling_params.repetition_penalty is not None
+                else 1.0,
                 temperature=sampling_params.temperature
                 if sampling_params.temperature is not None
                 else 1.0,

@@ -211,11 +211,16 @@ class VLLMSamplingParams(SamplingParams):
     Args:
         top_k: The number of highest probability vocabulary tokens to keep for top-k-filtering.
         seed: Seed for deterministic sampling with temperature>0.
+        repetition_penalty: Float that penalizes new tokens based on whether they
+            appear in the prompt and the generated text so far. Values > 1 encourage
+            the model to use new tokens, while values < 1 encourage the model to
+            repeat tokens. When left as None, 1.0 is used.
     """
 
     _ignored_fields = {"best_of", "n", "logit_bias"}
 
     top_k: Optional[int] = None
+    repetition_penalty: Optional[float] = None
     seed: Optional[int] = None
 
 

@@ -74,6 +74,7 @@
         "ignore_eos": None,
         "presence_penalty": None,
         "frequency_penalty": None,
+        "repetition_penalty": None,
         "best_of": 1,
         "response_format": None,
         "top_k": None,
@@ -177,6 +178,7 @@ async def test_multiplex_deployment(
             "ignore_eos": None,
             "presence_penalty": None,
             "frequency_penalty": None,
+            "repetition_penalty": None,
             "top_k": None,
             "response_format": None,
             "logprobs": None,

@@ -162,6 +162,9 @@ def _parse_sampling_params(
                 frequency_penalty=sampling_params.frequency_penalty
                 if sampling_params.frequency_penalty is not None
                 else 0.0,
+                repetition_penalty=sampling_params.repetition_penalty
+                if sampling_params.repetition_penalty is not None
+                else 1.0,
                 temperature=sampling_params.temperature
                 if sampling_params.temperature is not None
                 else 1.0,