Skip to content

Commit 9a81218

Browse files
authored
[Serve.llm] feat: add missing repetition_penalty vLLM sampling param (#53222)
Signed-off-by: Arthur <atte.book@gmail.com>
1 parent a9dab3a commit 9a81218

File tree

4 files changed

+13
-0
lines changed

4 files changed

+13
-0
lines changed

python/ray/llm/_internal/serve/deployments/llm/vllm/vllm_engine.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -919,6 +919,9 @@ def _parse_sampling_params(
         frequency_penalty=sampling_params.frequency_penalty
         if sampling_params.frequency_penalty is not None
         else 0.0,
+        repetition_penalty=sampling_params.repetition_penalty
+        if sampling_params.repetition_penalty is not None
+        else 1.0,
         temperature=sampling_params.temperature
         if sampling_params.temperature is not None
         else 1.0,

python/ray/llm/_internal/serve/deployments/llm/vllm/vllm_models.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -211,11 +211,16 @@ class VLLMSamplingParams(SamplingParams):
     Args:
         top_k: The number of highest probability vocabulary tokens to keep for top-k-filtering.
         seed: Seed for deterministic sampling with temperature>0.
+        repetition_penalty: Float that penalizes new tokens based on whether they
+            appear in the prompt and the generated text so far. Values > 1 encourage
+            the model to use new tokens, while values < 1 encourage the model to repeat
+            tokens.
     """

     _ignored_fields = {"best_of", "n", "logit_bias"}

     top_k: Optional[int] = None
+    repetition_penalty: Optional[float] = None
     seed: Optional[int] = None

python/ray/llm/tests/serve/cpu/deployments/llm/multiplex/test_multiplex_deployment.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -74,6 +74,7 @@
     "ignore_eos": None,
     "presence_penalty": None,
     "frequency_penalty": None,
+    "repetition_penalty": None,
     "best_of": 1,
     "response_format": None,
     "top_k": None,
@@ -177,6 +178,7 @@ async def test_multiplex_deployment(
     "ignore_eos": None,
     "presence_penalty": None,
     "frequency_penalty": None,
+    "repetition_penalty": None,
     "top_k": None,
     "response_format": None,
     "logprobs": None,

python/ray/llm/tests/serve/mocks/mock_vllm_engine.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -162,6 +162,9 @@ def _parse_sampling_params(
         frequency_penalty=sampling_params.frequency_penalty
         if sampling_params.frequency_penalty is not None
         else 0.0,
+        repetition_penalty=sampling_params.repetition_penalty
+        if sampling_params.repetition_penalty is not None
+        else 1.0,
         temperature=sampling_params.temperature
         if sampling_params.temperature is not None
         else 1.0,

0 commit comments

Comments (0)