Skip to content

Commit ffe725f

Browse files
ArthurBook authored and lk-chen committed
[Serve.llm] feat: add missing repetition_penalty vLLM sampling param (ray-project#53222)
Signed-off-by: Arthur <atte.book@gmail.com>
1 parent bee30f9 commit ffe725f

File tree

3 files changed

+10
-0
lines changed

3 files changed

+10
-0
lines changed

python/ray/llm/_internal/serve/deployments/llm/vllm/vllm_models.py

Lines changed: 5 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -215,11 +215,16 @@ class VLLMSamplingParams(SamplingParams):
215215
Args:
216216
top_k: The number of highest probability vocabulary tokens to keep for top-k-filtering.
217217
seed: Seed for deterministic sampling with temperature>0.
218+
repetition_penalty: Float that penalizes new tokens based on whether they
219+
appear in the prompt and the generated text so far. Values > 1 encourage
220+
the model to use new tokens, while values < 1 encourage the model to repeat
221+
tokens.
218222
"""
219223

220224
_ignored_fields = {"best_of", "n", "logit_bias"}
221225

222226
top_k: Optional[int] = None
227+
repetition_penalty: Optional[float] = None
223228
seed: Optional[int] = None
224229
kv_transfer_params: Optional[Dict[str, Any]] = None
225230

python/ray/llm/tests/serve/cpu/deployments/llm/multiplex/test_multiplex_deployment.py

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -74,6 +74,7 @@
7474
"ignore_eos": None,
7575
"presence_penalty": None,
7676
"frequency_penalty": None,
77+
"repetition_penalty": None,
7778
"best_of": 1,
7879
"response_format": None,
7980
"top_k": None,
@@ -179,6 +180,7 @@ async def test_multiplex_deployment(
179180
"ignore_eos": None,
180181
"presence_penalty": None,
181182
"frequency_penalty": None,
183+
"repetition_penalty": None,
182184
"top_k": None,
183185
"response_format": None,
184186
"logprobs": None,

python/ray/llm/tests/serve/mocks/mock_vllm_engine.py

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -166,6 +166,9 @@ def _parse_sampling_params(
166166
frequency_penalty=sampling_params.frequency_penalty
167167
if sampling_params.frequency_penalty is not None
168168
else 0.0,
169+
repetition_penalty=sampling_params.repetition_penalty
170+
if sampling_params.repetition_penalty is not None
171+
else 1.0,
169172
temperature=sampling_params.temperature
170173
if sampling_params.temperature is not None
171174
else 1.0,

0 commit comments

Comments
 (0)