Skip to content

[Serve.llm] feat: add missing repetition_penalty vLLM sampling param #53222

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -919,6 +919,9 @@ def _parse_sampling_params(
frequency_penalty=sampling_params.frequency_penalty
if sampling_params.frequency_penalty is not None
else 0.0,
repetition_penalty=sampling_params.repetition_penalty
if sampling_params.repetition_penalty is not None
else 1.0,
temperature=sampling_params.temperature
if sampling_params.temperature is not None
else 1.0,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -211,11 +211,16 @@ class VLLMSamplingParams(SamplingParams):
Args:
top_k: The number of highest-probability vocabulary tokens to keep for top-k filtering.
seed: Seed for deterministic sampling with temperature>0.
repetition_penalty: Float that penalizes new tokens based on whether they
appear in the prompt and the generated text so far. Values > 1 encourage
the model to use new tokens, while values < 1 encourage the model to repeat
tokens. When left as None, the engine falls back to 1.0 (no penalty).
"""

_ignored_fields = {"best_of", "n", "logit_bias"}

top_k: Optional[int] = None
repetition_penalty: Optional[float] = None
seed: Optional[int] = None


Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,7 @@
"ignore_eos": None,
"presence_penalty": None,
"frequency_penalty": None,
"repetition_penalty": None,
"best_of": 1,
"response_format": None,
"top_k": None,
Expand Down Expand Up @@ -177,6 +178,7 @@ async def test_multiplex_deployment(
"ignore_eos": None,
"presence_penalty": None,
"frequency_penalty": None,
"repetition_penalty": None,
"top_k": None,
"response_format": None,
"logprobs": None,
Expand Down
3 changes: 3 additions & 0 deletions python/ray/llm/tests/serve/mocks/mock_vllm_engine.py
Original file line number Diff line number Diff line change
Expand Up @@ -162,6 +162,9 @@ def _parse_sampling_params(
frequency_penalty=sampling_params.frequency_penalty
if sampling_params.frequency_penalty is not None
else 0.0,
repetition_penalty=sampling_params.repetition_penalty
if sampling_params.repetition_penalty is not None
else 1.0,
temperature=sampling_params.temperature
if sampling_params.temperature is not None
else 1.0,
Expand Down