File tree 4 files changed +13
-0
lines changed
_internal/serve/deployments/llm/vllm
cpu/deployments/llm/multiplex
4 files changed +13
-0
lines changed Original file line number Diff line number Diff line change @@ -919,6 +919,9 @@ def _parse_sampling_params(
919
919
frequency_penalty = sampling_params .frequency_penalty
920
920
if sampling_params .frequency_penalty is not None
921
921
else 0.0 ,
922
+ repetition_penalty = sampling_params .repetition_penalty
923
+ if sampling_params .repetition_penalty is not None
924
+ else 1.0 ,
922
925
temperature = sampling_params .temperature
923
926
if sampling_params .temperature is not None
924
927
else 1.0 ,
Original file line number Diff line number Diff line change @@ -211,11 +211,16 @@ class VLLMSamplingParams(SamplingParams):
211
211
Args:
212
212
top_k: The number of highest probability vocabulary tokens to keep for top-k-filtering.
213
213
seed: Seed for deterministic sampling with temperature>0.
214
+ repetition_penalty: Float that penalizes new tokens based on whether they
215
+ appear in the prompt and the generated text so far. Values > 1 encourage
216
+ the model to use new tokens, while values < 1 encourage the model to repeat
217
+ tokens.
214
218
"""
215
219
216
220
_ignored_fields = {"best_of" , "n" , "logit_bias" }
217
221
218
222
top_k : Optional [int ] = None
223
+ repetition_penalty : Optional [float ] = None
219
224
seed : Optional [int ] = None
220
225
221
226
Original file line number Diff line number Diff line change 74
74
"ignore_eos" : None ,
75
75
"presence_penalty" : None ,
76
76
"frequency_penalty" : None ,
77
+ "repetition_penalty" : None ,
77
78
"best_of" : 1 ,
78
79
"response_format" : None ,
79
80
"top_k" : None ,
@@ -177,6 +178,7 @@ async def test_multiplex_deployment(
177
178
"ignore_eos" : None ,
178
179
"presence_penalty" : None ,
179
180
"frequency_penalty" : None ,
181
+ "repetition_penalty" : None ,
180
182
"top_k" : None ,
181
183
"response_format" : None ,
182
184
"logprobs" : None ,
Original file line number Diff line number Diff line change @@ -162,6 +162,9 @@ def _parse_sampling_params(
162
162
frequency_penalty = sampling_params .frequency_penalty
163
163
if sampling_params .frequency_penalty is not None
164
164
else 0.0 ,
165
+ repetition_penalty = sampling_params .repetition_penalty
166
+ if sampling_params .repetition_penalty is not None
167
+ else 1.0 ,
165
168
temperature = sampling_params .temperature
166
169
if sampling_params .temperature is not None
167
170
else 1.0 ,
You can’t perform that action at this time.
0 commit comments