Skip to content

Commit c8e44a7

Browse files
committed
pass min_tokens
1 parent 0f93158 commit c8e44a7

File tree

1 file changed

+18
-0
lines changed

1 file changed

+18
-0
lines changed

llama_cpp/llama_chat_format.py

Lines changed: 18 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -63,6 +63,7 @@ def __call__(
6363
llama_types.ChatCompletionRequestResponseFormat
6464
] = None,
6565
max_tokens: Optional[int] = None,
66+
min_tokens: int = 0,
6667
presence_penalty: float = 0.0,
6768
frequency_penalty: float = 0.0,
6869
repeat_penalty: float = 1.1,
@@ -501,6 +502,7 @@ def chat_completion_handler(
501502
llama_types.ChatCompletionRequestResponseFormat
502503
] = None,
503504
max_tokens: Optional[int] = None,
505+
min_tokens: int = 0,
504506
presence_penalty: float = 0.0,
505507
frequency_penalty: float = 0.0,
506508
repeat_penalty: float = 1.1,
@@ -586,6 +588,7 @@ def chat_completion_handler(
586588
stop=stop,
587589
seed=seed,
588590
max_tokens=max_tokens,
591+
min_tokens=min_tokens,
589592
presence_penalty=presence_penalty,
590593
frequency_penalty=frequency_penalty,
591594
repeat_penalty=repeat_penalty,
@@ -1295,6 +1298,7 @@ def functionary_chat_handler(
12951298
stop: Optional[Union[str, List[str]]] = [],
12961299
response_format: Optional[llama_types.ChatCompletionRequestResponseFormat] = None,
12971300
max_tokens: Optional[int] = None,
1301+
min_tokens: int = 0,
12981302
presence_penalty: float = 0.0,
12991303
frequency_penalty: float = 0.0,
13001304
repeat_penalty: float = 1.1,
@@ -1501,6 +1505,7 @@ def message_to_str(msg: llama_types.ChatCompletionRequestMessage):
15011505
stream=stream,
15021506
stop=["user:", "</s>"],
15031507
max_tokens=max_tokens,
1508+
min_tokens=min_tokens,
15041509
presence_penalty=presence_penalty,
15051510
frequency_penalty=frequency_penalty,
15061511
repeat_penalty=repeat_penalty,
@@ -1577,6 +1582,7 @@ def message_to_str(msg: llama_types.ChatCompletionRequestMessage):
15771582
stream=False,
15781583
grammar=grammar,
15791584
max_tokens=max_tokens,
1585+
min_tokens=min_tokens,
15801586
temperature=temperature,
15811587
top_p=top_p,
15821588
top_k=top_k,
@@ -1654,6 +1660,7 @@ def functionary_v1_v2_chat_handler(
16541660
stop: Optional[Union[str, List[str]]] = [],
16551661
response_format: Optional[llama_types.ChatCompletionRequestResponseFormat] = None,
16561662
max_tokens: Optional[int] = None,
1663+
min_tokens: int = 0,
16571664
presence_penalty: float = 0.0,
16581665
frequency_penalty: float = 0.0,
16591666
repeat_penalty: float = 1.1,
@@ -1860,6 +1867,7 @@ def prepare_messages_for_inference(
18601867
stream=stream,
18611868
stop=stop,
18621869
max_tokens=max_tokens,
1870+
min_tokens=min_tokens,
18631871
presence_penalty=presence_penalty,
18641872
frequency_penalty=frequency_penalty,
18651873
repeat_penalty=repeat_penalty,
@@ -1920,6 +1928,7 @@ def create_completion(stop):
19201928
stream=False,
19211929
stop=stop,
19221930
max_tokens=max_tokens,
1931+
min_tokens=min_tokens,
19231932
presence_penalty=presence_penalty,
19241933
frequency_penalty=frequency_penalty,
19251934
repeat_penalty=repeat_penalty,
@@ -2157,6 +2166,7 @@ def __call__(
21572166
llama_types.ChatCompletionRequestResponseFormat
21582167
] = None,
21592168
max_tokens: Optional[int] = None,
2169+
min_tokens: int = 0,
21602170
presence_penalty: float = 0.0,
21612171
frequency_penalty: float = 0.0,
21622172
repeat_penalty: float = 1.1,
@@ -2269,6 +2279,7 @@ def __call__(
22692279
stream=stream,
22702280
stop=stop,
22712281
max_tokens=max_tokens,
2282+
min_tokens=min_tokens,
22722283
presence_penalty=presence_penalty,
22732284
frequency_penalty=frequency_penalty,
22742285
repeat_penalty=repeat_penalty,
@@ -2301,6 +2312,7 @@ def chatml_function_calling(
23012312
stop: Optional[Union[str, List[str]]] = [],
23022313
response_format: Optional[llama_types.ChatCompletionRequestResponseFormat] = None,
23032314
max_tokens: Optional[int] = None,
2315+
min_tokens: int = 0,
23042316
presence_penalty: float = 0.0,
23052317
frequency_penalty: float = 0.0,
23062318
repeat_penalty: float = 1.1,
@@ -2427,6 +2439,7 @@ def chatml_function_calling(
24272439
stream=stream,
24282440
stop=stop,
24292441
max_tokens=max_tokens,
2442+
min_tokens=min_tokens,
24302443
presence_penalty=presence_penalty,
24312444
frequency_penalty=frequency_penalty,
24322445
repeat_penalty=repeat_penalty,
@@ -2479,6 +2492,7 @@ def chatml_function_calling(
24792492
stream=stream,
24802493
stop=stop,
24812494
max_tokens=max_tokens,
2495+
min_tokens=min_tokens,
24822496
presence_penalty=presence_penalty,
24832497
frequency_penalty=frequency_penalty,
24842498
repeat_penalty=repeat_penalty,
@@ -2523,6 +2537,7 @@ def chatml_function_calling(
25232537
stream=False,
25242538
stop=[":"],
25252539
max_tokens=None,
2540+
min_tokens=min_tokens,
25262541
presence_penalty=presence_penalty,
25272542
frequency_penalty=frequency_penalty,
25282543
repeat_penalty=repeat_penalty,
@@ -2550,6 +2565,7 @@ def chatml_function_calling(
25502565
stream=stream,
25512566
stop=["<|im_end|>"],
25522567
max_tokens=None,
2568+
min_tokens=min_tokens,
25532569
presence_penalty=presence_penalty,
25542570
frequency_penalty=frequency_penalty,
25552571
repeat_penalty=repeat_penalty,
@@ -2597,6 +2613,7 @@ def chatml_function_calling(
25972613
stream=False,
25982614
stop=stop,
25992615
max_tokens=None,
2616+
min_tokens=min_tokens,
26002617
presence_penalty=presence_penalty,
26012618
frequency_penalty=frequency_penalty,
26022619
repeat_penalty=repeat_penalty,
@@ -2624,6 +2641,7 @@ def chatml_function_calling(
26242641
stream=False,
26252642
stop=stop,
26262643
max_tokens=None,
2644+
min_tokens=min_tokens,
26272645
presence_penalty=presence_penalty,
26282646
frequency_penalty=frequency_penalty,
26292647
repeat_penalty=repeat_penalty,

0 commit comments

Comments
 (0)