@@ -63,6 +63,7 @@ def __call__(
63
63
llama_types .ChatCompletionRequestResponseFormat
64
64
] = None ,
65
65
max_tokens : Optional [int ] = None ,
66
+ min_tokens : int = 0 ,
66
67
presence_penalty : float = 0.0 ,
67
68
frequency_penalty : float = 0.0 ,
68
69
repeat_penalty : float = 1.1 ,
@@ -501,6 +502,7 @@ def chat_completion_handler(
501
502
llama_types .ChatCompletionRequestResponseFormat
502
503
] = None ,
503
504
max_tokens : Optional [int ] = None ,
505
+ min_tokens : int = 0 ,
504
506
presence_penalty : float = 0.0 ,
505
507
frequency_penalty : float = 0.0 ,
506
508
repeat_penalty : float = 1.1 ,
@@ -586,6 +588,7 @@ def chat_completion_handler(
586
588
stop = stop ,
587
589
seed = seed ,
588
590
max_tokens = max_tokens ,
591
+ min_tokens = min_tokens ,
589
592
presence_penalty = presence_penalty ,
590
593
frequency_penalty = frequency_penalty ,
591
594
repeat_penalty = repeat_penalty ,
@@ -1295,6 +1298,7 @@ def functionary_chat_handler(
1295
1298
stop : Optional [Union [str , List [str ]]] = [],
1296
1299
response_format : Optional [llama_types .ChatCompletionRequestResponseFormat ] = None ,
1297
1300
max_tokens : Optional [int ] = None ,
1301
+ min_tokens : int = 0 ,
1298
1302
presence_penalty : float = 0.0 ,
1299
1303
frequency_penalty : float = 0.0 ,
1300
1304
repeat_penalty : float = 1.1 ,
@@ -1501,6 +1505,7 @@ def message_to_str(msg: llama_types.ChatCompletionRequestMessage):
1501
1505
stream = stream ,
1502
1506
stop = ["user:" , "</s>" ],
1503
1507
max_tokens = max_tokens ,
1508
+ min_tokens = min_tokens ,
1504
1509
presence_penalty = presence_penalty ,
1505
1510
frequency_penalty = frequency_penalty ,
1506
1511
repeat_penalty = repeat_penalty ,
@@ -1577,6 +1582,7 @@ def message_to_str(msg: llama_types.ChatCompletionRequestMessage):
1577
1582
stream = False ,
1578
1583
grammar = grammar ,
1579
1584
max_tokens = max_tokens ,
1585
+ min_tokens = min_tokens ,
1580
1586
temperature = temperature ,
1581
1587
top_p = top_p ,
1582
1588
top_k = top_k ,
@@ -1654,6 +1660,7 @@ def functionary_v1_v2_chat_handler(
1654
1660
stop : Optional [Union [str , List [str ]]] = [],
1655
1661
response_format : Optional [llama_types .ChatCompletionRequestResponseFormat ] = None ,
1656
1662
max_tokens : Optional [int ] = None ,
1663
+ min_tokens : int = 0 ,
1657
1664
presence_penalty : float = 0.0 ,
1658
1665
frequency_penalty : float = 0.0 ,
1659
1666
repeat_penalty : float = 1.1 ,
@@ -1860,6 +1867,7 @@ def prepare_messages_for_inference(
1860
1867
stream = stream ,
1861
1868
stop = stop ,
1862
1869
max_tokens = max_tokens ,
1870
+ min_tokens = min_tokens ,
1863
1871
presence_penalty = presence_penalty ,
1864
1872
frequency_penalty = frequency_penalty ,
1865
1873
repeat_penalty = repeat_penalty ,
@@ -1920,6 +1928,7 @@ def create_completion(stop):
1920
1928
stream = False ,
1921
1929
stop = stop ,
1922
1930
max_tokens = max_tokens ,
1931
+ min_tokens = min_tokens ,
1923
1932
presence_penalty = presence_penalty ,
1924
1933
frequency_penalty = frequency_penalty ,
1925
1934
repeat_penalty = repeat_penalty ,
@@ -2157,6 +2166,7 @@ def __call__(
2157
2166
llama_types .ChatCompletionRequestResponseFormat
2158
2167
] = None ,
2159
2168
max_tokens : Optional [int ] = None ,
2169
+ min_tokens : int = 0 ,
2160
2170
presence_penalty : float = 0.0 ,
2161
2171
frequency_penalty : float = 0.0 ,
2162
2172
repeat_penalty : float = 1.1 ,
@@ -2269,6 +2279,7 @@ def __call__(
2269
2279
stream = stream ,
2270
2280
stop = stop ,
2271
2281
max_tokens = max_tokens ,
2282
+ min_tokens = min_tokens ,
2272
2283
presence_penalty = presence_penalty ,
2273
2284
frequency_penalty = frequency_penalty ,
2274
2285
repeat_penalty = repeat_penalty ,
@@ -2301,6 +2312,7 @@ def chatml_function_calling(
2301
2312
stop : Optional [Union [str , List [str ]]] = [],
2302
2313
response_format : Optional [llama_types .ChatCompletionRequestResponseFormat ] = None ,
2303
2314
max_tokens : Optional [int ] = None ,
2315
+ min_tokens : int = 0 ,
2304
2316
presence_penalty : float = 0.0 ,
2305
2317
frequency_penalty : float = 0.0 ,
2306
2318
repeat_penalty : float = 1.1 ,
@@ -2427,6 +2439,7 @@ def chatml_function_calling(
2427
2439
stream = stream ,
2428
2440
stop = stop ,
2429
2441
max_tokens = max_tokens ,
2442
+ min_tokens = min_tokens ,
2430
2443
presence_penalty = presence_penalty ,
2431
2444
frequency_penalty = frequency_penalty ,
2432
2445
repeat_penalty = repeat_penalty ,
@@ -2479,6 +2492,7 @@ def chatml_function_calling(
2479
2492
stream = stream ,
2480
2493
stop = stop ,
2481
2494
max_tokens = max_tokens ,
2495
+ min_tokens = min_tokens ,
2482
2496
presence_penalty = presence_penalty ,
2483
2497
frequency_penalty = frequency_penalty ,
2484
2498
repeat_penalty = repeat_penalty ,
@@ -2523,6 +2537,7 @@ def chatml_function_calling(
2523
2537
stream = False ,
2524
2538
stop = [":" ],
2525
2539
max_tokens = None ,
2540
+ min_tokens = min_tokens ,
2526
2541
presence_penalty = presence_penalty ,
2527
2542
frequency_penalty = frequency_penalty ,
2528
2543
repeat_penalty = repeat_penalty ,
@@ -2550,6 +2565,7 @@ def chatml_function_calling(
2550
2565
stream = stream ,
2551
2566
stop = ["<|im_end|>" ],
2552
2567
max_tokens = None ,
2568
+ min_tokens = min_tokens ,
2553
2569
presence_penalty = presence_penalty ,
2554
2570
frequency_penalty = frequency_penalty ,
2555
2571
repeat_penalty = repeat_penalty ,
@@ -2597,6 +2613,7 @@ def chatml_function_calling(
2597
2613
stream = False ,
2598
2614
stop = stop ,
2599
2615
max_tokens = None ,
2616
+ min_tokens = min_tokens ,
2600
2617
presence_penalty = presence_penalty ,
2601
2618
frequency_penalty = frequency_penalty ,
2602
2619
repeat_penalty = repeat_penalty ,
@@ -2624,6 +2641,7 @@ def chatml_function_calling(
2624
2641
stream = False ,
2625
2642
stop = stop ,
2626
2643
max_tokens = None ,
2644
+ min_tokens = min_tokens ,
2627
2645
presence_penalty = presence_penalty ,
2628
2646
frequency_penalty = frequency_penalty ,
2629
2647
repeat_penalty = repeat_penalty ,
0 commit comments