Skip to content

Commit e712cff

Browse files
committed
feat: Update llama.cpp
1 parent 7403e00 commit e712cff

File tree

4 files changed: +36 −27 lines

llama_cpp/_internals.py

Lines changed: 0 additions & 14 deletions
@@ -362,13 +362,6 @@ def sample_min_p(self, candidates: "_LlamaTokenDataArray", p: float, min_keep: i
             self.ctx, llama_cpp.byref(candidates.candidates), p, min_keep
         )

-    def sample_tail_free(
-        self, candidates: "_LlamaTokenDataArray", z: float, min_keep: int
-    ):
-        llama_cpp.llama_sample_tail_free(
-            self.ctx, llama_cpp.byref(candidates.candidates), z, min_keep
-        )
-
     def sample_typical(
         self, candidates: "_LlamaTokenDataArray", p: float, min_keep: int
     ):
@@ -685,9 +678,6 @@ def sample(
             ctx_main.sample_top_k(
                 token_data_array, self.params.top_k, min_keep=min_keep
             )
-            ctx_main.sample_tail_free(
-                token_data_array, self.params.tfs_z, min_keep=min_keep
-            )
             ctx_main.sample_typical(
                 token_data_array, self.params.typical_p, min_keep=min_keep
             )
@@ -776,10 +766,6 @@ def add_min_p(self, p: float, min_keep: int):
         sampler = llama_cpp.llama_sampler_init_min_p(p, min_keep)
         self._add_sampler(sampler)

-    def add_tail_free(self, z: float, min_keep: int):
-        sampler = llama_cpp.llama_sampler_init_tail_free(z, min_keep)
-        self._add_sampler(sampler)
-
     def add_typical(self, p: float, min_keep: int):
         sampler = llama_cpp.llama_sampler_init_typical(p, min_keep)
         self._add_sampler(sampler)

llama_cpp/llama.py

Lines changed: 0 additions & 1 deletion
@@ -745,7 +745,6 @@ def apply_func(token_data_array: llama_cpp.llama_token_data_array_p):
                 n_probs = 0
                 min_keep = max(1, n_probs)
                 sampler.add_top_k(top_k)
-                sampler.add_tail_free(tfs_z, min_keep)
                 sampler.add_typical(typical_p, min_keep)
                 sampler.add_top_p(top_p, min_keep)
                 sampler.add_min_p(min_p, min_keep)

llama_cpp/llama_cpp.py

Lines changed: 35 additions & 11 deletions
@@ -3191,17 +3191,6 @@ def llama_sampler_init_min_p(p: float, min_keep: int) -> llama_sampler_p:
     ...


-# /// @details Tail Free Sampling described in https://www.trentonbricken.com/Tail-Free-Sampling/.
-# LLAMA_API struct llama_sampler * llama_sampler_init_tail_free (float z, size_t min_keep);
-@ctypes_function(
-    "llama_sampler_init_tail_free",
-    [ctypes.c_float, ctypes.c_size_t],
-    llama_sampler_p_ctypes,
-)
-def llama_sampler_init_tail_free(z: float, min_keep: int) -> llama_sampler_p:
-    ...
-
-
 # /// @details Locally Typical Sampling implementation described in the paper https://arxiv.org/abs/2202.00666.
 # LLAMA_API struct llama_sampler * llama_sampler_init_typical (float p, size_t min_keep);
 @ctypes_function(
@@ -3343,6 +3332,41 @@ def llama_sampler_init_penalties(
     ...


+# /// @details DRY sampler, designed by p-e-w, as described in: https://github.com/oobabooga/text-generation-webui/pull/5677, porting Koboldcpp implementation authored by pi6am: https://github.com/LostRuins/koboldcpp/pull/982
+# LLAMA_API struct llama_sampler * llama_sampler_init_dry(
+#     const struct llama_model * model,
+#     float dry_multiplier,
+#     float dry_base,
+#     int32_t dry_allowed_length,
+#     int32_t dry_penalty_last_n,
+#     const char ** seq_breakers,
+#     size_t num_breakers);
+@ctypes_function(
+    "llama_sampler_init_dry",
+    [
+        llama_model_p_ctypes,
+        ctypes.c_float,
+        ctypes.c_float,
+        ctypes.c_int32,
+        ctypes.c_int32,
+        ctypes.POINTER(ctypes.c_char_p),
+        ctypes.c_size_t,
+    ],
+    llama_sampler_p_ctypes,
+)
+def llama_sampler_init_dry(
+    model: llama_model_p,
+    dry_multiplier: float,
+    dry_base: float,
+    dry_allowed_length: int,
+    dry_penalty_last_n: int,
+    seq_breakers: CtypesArray[bytes],
+    num_breakers: int,
+    /,
+) -> llama_sampler_p:
+    ...
+
+
 # LLAMA_API struct llama_sampler * llama_sampler_init_logit_bias(
 #     int32_t n_vocab,
 #     int32_t n_logit_bias,

vendor/llama.cpp

0 commit comments

Comments (0)