Skip to content

Commit e299e94

Browse files
committed
Bindings: Update to new K/V cache API
Use the llama_kv_self_* functions instead of the llama_kv_cache_* functions, which are marked as deprecated. Signed-off-by: kingbri <8082010+kingbri1@users.noreply.github.com>
1 parent c60e3ab commit e299e94

File tree

1 file changed

+5
-5
lines changed

1 file changed

+5
-5
lines changed

bindings/binding.cpp

+5-5
Original file line number | Diff line number | Diff line change
@@ -198,7 +198,7 @@ void FreeCtx(llama_context* ctx)
198198
void ClearContextKVCache(llama_context* ctx)
199199
{
200200
prevTokens.clear();
201-
llama_kv_cache_clear(ctx);
201+
llama_kv_self_clear(ctx);
202202
}
203203

204204
void FreeModel(llama_model* model)
@@ -799,7 +799,7 @@ const char* InferToReadbackBuffer(
799799

800800
// Check when tokens diverge and remove everything after the common prefix
801801
const size_t prefixEnd = common_lcp(tokens, prevTokens);
802-
llama_kv_cache_seq_rm(context, 0, prefixEnd, -1);
802+
llama_kv_self_seq_rm(context, 0, prefixEnd, -1);
803803

804804
for (size_t i = prefixEnd; i < tokens.size(); i += batchSize) {
805805
const size_t remaining = tokens.size() - i;
@@ -895,7 +895,7 @@ const char* InferToReadbackBuffer(
895895
auto [newTokenId, isEnd] = gen(firstBatch, sampler);
896896

897897
// Extra samplers - Banned strings
898-
int rewindPos = llama_get_kv_cache_used_cells(context);
898+
int rewindPos = llama_kv_self_used_cells(context);
899899
int rewindTokenId = 0;
900900
int tokenCount = 0;
901901
int rewindTokenCount = 0;
@@ -952,7 +952,7 @@ const char* InferToReadbackBuffer(
952952
buffer = "";
953953

954954
// Save last known accept point in case we have to rewind back to the last accept.
955-
rewindPos = llama_get_kv_cache_used_cells(context);
955+
rewindPos = llama_kv_self_used_cells(context);
956956
rewindTokenId = newTokenId;
957957
rewindTokenCount = tokenCount;
958958

@@ -979,7 +979,7 @@ const char* InferToReadbackBuffer(
979979
continue;
980980
}
981981
case MatchTrie::MatchResult::MATCHED_REWIND: {
982-
llama_kv_cache_seq_rm(context, 0, rewindPos, -1);
982+
llama_kv_self_seq_rm(context, 0, rewindPos, -1);
983983

984984
// Reset the detokenizer too when rewinding
985985
if (readbackBufferPtr->detokenizer) {

0 commit comments

Comments
 (0)