Skip to content

Commit f297f2f

Browse files
committed
Bindings: Don't error on code 2 for llama_decode
llama_decode returns code 2 when a generation/process is aborted. Therefore, the generator state is not broken and can continue to be used.

Signed-off-by: kingbri <8082010+kingbri1@users.noreply.github.com>
1 parent 197a99d commit f297f2f

File tree

2 files changed

+7
-4
lines changed

2 files changed

+7
-4
lines changed

bindings/binding.cpp

+6-4
Original file line numberDiff line numberDiff line change
@@ -792,8 +792,9 @@ const char* InferToReadbackBuffer(
792792
currentBatchSize
793793
);
794794

795-
if (llama_decode(context, batch)) {
796-
finishReason = "BatchDecode";
795+
int decodeResult = llama_decode(context, batch);
796+
if (decodeResult) {
797+
finishReason = decodeResult == 2 ? "Aborted" : "BatchDecode";
797798
return false;
798799
}
799800
}
@@ -841,8 +842,9 @@ const char* InferToReadbackBuffer(
841842

842843
// Continue generation
843844
auto gen = [&](const llama_batch& batch, llama_sampler* smpl) -> std::pair<llama_token, bool> {
844-
if (llama_decode(context, batch)) {
845-
finishReason = "BatchDecode";
845+
int decodeResult = llama_decode(context, batch);
846+
if (decodeResult) {
847+
finishReason = decodeResult == 2 ? "Aborted" : "BatchDecode";
846848
return {0, true};
847849
}
848850

bindings/types.ts

+1
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@ export enum BindingFinishReason {
1818
MaxNewTokens = "MaxNewTokens",
1919
StopString = "StopString",
2020
TokenEncode = "TokenEncode",
21+
Aborted = "Aborted",
2122
}
2223

2324
export type GenerationChunk = StreamChunk | FinishChunk;

0 commit comments

Comments
 (0)