Bindings: Don't error on code 2 for llama_decode

kingbri1 · kingbri1 · commit f297f2fb2bec · 2025-03-06T01:26:03.000-05:00
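llama_decode returns code 2 when a generation or prompt-processing run is
aborted. In that case the generator state is not broken and can continue
to be used, so the finish reason is reported as "Aborted" instead of the
"BatchDecode" error.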

Signed-off-by: kingbri <8082010+kingbri1@users.noreply.github.com>
diff --git a/bindings/binding.cpp b/bindings/binding.cpp
@@ -792,8 +792,9 @@ const char* InferToReadbackBuffer(
                 currentBatchSize
             );
 
-            if (llama_decode(context, batch)) {
-                finishReason = "BatchDecode";
+            int decodeResult = llama_decode(context, batch);
+            if (decodeResult) {
+                finishReason = decodeResult == 2 ? "Aborted" : "BatchDecode";
                 return false;
             }
         }
@@ -841,8 +842,9 @@ const char* InferToReadbackBuffer(
 
     // Continue generation
     auto gen = [&](const llama_batch& batch, llama_sampler* smpl) -> std::pair<llama_token, bool> {
-        if (llama_decode(context, batch)) {
-            finishReason = "BatchDecode";
+        int decodeResult = llama_decode(context, batch);
+        if (decodeResult) {
+            finishReason = decodeResult == 2 ? "Aborted" : "BatchDecode";
             return {0, true};
         }
 
diff --git a/bindings/types.ts b/bindings/types.ts
@@ -18,6 +18,7 @@ export enum BindingFinishReason {
     MaxNewTokens = "MaxNewTokens",
     StopString = "StopString",
     TokenEncode = "TokenEncode",
+    Aborted = "Aborted",
 }
 
 export type GenerationChunk = StreamChunk | FinishChunk;

Original file line number	Diff line number	Diff line change
`@@ -18,6 +18,7 @@ export enum BindingFinishReason {`
`18`	`18`	`MaxNewTokens = "MaxNewTokens",`
`19`	`19`	`StopString = "StopString",`
`20`	`20`	`TokenEncode = "TokenEncode",`
	`21`	`+ Aborted = "Aborted",`
`21`	`22`	`}`
`22`	`23`
`23`	`24`	`export type GenerationChunk = StreamChunk \| FinishChunk;`