From c704918e781f86cf6e8516309e4d2744b521745a Mon Sep 17 00:00:00 2001 From: Nathan Sarrazin Date: Wed, 12 Feb 2025 11:45:14 +0000 Subject: [PATCH] fix: handle no beginToken for token-based reasoning models --- chart/env/prod.yaml | 2 +- src/lib/server/models.ts | 2 +- src/lib/server/textGeneration/generate.ts | 11 +++++++++-- 3 files changed, 11 insertions(+), 4 deletions(-) diff --git a/chart/env/prod.yaml b/chart/env/prod.yaml index 6e4d1edb827..e611bccf2aa 100644 --- a/chart/env/prod.yaml +++ b/chart/env/prod.yaml @@ -139,7 +139,7 @@ envVars: "description": "The first reasoning model from DeepSeek, distilled into a 32B dense model. Outperforms o1-mini on multiple benchmarks.", "reasoning": { "type": "tokens", - "beginToken": "<think>", + "beginToken": "", "endToken": "</think>" }, "promptExamples": [ diff --git a/src/lib/server/models.ts b/src/lib/server/models.ts index 746f41b4aa4..be657d43ed2 100644 --- a/src/lib/server/models.ts +++ b/src/lib/server/models.ts @@ -24,7 +24,7 @@ const reasoningSchema = z.union([ }), z.object({ type: z.literal("tokens"), // use beginning and end tokens that define the reasoning portion of the answer - beginToken: z.string(), + beginToken: z.string(), // empty string means the model starts in reasoning mode endToken: z.string(), }), z.object({ diff --git a/src/lib/server/textGeneration/generate.ts b/src/lib/server/textGeneration/generate.ts index b92d924dd27..51ebaa0c022 100644 --- a/src/lib/server/textGeneration/generate.ts +++ b/src/lib/server/textGeneration/generate.ts @@ -27,7 +27,10 @@ export async function* generate( const startTime = new Date(); if ( model.reasoning && - (model.reasoning.type === "regex" || model.reasoning.type === "summarize") + // if the beginToken is an empty string, the model starts in reasoning mode + (model.reasoning.type === "regex" + model.reasoning.type === "summarize" || + (model.reasoning.type === "tokens" && model.reasoning.beginToken === "")) ) { // if the model has reasoning in regex or summarize mode, 
it starts in reasoning mode // and we extract the answer from the reasoning @@ -104,7 +107,11 @@ Do not use prefixes such as Response: or Answer: when answering to the user.`, } else if (model.reasoning && model.reasoning.type === "tokens") { // make sure to remove the content of the reasoning buffer from // the final answer to avoid duplication - const beginIndex = reasoningBuffer.indexOf(model.reasoning.beginToken); + + // if the beginToken is an empty string, we don't need to remove anything + const beginIndex = model.reasoning.beginToken + ? reasoningBuffer.indexOf(model.reasoning.beginToken) + : 0; const endIndex = reasoningBuffer.lastIndexOf(model.reasoning.endToken); if (beginIndex !== -1 && endIndex !== -1) {