From c704918e781f86cf6e8516309e4d2744b521745a Mon Sep 17 00:00:00 2001 From: Nathan Sarrazin Date: Wed, 12 Feb 2025 11:45:14 +0000 Subject: [PATCH] fix: handle no beginToken for token-based reasoning models --- chart/env/prod.yaml | 2 +- src/lib/server/models.ts | 2 +- src/lib/server/textGeneration/generate.ts | 11 +++++++++-- 3 files changed, 11 insertions(+), 4 deletions(-) diff --git a/chart/env/prod.yaml b/chart/env/prod.yaml index 6e4d1edb827..e611bccf2aa 100644 --- a/chart/env/prod.yaml +++ b/chart/env/prod.yaml @@ -139,7 +139,7 @@ envVars: "description": "The first reasoning model from DeepSeek, distilled into a 32B dense model. Outperforms o1-mini on multiple benchmarks.", "reasoning": { "type": "tokens", - "beginToken": "<think>", + "beginToken": "", "endToken": "</think>" }, "promptExamples": [ diff --git a/src/lib/server/models.ts b/src/lib/server/models.ts index 746f41b4aa4..be657d43ed2 100644 --- a/src/lib/server/models.ts +++ b/src/lib/server/models.ts @@ -24,7 +24,7 @@ const reasoningSchema = z.union([ }), z.object({ type: z.literal("tokens"), // use beginning and end tokens that define the reasoning portion of the answer - beginToken: z.string(), + beginToken: z.string(), // empty string means the model starts in reasoning mode endToken: z.string(), }), z.object({ diff --git a/src/lib/server/textGeneration/generate.ts b/src/lib/server/textGeneration/generate.ts index b92d924dd27..51ebaa0c022 100644 --- a/src/lib/server/textGeneration/generate.ts +++ b/src/lib/server/textGeneration/generate.ts @@ -27,7 +27,10 @@ export async function* generate( const startTime = new Date(); if ( model.reasoning && - (model.reasoning.type === "regex" || model.reasoning.type === "summarize") + // if the beginToken is an empty string, the model starts in reasoning mode + (model.reasoning.type === "regex" + model.reasoning.type === "summarize" || + (model.reasoning.type === "tokens" && model.reasoning.beginToken === "")) ) { // if the model has reasoning in regex or summarize mode, 
it starts in reasoning mode // and we extract the answer from the reasoning @@ -104,7 +107,11 @@ Do not use prefixes such as Response: or Answer: when answering to the user.`, } else if (model.reasoning && model.reasoning.type === "tokens") { // make sure to remove the content of the reasoning buffer from // the final answer to avoid duplication - const beginIndex = reasoningBuffer.indexOf(model.reasoning.beginToken); + + // if the beginToken is an empty string, we don't need to remove anything + const beginIndex = model.reasoning.beginToken + ? reasoningBuffer.indexOf(model.reasoning.beginToken) + : 0; const endIndex = reasoningBuffer.lastIndexOf(model.reasoning.endToken); if (beginIndex !== -1 && endIndex !== -1) {