From 14198a87873e16793f0cacf3cca513713d493b29 Mon Sep 17 00:00:00 2001
From: Takala
Date: Tue, 24 Dec 2024 21:20:38 +0800
Subject: [PATCH] Llm core (#64)

* refactor: rename isHarmfulContentFile to isHarmfulContent and remove unused
  function
* feat: add off-topic detection and moderation warning to chat response
---
 src/lib/schema/conversation.ts              |   4 +
 src/lib/server/llm.ts                       | 130 ++++++++++++------
 src/lib/server/prompt.ts                    |  10 ++
 .../conversations/[conv_id]/chat/+server.ts |   6 +-
 4 files changed, 106 insertions(+), 44 deletions(-)

diff --git a/src/lib/schema/conversation.ts b/src/lib/schema/conversation.ts
index 7cd3592..bf60320 100644
--- a/src/lib/schema/conversation.ts
+++ b/src/lib/schema/conversation.ts
@@ -16,6 +16,10 @@ export const ConversationSchema = z.object({
 			audio: z.string().nullable() // to find the raw file
 		})
 	),
+	warning: z.object({
+		moderation: z.boolean().default(false),
+		offTopic: z.number().default(0)
+	}),
 	subtaskCompleted: z.array(z.boolean().default(false)),
 	summary: z.string().nullable(),
 	keyPoints: z.array(z.string()).nullable()
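
A note on the schema hunk above: in zod, the inner .default(...) calls fill in missing keys only when the warning object itself is present in the stored document. A minimal TypeScript sketch of that behavior (WarningSchema is illustrative shorthand, not an export added by this patch):

import { z } from 'zod';

// Mirrors the `warning` field added to ConversationSchema above.
const WarningSchema = z.object({
	moderation: z.boolean().default(false),
	offTopic: z.number().default(0)
});

console.log(WarningSchema.parse({})); // => { moderation: false, offTopic: 0 }

// A conversation document written before this patch has no `warning` key at
// all, so parsing it would fail unless the object itself is also defaulted:
const WarningWithDefault = WarningSchema.default({});
console.log(WarningWithDefault.parse(undefined)); // => { moderation: false, offTopic: 0 }

Whether existing documents need a migration or an outer .default({}) depends on how getConversationData validates stored data, which is outside this diff.
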
diff --git a/src/lib/server/llm.ts b/src/lib/server/llm.ts
index 9319d4b..efde686 100644
--- a/src/lib/server/llm.ts
+++ b/src/lib/server/llm.ts
@@ -10,6 +10,7 @@ import {
 	CONCEPT_SUMMARY_PROMPT,
 	DOCS_CONTEXT_SYSTEM_PROMPT,
 	GROUP_OPINION_SUMMARY_PROMPT,
+	OFF_TOPIC_DETECTION_PROMPT,
 	SUBTASKS_COMPLETED_PROMPT
 } from './prompt';
 
@@ -18,14 +19,63 @@ const openai = new OpenAI({
 	baseURL: env.OPENAI_BASE_URL
});
 
-async function isHarmfulContent(content: string): Promise<boolean> {
+async function isHarmfulContent(
+	content: string
+): Promise<{ success: boolean; harmful: boolean; error?: string }> {
 	console.log('Checking content for harmful content:', { contentLength: content.length });
-	const moderation = await openai.moderations.create({
-		model: 'omni-moderation-latest',
-		input: content
-	});
-	console.log('Moderation result:', moderation.results[0]);
-	return moderation.results[0].flagged;
+	try {
+		const moderation = await openai.moderations.create({
+			model: 'omni-moderation-latest',
+			input: content
+		});
+		console.log('Moderation result:', moderation.results[0]);
+		return {
+			success: true,
+			harmful: moderation.results[0].flagged,
+			error: ''
+		};
+	} catch (error) {
+		console.error('Error in isHarmfulContent:', error);
+		return {
+			success: false,
+			harmful: false,
+			error: 'Failed to detect harmful content'
+		};
+	}
+}
+
+async function isOffTopic(
+	history: LLMChatMessage[],
+	prompt: string
+): Promise<{ success: boolean; off_topic: boolean; error?: string }> {
+	console.log('Checking if off topic:', { historyLength: history.length });
+	try {
+		const llm_message = history.length > 1 ? history[history.length - 2].content : prompt;
+		const student_message = history[history.length - 1].content;
+		const system_prompt = OFF_TOPIC_DETECTION_PROMPT.replace('{llmMessage}', llm_message).replace(
+			'{studentMessage}',
+			student_message
+		);
+
+		const response = await requestZodLLM(system_prompt, z.object({ result: z.boolean() }));
+
+		if (!response.success) {
+			throw new Error('Failed to detect off topic response');
+		}
+
+		return {
+			success: true,
+			off_topic: response.message.result,
+			error: ''
+		};
+	} catch (error) {
+		console.error('Error in isOffTopic:', error);
+		return {
+			success: false,
+			off_topic: false,
+			error: 'Failed to detect off topic'
+		};
+	}
 }
 
 export async function checkFileContent(
@@ -35,7 +85,7 @@ export async function checkFileContent(
 	try {
 		const content = await fs.readFile(filePath, 'utf-8');
 		console.log('File content read successfully:', { contentLength: content.length });
-		if (await isHarmfulContentFile(content)) {
+		if ((await isHarmfulContent(content)).harmful) {
 			console.warn('Harmful content detected in file');
 			return {
 				success: false,
@@ -57,15 +107,6 @@ export async function checkFileContent(
 	}
 }
 
-export async function isHarmfulContentFile(message: string) {
-	const moderation = await openai.moderations.create({
-		model: 'omni-moderation-latest',
-		input: message
-	});
-
-	return moderation.results[0].flagged;
-}
-
 export async function requestChatLLM(
 	system_prompt: string,
 	history: LLMChatMessage[],
@@ -137,7 +178,13 @@ export async function chatWithLLMByDocs(
 	subtasks: string[],
 	resources: Resource[],
 	temperature = 0.7
-): Promise<{ success: boolean; message: string; subtask_completed: boolean[]; error?: string }> {
+): Promise<{
+	success: boolean;
+	message: string;
+	subtask_completed: boolean[];
+	warning: { off_topic: boolean; moderation: boolean };
+	error?: string;
+}> {
 	console.log('Starting chatWithLLMByDocs:', {
 		historyLength: history.length,
 		task,
@@ -145,15 +192,6 @@ export async function chatWithLLMByDocs(
 		resourcesCount: resources.length
 	});
 	try {
-		const last_message_content = history[history.length - 1]?.content;
-		if (last_message_content && (await isHarmfulContent(last_message_content))) {
-			return {
-				success: false,
-				message: '',
-				subtask_completed: [],
-				error: 'Harmful content detected in the last message'
-			};
-		}
 		const formatted_docs = resources
 			.map((doc, index) => {
 				const title = doc.name || `Document ${index + 1}`;
@@ -165,25 +203,30 @@ export async function chatWithLLMByDocs(
 			.replace('{subtasks}', subtasks.join('\n'))
 			.replace('{resources}', formatted_docs);
 
-		const subtask_completed = await checkSubtaskCompleted(history, subtasks);
-		console.log('Formatted system prompt:', {
-			promptLength: system_prompt.length,
-			subtaskCompletedCount: subtask_completed.completed.length
-		});
-
-		const response = await requestChatLLM(system_prompt, history, temperature);
-		console.log('Chat response received:', {
-			success: response.success,
-			messageLength: response.message.length
-		});
-
-		if (!response.success) {
-			throw new Error('Failed to parse response');
-		}
+		const [response, subtask_completed, moderation, off_topic] = await Promise.all([
+			requestChatLLM(system_prompt, history, temperature),
+			checkSubtaskCompleted(history, subtasks),
+			isHarmfulContent(history[history.length - 1].content),
+			isOffTopic(history, system_prompt)
+		]);
+
+		if (
+			!response.success ||
+			!subtask_completed.success ||
+			!moderation.success ||
+			!off_topic.success
+		) {
+			throw new Error('Failed to get response');
+		}
 
 		return {
 			success: true,
 			message: response.message,
-			subtask_completed: subtask_completed.completed
+			subtask_completed: subtask_completed.completed,
+			warning: {
+				moderation: moderation.harmful,
+				off_topic: off_topic.off_topic
+			}
 		};
 	} catch (error) {
 		console.error('Error in chatWithLLMByDocs:', error);
@@ -191,7 +234,8 @@ export async function chatWithLLMByDocs(
 			success: false,
 			message: '',
 			subtask_completed: [],
-			error: 'Failed to chat with LLM'
+			warning: { moderation: false, off_topic: false },
+			error: 'Failed to chat with LLM by docs'
 		};
 	}
 }
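
For reviewers, a sketch of how a caller consumes the new return shape of chatWithLLMByDocs. The argument values are made up, and it is assumed from the function body that the two parameters ahead of subtasks are the chat history and the task string, with LLMChatMessage roughly { role: string; content: string }:

import { chatWithLLMByDocs } from '$lib/server/llm';

// Hypothetical caller, not part of this patch.
const result = await chatWithLLMByDocs(
	[{ role: 'user', content: 'What do plants need for photosynthesis?' }], // history
	'Explain photosynthesis', // task
	['Mention light', 'Mention water'], // subtasks
	[], // resources
	0.7 // temperature
);

if (result.success) {
	// The warning flags are advisory: the reply is still returned, and the
	// route layer decides how to surface moderation or off-topic hits.
	if (result.warning.moderation) console.warn('Reply flagged by moderation');
	if (result.warning.off_topic) console.warn('Student message drifted off topic');
}

Because each helper catches internally and reports success: false rather than throwing, a single failed check cannot reject the Promise.all; the combined guard afterwards funnels every failure into the one catch path.
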
diff --git a/src/lib/server/prompt.ts b/src/lib/server/prompt.ts
index a05aff1..493ddb2 100644
--- a/src/lib/server/prompt.ts
+++ b/src/lib/server/prompt.ts
@@ -36,6 +36,16 @@ export const CHAT_SUMMARY_PROMPT = `
 學生的關鍵字:
 `;
 
+export const OFF_TOPIC_DETECTION_PROMPT = `
+請偵測學生的訊息是否偏離聊天主題,並回傳一個 Boolean 值,如果偏離主題則回傳 true,否則回傳 false。
+
+LLM訊息:
+{llmMessage}
+學生訊息:
+{studentMessage}
+結果:
+`;
+
 export const CONCEPT_SUMMARY_PROMPT = `
 以下是學生們個別的觀點與想法,每位學生的想法與觀點用{separator}分隔,請你總結學生們的觀點,並歸納出學生的正反意見,以及學生對於這個概念的理解。
 
diff --git a/src/routes/api/session/[id]/group/[group_number]/conversations/[conv_id]/chat/+server.ts b/src/routes/api/session/[id]/group/[group_number]/conversations/[conv_id]/chat/+server.ts
index b800717..7981342 100644
--- a/src/routes/api/session/[id]/group/[group_number]/conversations/[conv_id]/chat/+server.ts
+++ b/src/routes/api/session/[id]/group/[group_number]/conversations/[conv_id]/chat/+server.ts
@@ -28,7 +28,7 @@ export const POST: RequestHandler = async ({ request, params, locals }) => {
 		const conversation_ref = await getConversationRef(id, group_number, conv_id);
 		console.log('Retrieved conversation reference');
 
-		const { userId, task, subtasks, resources, history, subtaskCompleted } =
+		const { userId, task, subtasks, resources, history, warning, subtaskCompleted } =
 			await getConversationData(conversation_ref);
 		console.log('Retrieved conversation data', { userId, task, subtasksCount: subtasks.length });
 
@@ -83,6 +83,10 @@ export const POST: RequestHandler = async ({ request, params, locals }) => {
 					content: response.message
 				}
 			],
+			warning: {
+				moderation: warning.moderation || response.warning.moderation,
+				offTopic: response.warning.off_topic ? warning.offTopic + 1 : 0
+			},
 			subtaskCompleted: subtaskCompleted.map(
 				(completed, index) => completed || response.subtask_completed[index]
 			)
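
One note on the warning bookkeeping in the route above: moderation is sticky (once true, the || keeps it true), while offTopic is a consecutive counter, incremented on each off-topic exchange and reset to zero as soon as the student returns to the task. A sketch of how a consumer might read the stored state; the threshold and helper are illustrative only, not part of this patch:

// Hypothetical consumer of the stored warning state.
const OFF_TOPIC_NUDGE_THRESHOLD = 3; // made-up value

function warningBanner(warning: { moderation: boolean; offTopic: number }): string | null {
	if (warning.moderation) {
		return 'This conversation contains content flagged by moderation.';
	}
	if (warning.offTopic >= OFF_TOPIC_NUDGE_THRESHOLD) {
		return `Off topic for ${warning.offTopic} messages in a row. Consider returning to the task.`;
	}
	return null;
}
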