From 9e8a7cf8475e97ed9340fa3c9ccb9d0900eb8f42 Mon Sep 17 00:00:00 2001 From: Viduni Wickramarachchi Date: Mon, 3 Mar 2025 13:58:41 -0500 Subject: [PATCH 1/4] [Obs AI Assistant] Improve error handling in evaluation framework --- .../common/convert_messages_for_inference.ts | 26 +++++++++++++++++-- .../server/service/client/index.ts | 5 ++-- .../scenarios/elasticsearch/index.spec.ts | 6 +++++ .../evaluation/scenarios/kb/index.spec.ts | 4 +-- .../server/functions/query/index.ts | 4 ++- 5 files changed, 37 insertions(+), 8 deletions(-) diff --git a/x-pack/platform/plugins/shared/observability_ai_assistant/common/convert_messages_for_inference.ts b/x-pack/platform/plugins/shared/observability_ai_assistant/common/convert_messages_for_inference.ts index 229183ed142a7..5e7e4fc6dd944 100644 --- a/x-pack/platform/plugins/shared/observability_ai_assistant/common/convert_messages_for_inference.ts +++ b/x-pack/platform/plugins/shared/observability_ai_assistant/common/convert_messages_for_inference.ts @@ -11,13 +11,35 @@ import { MessageRole as InferenceMessageRole, } from '@kbn/inference-common'; import { generateFakeToolCallId } from '@kbn/inference-plugin/common'; +import type { Logger } from '@kbn/logging'; import { Message, MessageRole } from '.'; -export function convertMessagesForInference(messages: Message[]): InferenceMessage[] { +export function convertMessagesForInference( + messages: Message[], + logger: Pick<Logger, 'error'> +): InferenceMessage[] { const inferenceMessages: InferenceMessage[] = []; messages.forEach((message) => { if (message.message.role === MessageRole.Assistant) { + let parsedArguments; + if (message.message.function_call?.name) { + try { + parsedArguments = message.message.function_call?.arguments + ? 
JSON.parse(message.message.function_call.arguments) + : {}; + } catch (error) { + logger.error( + `Failed to parse function call arguments when converting messages for inference: ${error}` + ); + // if the LLM returns invalid JSON, it is likley because it is hallucinating + // the function. We don't want to propogate the error about invalid JSON here. + // Any errors related to the function call will be caught when the function and + // it's arguments are is validated + return {}; + } + } + inferenceMessages.push({ role: InferenceMessageRole.Assistant, content: message.message.content ?? null, @@ -27,7 +49,7 @@ export function convertMessagesForInference(messages: Message[]): InferenceMessa { function: { name: message.message.function_call.name, - arguments: JSON.parse(message.message.function_call.arguments || '{}'), + arguments: parsedArguments || {}, }, toolCallId: generateFakeToolCallId(), }, diff --git a/x-pack/platform/plugins/shared/observability_ai_assistant/server/service/client/index.ts b/x-pack/platform/plugins/shared/observability_ai_assistant/server/service/client/index.ts index 2e2dddfbb08fe..3b873e6c163cb 100644 --- a/x-pack/platform/plugins/shared/observability_ai_assistant/server/service/client/index.ts +++ b/x-pack/platform/plugins/shared/observability_ai_assistant/server/service/client/index.ts @@ -463,15 +463,14 @@ export class ObservabilityAIAssistantClient { const options = { connectorId, system: systemMessage, - messages: convertMessagesForInference(messages), + messages: convertMessagesForInference(messages, this.dependencies.logger), toolChoice, tools, functionCalling: (simulateFunctionCalling ? 
'simulated' : 'auto') as FunctionCallingMode, }; this.dependencies.logger.debug( - () => - `Calling inference client with for name: "${name}" with options: ${JSON.stringify(options)}` + () => `Calling inference client for name: "${name}" with options: ${JSON.stringify(options)}` ); if (stream) { diff --git a/x-pack/solutions/observability/plugins/observability_ai_assistant_app/scripts/evaluation/scenarios/elasticsearch/index.spec.ts b/x-pack/solutions/observability/plugins/observability_ai_assistant_app/scripts/evaluation/scenarios/elasticsearch/index.spec.ts index f286b8c0bb662..4b2ced6c82529 100644 --- a/x-pack/solutions/observability/plugins/observability_ai_assistant_app/scripts/evaluation/scenarios/elasticsearch/index.spec.ts +++ b/x-pack/solutions/observability/plugins/observability_ai_assistant_app/scripts/evaluation/scenarios/elasticsearch/index.spec.ts @@ -144,6 +144,12 @@ describe('Elasticsearch functions', () => { expect(result.passed).to.be(true); }); + + after(async () => { + await esClient.indices.delete({ + index: 'testing_ai_assistant', + }); + }); }); }); diff --git a/x-pack/solutions/observability/plugins/observability_ai_assistant_app/scripts/evaluation/scenarios/kb/index.spec.ts b/x-pack/solutions/observability/plugins/observability_ai_assistant_app/scripts/evaluation/scenarios/kb/index.spec.ts index e047a02f5c582..2a6558ef1cd32 100644 --- a/x-pack/solutions/observability/plugins/observability_ai_assistant_app/scripts/evaluation/scenarios/kb/index.spec.ts +++ b/x-pack/solutions/observability/plugins/observability_ai_assistant_app/scripts/evaluation/scenarios/kb/index.spec.ts @@ -96,7 +96,7 @@ describe('Knowledge base', () => { const conversation = await chatClient.complete({ messages: prompt }); const result = await chatClient.evaluate(conversation, [ - 'Uses KB retrieval function to find information about the Quantum Revectorization Engine', + 'Uses context function response to find information about the Quantum Revectorization Engine', 
'Correctly identifies Dr. Eliana Stone at Acme Labs in 2023 as the inventor', 'Accurately describes that it reorders the subatomic structure of materials and can transform silicon wafers into superconductive materials', 'Does not invent unrelated or hallucinated details not present in the KB', @@ -111,7 +111,7 @@ describe('Knowledge base', () => { const conversation = await chatClient.complete({ messages: prompt }); const result = await chatClient.evaluate(conversation, [ - 'Uses KB retrieval function to find the correct document about QRE constraints', + 'Uses context function response to find the correct document about QRE constraints', 'Mentions the 2 nanometer limit on the revectorization radius', 'Mentions that specialized fusion reactors are needed', 'Does not mention information unrelated to constraints or energy (i.e., does not mention the inventor or silicon wafer transformation from doc-invention-1)', diff --git a/x-pack/solutions/observability/plugins/observability_ai_assistant_app/server/functions/query/index.ts b/x-pack/solutions/observability/plugins/observability_ai_assistant_app/server/functions/query/index.ts index 0316e97deeade..fbda995416ad5 100644 --- a/x-pack/solutions/observability/plugins/observability_ai_assistant_app/server/functions/query/index.ts +++ b/x-pack/solutions/observability/plugins/observability_ai_assistant_app/server/functions/query/index.ts @@ -102,6 +102,7 @@ export function registerQueryFunction({ }; } ); + functions.registerFunction( { name: QUERY_FUNCTION_NAME, @@ -128,7 +129,8 @@ export function registerQueryFunction({ connectorId, messages: convertMessagesForInference( // remove system message and query function request - messages.filter((message) => message.message.role !== MessageRole.System).slice(0, -1) + messages.filter((message) => message.message.role !== MessageRole.System).slice(0, -1), + resources.logger ), logger: resources.logger, tools: Object.fromEntries( From e689f62cb9a52ca2f5609e42c64a0f2735fe48ea Mon 
Sep 17 00:00:00 2001 From: Viduni Wickramarachchi Date: Mon, 3 Mar 2025 16:16:42 -0500 Subject: [PATCH 2/4] [Obs AI Assistant] Increase the 429 backoff timer by 10s --- .../scripts/evaluation/kibana_client.ts | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/x-pack/solutions/observability/plugins/observability_ai_assistant_app/scripts/evaluation/kibana_client.ts b/x-pack/solutions/observability/plugins/observability_ai_assistant_app/scripts/evaluation/kibana_client.ts index b10c583a5bcdf..7829baf68515e 100644 --- a/x-pack/solutions/observability/plugins/observability_ai_assistant_app/scripts/evaluation/kibana_client.ts +++ b/x-pack/solutions/observability/plugins/observability_ai_assistant_app/scripts/evaluation/kibana_client.ts @@ -328,10 +328,10 @@ export class KibanaClient { } if (error.message.includes('Status code: 429')) { - that.log.info(`429, backing off 20s`); - - return timer(20000); + that.log.info(`429, backing off 30s`); + return timer(30000); } + that.log.info(`Retrying in 5s`); return timer(5000); }, From 0f88490c440a2bb24d601d31d101b7501f3a715e Mon Sep 17 00:00:00 2001 From: Viduni Wickramarachchi Date: Mon, 3 Mar 2025 16:30:09 -0500 Subject: [PATCH 3/4] [Obs AI Assistant] Revert index deletion --- .../evaluation/scenarios/elasticsearch/index.spec.ts | 6 ------ 1 file changed, 6 deletions(-) diff --git a/x-pack/solutions/observability/plugins/observability_ai_assistant_app/scripts/evaluation/scenarios/elasticsearch/index.spec.ts b/x-pack/solutions/observability/plugins/observability_ai_assistant_app/scripts/evaluation/scenarios/elasticsearch/index.spec.ts index 4b2ced6c82529..f286b8c0bb662 100644 --- a/x-pack/solutions/observability/plugins/observability_ai_assistant_app/scripts/evaluation/scenarios/elasticsearch/index.spec.ts +++ b/x-pack/solutions/observability/plugins/observability_ai_assistant_app/scripts/evaluation/scenarios/elasticsearch/index.spec.ts @@ -144,12 +144,6 @@ describe('Elasticsearch functions', () => { 
expect(result.passed).to.be(true); }); - - after(async () => { - await esClient.indices.delete({ - index: 'testing_ai_assistant', - }); - }); }); }); From acafe83a7910a8bac0175ee18a24e16d877e2e96 Mon Sep 17 00:00:00 2001 From: Viduni Wickramarachchi Date: Mon, 3 Mar 2025 16:37:46 -0500 Subject: [PATCH 4/4] [Obs AI Assistant] Fix typo --- .../common/convert_messages_for_inference.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/x-pack/platform/plugins/shared/observability_ai_assistant/common/convert_messages_for_inference.ts b/x-pack/platform/plugins/shared/observability_ai_assistant/common/convert_messages_for_inference.ts index 5e7e4fc6dd944..8af2f835a6930 100644 --- a/x-pack/platform/plugins/shared/observability_ai_assistant/common/convert_messages_for_inference.ts +++ b/x-pack/platform/plugins/shared/observability_ai_assistant/common/convert_messages_for_inference.ts @@ -35,7 +35,7 @@ export function convertMessagesForInference( // if the LLM returns invalid JSON, it is likley because it is hallucinating // the function. We don't want to propogate the error about invalid JSON here. // Any errors related to the function call will be caught when the function and - // it's arguments are is validated + // it's arguments are validated return {}; } }