Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[Obs AI Assistant] Add test for get_dataset_info #213231

Open
wants to merge 12 commits into
base: main
Choose a base branch
from
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,7 @@ export type LogDocument = Fields &
'cloud.availability_zone'?: string;
'cloud.project.id'?: string;
'cloud.instance.id'?: string;
'client.ip'?: string;
'error.stack_trace'?: string;
'error.exception'?: unknown;
'error.log'?: unknown;
Expand All @@ -68,6 +69,9 @@ export type LogDocument = Fields &
'event.duration': number;
'event.start': Date;
'event.end': Date;
'event.category'?: string;
'event.type'?: string;
'event.outcome'?: string;
labels?: Record<string, string>;
test_field: string | string[];
date: Date;
Expand All @@ -76,15 +80,19 @@ export type LogDocument = Fields &
svc: string;
hostname: string;
[LONG_FIELD_NAME]: string;
'http.status_code'?: number;
'http.response.status_code'?: number;
'http.response.bytes'?: number;
'http.request.method'?: string;
'http.request.referrer'?: string;
'http.version'?: string;
'url.path'?: string;
'process.name'?: string;
'kubernetes.namespace'?: string;
'kubernetes.pod.name'?: string;
'kubernetes.container.name'?: string;
'orchestrator.resource.name'?: string;
tags?: string | string[];
'user_agent.name'?: string;
}>;

class Log extends Serializable<LogDocument> {
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,144 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the "Elastic License
* 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side
* Public License v 1"; you may not use this file except in compliance with, at
* your election, the "Elastic License 2.0", the "GNU Affero General Public
* License v3.0 only", or the "Server Side Public License, v 1".
*/

import { LogDocument, log } from '@kbn/apm-synthtrace-client';
import moment from 'moment';
import { random } from 'lodash';
import { Scenario } from '../cli/scenario';
import { withClient } from '../lib/utils/with_client';
import { parseLogsScenarioOpts } from './helpers/logs_scenario_opts_parser';
import { IndexTemplateName } from '../lib/logs/custom_logsdb_index_templates';

/**
 * Synthtrace scenario that emits synthetic Apache logs.
 *
 * Two streams are generated over the requested range:
 *  - `apache.access`: access-log style messages (5 documents per generator call)
 *  - `apache.security`: "ATTACK SIMULATION" warnings (2 documents per generator call)
 *
 * When the `isLogsDb` scenario option is set, the LogsDb index template is
 * created during bootstrap and removed again during teardown.
 */
const scenario: Scenario<LogDocument> = async (runOptions) => {
  const { isLogsDb } = parseLogsScenarioOpts(runOptions.scenarioOpts);

  // Builds a single access-log document for the given timestamp.
  const buildAccessLog = (timestamp: number) => {
    const entry = constructApacheLogData();
    const formattedTime = moment(timestamp).format('DD/MMM/YYYY:HH:mm:ss Z');
    const requestLine = `${entry['http.request.method']} ${entry['url.path']} HTTP/${entry['http.version']}`;
    const accessMessage = `${entry['client.ip']} - - [${formattedTime}] "${requestLine}" ${entry['http.response.status_code']} ${entry['http.response.bytes']}`;

    return log
      .create({ isLogsDb })
      .message(accessMessage)
      .dataset('apache.access')
      .defaults(entry)
      .timestamp(timestamp);
  };

  // Builds a single simulated-attack warning document for the given timestamp.
  const buildAttackLog = (timestamp: number) => {
    const entry = constructApacheLogData();
    const attackMessage = `ATTACK SIMULATION: ${entry['client.ip']} attempted access to restricted path ${entry['url.path']}`;
    const attackFields = {
      ...entry,
      'event.category': 'network',
      'event.type': 'access',
      'event.outcome': 'failure',
    };

    return log
      .create({ isLogsDb })
      .message(attackMessage)
      .dataset('apache.security')
      .logLevel('warning')
      .defaults(attackFields)
      .timestamp(timestamp);
  };

  return {
    bootstrap: async ({ logsEsClient }) => {
      if (!isLogsDb) {
        return;
      }
      await logsEsClient.createIndexTemplate(IndexTemplateName.LogsDb);
    },

    teardown: async ({ logsEsClient }) => {
      if (!isLogsDb) {
        return;
      }
      await logsEsClient.deleteIndexTemplate(IndexTemplateName.LogsDb);
    },

    generate: ({ range, clients }) => {
      const { logger } = runOptions;
      const { logsEsClient } = clients;

      // Regular traffic: five access-log documents per generator invocation.
      const normalAccessLogs = range
        .interval('1m')
        .rate(50)
        .generator((timestamp) => Array.from({ length: 5 }, () => buildAccessLog(timestamp)));

      // Simulated attacks: two warning documents per generator invocation.
      const attackSimulationLogs = range
        .interval('1m')
        .rate(2)
        .generator((timestamp) => Array.from({ length: 2 }, () => buildAttackLog(timestamp)));

      const streams = logger.perf('generating_apache_logs', () => [
        normalAccessLogs,
        attackSimulationLogs,
      ]);

      return withClient(logsEsClient, streams);
    },
  };
};

export default scenario;

/**
 * Returns a uniformly random element of a non-empty list.
 */
function pickRandomEntry<T>(items: readonly T[]): T {
  return items[Math.floor(Math.random() * items.length)];
}

/**
 * Builds the field set for one synthetic Apache log entry.
 *
 * A request profile (method, path, response code, user agent, referrer) is
 * picked at random from a fixed list and combined with a random hostname,
 * cloud region and client IP.
 *
 * `http.version` and `http.response.bytes` are always populated because the
 * access-log message in this file interpolates both; leaving them unset
 * renders the literal text "undefined" in every generated message.
 *
 * @returns the fields to merge into a generated log document
 */
function constructApacheLogData(): LogDocument {
  const APACHE_LOG_SCENARIOS = [
    {
      method: 'GET',
      path: '/index.html',
      responseCode: 200,
      userAgent: 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36',
      referrer: 'https://www.google.com',
    },
    {
      method: 'POST',
      path: '/login',
      responseCode: 401,
      userAgent: 'PostmanRuntime/7.29.0',
      referrer: '-',
    },
    {
      method: 'GET',
      path: '/admin/dashboard',
      responseCode: 403,
      userAgent: 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7)',
      referrer: 'https://example.com/home',
    },
  ];

  const HOSTNAMES = ['www.example.com', 'blog.example.com', 'api.example.com'];
  const CLOUD_REGIONS = ['us-east-1', 'eu-west-2', 'ap-southeast-1'];
  const HTTP_VERSION = '1.1';

  const { method, path, responseCode, userAgent, referrer } = pickRandomEntry(APACHE_LOG_SCENARIOS);

  const clientIp = generateIpAddress();
  const hostname = pickRandomEntry(HOSTNAMES);
  const cloudRegion = pickRandomEntry(CLOUD_REGIONS);

  return {
    'http.request.method': method,
    'url.path': path,
    'http.response.status_code': responseCode,
    // Synthetic response size in bytes (arbitrary range for test data).
    'http.response.bytes': random(200, 5000),
    'http.version': HTTP_VERSION,
    hostname,
    'cloud.region': cloudRegion,
    'cloud.availability_zone': `${cloudRegion}a`,
    'client.ip': clientIp,
    'user_agent.name': userAgent,
    'http.request.referrer': referrer,
  };
}

/**
 * Produces a random dotted-quad IPv4 string; each of the four octets is
 * drawn independently from the inclusive range 0–255.
 */
function generateIpAddress() {
  const octets = Array.from({ length: 4 }, () => random(0, 255));
  return octets.join('.');
}
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ import { FunctionCallChatFunction } from '../../service/types';
const SELECT_RELEVANT_FIELDS_NAME = 'select_relevant_fields';
export const GET_RELEVANT_FIELD_NAMES_SYSTEM_MESSAGE = `You are a helpful assistant for Elastic Observability.
Your task is to determine which fields are relevant to the conversation by selecting only the field IDs from the provided list.
The list in the user message consists of JSON objects that map a human-readable "field" name to its unique "id".
The list in the user message consists of JSON objects that map a human-readable field "name" to its unique "id".
You must not output any field names — only the corresponding "id" values. Ensure that your output follows the exact JSON format specified.`;

export async function getRelevantFieldNames({
Expand Down Expand Up @@ -114,10 +114,12 @@ export async function getRelevantFieldNames({
'@timestamp': new Date().toISOString(),
message: {
role: MessageRole.User,
content: `Below is a list of fields. Each entry is a JSON object that contains a "field" (the field name) and an "id" (the unique identifier). Use only the "id" values from this list when selecting relevant fields:
content: `Below is a list of fields. Each entry is a JSON object that contains a "name" (the field name) and an "id" (the unique identifier). Use only the "id" values from this list when selecting relevant fields:

${fieldsInChunk
.map((field) => JSON.stringify({ field, id: shortIdTable.take(field) }))
.map((fieldName) =>
JSON.stringify({ name: fieldName, id: shortIdTable.take(fieldName) })
)
.join('\n')}`,
},
},
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,10 @@
* 2.0.
*/

import { Message } from '../../../common';
import { FunctionRegistrationParameters } from '..';
import { FunctionVisibility } from '../../../common/functions/types';
import { FunctionCallChatFunction, RespondFunctionResources } from '../../service/types';
import { getRelevantFieldNames } from './get_relevant_field_names';

export const GET_DATASET_INFO_FUNCTION_NAME = 'get_dataset_info';
Expand All @@ -32,68 +34,87 @@ export function registerGetDatasetInfoFunction({
index: {
type: 'string',
description:
'index pattern the user is interested in or empty string to get information about all available indices',
'Index pattern the user is interested in or empty string to get information about all available indices. You are allowed to use wildcards like `logs*`.',
},
},
required: ['index'],
} as const,
},
async ({ arguments: { index }, messages, chat }, signal) => {
const coreContext = await resources.context.core;
async ({ arguments: { index: indexPattern }, messages, chat }, signal) => {
return getDatasetInfo({ resources, indexPattern, signal, messages, chat });
}
);
}

const esClient = coreContext.elasticsearch.client;
const savedObjectsClient = coreContext.savedObjects.client;
export async function getDatasetInfo({
resources,
indexPattern,
signal,
messages,
chat,
}: {
resources: RespondFunctionResources;
indexPattern: string;
signal: AbortSignal;
messages: Message[];
chat: FunctionCallChatFunction;
}) {
const coreContext = await resources.context.core;

let indices: string[] = [];
const esClient = coreContext.elasticsearch.client;
const savedObjectsClient = coreContext.savedObjects.client;

try {
const body = await esClient.asCurrentUser.indices.resolveIndex({
name: index === '' ? ['*', '*:*'] : index.split(','),
expand_wildcards: 'open',
});
indices = [
...body.indices.map((i) => i.name),
...body.data_streams.map((d) => d.name),
...body.aliases.map((d) => d.name),
];
} catch (e) {
indices = [];
}
let indices: string[] = [];

if (index === '') {
return {
content: {
indices,
fields: [],
},
};
}
try {
  // An empty pattern means "everything", including remote clusters (`*:*`).
  // Otherwise the comma-separated input is split into individual patterns and
  // each one gets a trailing wildcard if it does not already end with one.
  // Interpolating the split array directly into a template string would
  // re-join it with commas and append `*` to the last pattern only.
  const name =
    indexPattern === ''
      ? ['*', '*:*']
      : indexPattern
          .split(',')
          .map((pattern) => pattern.trim())
          .filter((pattern) => pattern !== '')
          .map((pattern) => (pattern.endsWith('*') ? pattern : `${pattern}*`));

  const body = await esClient.asCurrentUser.indices.resolveIndex({
    name,
    expand_wildcards: 'open',
  });

  // Collect concrete indices, data streams and aliases into one flat list.
  indices = [
    ...body.indices.map((i) => i.name),
    ...body.data_streams.map((d) => d.name),
    ...body.aliases.map((d) => d.name),
  ];
} catch (e) {
  // Best effort: a failed resolve is logged and treated as "no matching indices".
  resources.logger.error(`Error resolving index pattern: ${e.message}`);
  indices = [];
}

if (indices.length === 0) {
return {
content: {
indices,
fields: [],
},
};
}
if (indices.length === 0 || indexPattern === '') {
return {
content: {
indices,
fields: [],
},
};
}

const relevantFieldNames = await getRelevantFieldNames({
index,
messages,
esClient: esClient.asCurrentUser,
dataViews: await resources.plugins.dataViews.start(),
savedObjectsClient,
signal,
chat,
});
return {
content: {
indices: [index],
fields: relevantFieldNames.fields,
stats: relevantFieldNames.stats,
},
};
}
);
try {
const relevantFieldNames = await getRelevantFieldNames({
index: indices,
messages,
esClient: esClient.asCurrentUser,
dataViews: await resources.plugins.dataViews.start(),
savedObjectsClient,
signal,
chat,
});
return {
content: {
indices,
fields: relevantFieldNames.fields,
stats: relevantFieldNames.stats,
},
};
} catch (e) {
resources.logger.error(`Error getting relevant field names: ${e.message}`);
return {
content: {
indices,
fields: [],
},
};
}
}
Loading