diff --git a/data/llm_forensic_report/prompt.txt b/data/llm_forensic_report/prompt.txt new file mode 100644 index 0000000000..f55489efde --- /dev/null +++ b/data/llm_forensic_report/prompt.txt @@ -0,0 +1,13 @@ +You are a highly skilled digital forensic analyst. Your task is to analyze a set of security events, which have been identified as potentially significant ("starred events") in a Timesketch investigation. Based on these events, generate a concise forensic report summary, formatted in Markdown. + +Focus on identifying: + +* **Incident Overview:** Provide a brief summary of what appears to have happened based on these events. What type of incident is suggested (e.g., unauthorized access, malware infection, data breach attempt)? +* **Key Findings:** Highlight the most important observations and indicators from the events. Be specific and mention key entities (usernames, IP addresses, file paths, process names) involved. +* **Timeline of Significant Events (Chronological Order):** Briefly outline the sequence of key actions observed in the starred events. +* **Potential Impact/Severity:** Assess the potential impact or severity of the incident based on the available information. +* **Recommended Next Steps:** Suggest 2-3 concrete next steps for the investigation based on your analysis. + +Use bolding (**...**) for key entities and findings. Format the output as a Markdown document. 
+ +Here are the events in JSON format: <events><EVENTS_JSON></events> \ No newline at end of file diff --git a/data/timesketch.conf b/data/timesketch.conf index 1df853cc12..0ae222cb9b 100644 --- a/data/timesketch.conf +++ b/data/timesketch.conf @@ -380,11 +380,17 @@ LLM_PROVIDER_CONFIGS = { }, }, 'llm_summarize': { - 'aistudio': { - 'model': 'gemini-2.0-flash-exp', + 'vertexai': { + 'model': 'gemini-2.0-flash-001', 'project_id': '', }, }, + 'llm_forensic_report': { + 'aistudio': { + 'model': 'gemini-2.0-flash-001', + 'api_key': '', + }, + }, 'default': { 'ollama': { 'server_url': 'http://ollama:11434', @@ -401,3 +407,6 @@ EXAMPLES_NL2Q = '/etc/timesketch/nl2q/examples_nl2q' # LLM event summarization configuration PROMPT_LLM_SUMMARIZATION = '/etc/timesketch/llm_summarize/prompt.txt' + +# LLM starred events to forensic report configuration +PROMPT_LLM_FORENSIC_REPORT = '/etc/timesketch/llm_forensic_report/prompt.txt' diff --git a/docker/dev/build/docker-entrypoint.sh b/docker/dev/build/docker-entrypoint.sh index 118b5a1154..a7222bc573 100755 --- a/docker/dev/build/docker-entrypoint.sh +++ b/docker/dev/build/docker-entrypoint.sh @@ -25,6 +25,7 @@ if [ "$1" = 'timesketch' ]; then ln -s /usr/local/src/timesketch/data/plaso_formatters.yaml /etc/timesketch/plaso_formatters.yaml ln -s /usr/local/src/timesketch/data/nl2q /etc/timesketch/ ln -s /usr/local/src/timesketch/data/llm_summarize /etc/timesketch/ + ln -s /usr/local/src/timesketch/data/llm_forensic_report /etc/timesketch/ # Set SECRET_KEY in /etc/timesketch/timesketch.conf if it isn't already set if grep -q "SECRET_KEY = '<KEY_GOES_HERE>'" /etc/timesketch/timesketch.conf; then diff --git a/timesketch/frontend-ng/src/assets/main.scss b/timesketch/frontend-ng/src/assets/main.scss index 2141a43805..4df44675e4 100644 --- a/timesketch/frontend-ng/src/assets/main.scss +++ b/timesketch/frontend-ng/src/assets/main.scss @@ -208,3 +208,15 @@ html { -o-transition: none !important; transition: none !important; } + 
+$llm-gradient: linear-gradient(90deg, + #8ab4f8 0%, + #81c995 20%, + #f8c665 40%, + #ec7764 60%, + #b39ddb 80%, + #8ab4f8 100%); + +:root { + --llm-gradient: #{$llm-gradient}; +} diff --git a/timesketch/frontend-ng/src/components/Explore/EventList.vue b/timesketch/frontend-ng/src/components/Explore/EventList.vue index f7d7de62fe..47805fb37f 100644 --- a/timesketch/frontend-ng/src/components/Explore/EventList.vue +++ b/timesketch/frontend-ng/src/components/Explore/EventList.vue @@ -246,6 +246,17 @@ limitations under the License. <v-icon title="Download current view as CSV">mdi-download</v-icon> </v-btn> + <v-btn + icon + @click="generateForensicReport()" + class="ml-2" + :loading="isGeneratingReport" + v-if="isStarredEventsFilterActive"> + <div class="ts-llm-icon-wrapper" v-if="!isGeneratingReport"> + <v-icon title="Generate forensic report with LLM from starred events">mdi-file-document-check</v-icon> + </div> + </v-btn> + <v-menu v-if="!disableSettings" offset-y :close-on-content-click="false"> <template v-slot:activator="{ on, attrs }"> <v-btn icon v-bind="attrs" v-on="on"> @@ -583,6 +594,7 @@ export default { itemsPerPage: this.itemsPerPage, }, isSummaryLoading: false, + isGeneratingReport: false, currentItemsPerPage: this.itemsPerPage, expandedRows: [], selectedFields: [{ field: 'message', type: 'text' }], @@ -621,6 +633,11 @@ export default { } }, computed: { + isStarredEventsFilterActive() { + return this.filterChips.some(chip => + chip.type === 'label' && chip.value === '__ts_star' + ) + }, summaryInfoMessage() { const totalEvents = this.eventList.meta.summary_event_count const uniqueEvents = this.eventList.meta.summary_unique_event_count @@ -948,7 +965,6 @@ export default { } else { this.errorSnackBar(msg) } - console.error('Error message: ' + msg) console.error(e) }) }, @@ -970,6 +986,35 @@ export default { this.isSummaryLoading = false }) }, + generateForensicReport() { + if (this.totalHits > 1000) { + this.warningSnackBar('This feature is currently 
limited to a 1000 starred events, try setting a timerange filter. ' + + 'This limit will be increased soon.', 10000); + return; + } + + this.isGeneratingReport = true; + const requestData = { + filter: this.currentQueryFilter + }; + + ApiClient.llmRequest(this.sketch.id, 'llm_forensic_report', requestData) + .then((response) => { + this.isGeneratingReport = false; + if (response.data && response.data.story_id) { + this.$store.dispatch('updateSketch', this.sketch.id); + this.successSnackBar('Forensic report generated! You can find it in the "Stories" section.'); + } else { + this.errorSnackBar('Error generating report. No story was created.'); + } + }) + .catch((error) => { + this.isGeneratingReport = false; + const errorMessage = (error.response && error.response.data && error.response.data.message) || 'Unknown error occurred'; + this.errorSnackBar(`Error generating report: ${errorMessage}`); + console.error('Error generating starred events report:', error); + }); + }, exportSearchResult: function () { this.exportDialog = true const now = new Date() @@ -1247,20 +1292,19 @@ th:first-child { padding: 0 0 0 10px !important; } +.ts-event-list-container { + display: flex; + flex-direction: column; + width: 100%; + gap: 20px; +} .ts-ai-summary-card { border: 1px solid transparent !important; border-radius: 8px; background-color: #fafafa; background-image: linear-gradient(white, white), - linear-gradient(90deg, - #8ab4f8 0%, - #81c995 20%, - #f8c665 40%, - #ec7764 60%, - #b39ddb 80%, - #8ab4f8 100% - ); + var(--llm-gradient); background-origin: border-box; background-clip: content-box, border-box; background-size: 300% 100%; @@ -1269,11 +1313,9 @@ th:first-child { display: block; margin-bottom: 20px; } - .v-data-table { display: block; /* Ensure block display for data table */ } - @keyframes borderBeamIridescent-subtle { 0% { background-position: 0% 50%; @@ -1282,25 +1324,16 @@ th:first-child { background-position: 100% 50%; } } - .theme--dark.ts-ai-summary-card { 
background-color: #1e1e1e; border-color: hsla(0,0%,100%,.12) !important; background-image: linear-gradient(#1e1e1e, #1e1e1e), - linear-gradient(90deg, - #8ab4f8 0%, - #81c995 20%, - #f8c665 40%, - #ec7764 60%, - #b39ddb 80%, - #8ab4f8 100% - ); - box-shadow: 0 2px 5px rgba(255, 255, 255, 0.08); + var(--llm-gradient); + box-shadow: 0 2px 5px rgba(255, 255, 255, 0.08); display: block; margin-bottom: 20px; } - .ts-ai-summary-text { white-space: pre-line; word-wrap: break-word; @@ -1309,15 +1342,12 @@ th:first-child { padding-left: 10px; padding-right: 10px; } - .ts-ai-summary-card .v-btn--icon { cursor: pointer; } - .ts-ai-summary-card .v-btn--icon:hover { opacity: 0.8; } - .ts-summary-placeholder-line { height: 1em; background-color: #e0e0e0; @@ -1325,21 +1355,17 @@ th:first-child { border-radius: 4px; width: 100%; } - .ts-summary-placeholder-line.short { width: 60%; } - .ts-summary-placeholder-line.long { width: 80%; } - .shimmer { background: linear-gradient(to right, #e0e0e0 8%, #f0f0f0 18%, #e0e0e0 33%); background-size: 800px 100%; animation: shimmer-animation 1.5s infinite linear forwards; } - @keyframes shimmer-animation { 0% { background-position: -468px 0; @@ -1348,32 +1374,44 @@ th:first-child { background-position: 468px 0; } } - -.ts-event-list-container { - display: flex; - flex-direction: column; - width: 100%; - gap: 20px; -} - ::v-deep .no-transition { transition: none !important; } - .ts-ai-summary-card-title { display: flex; align-items: baseline; } - .ts-ai-summary-title { margin-right: 8px; font-weight: normal; } - .ts-ai-summary-subtitle { font-size: 0.7em; color: grey; vertical-align: middle; display: inline-block; } +.ts-llm-icon-wrapper { + position: relative; + display: inline-flex; + align-items: center; + justify-content: center; +} +.ts-llm-icon-wrapper::after { + content: ""; + position: absolute; + top: -4px; + left: -4px; + right: -4px; + bottom: -4px; + border-radius: 50%; + background: var(--llm-gradient); + background-size: 300% 
100%; + opacity: 0.2; + animation: borderBeamIridescent-subtle 6s linear infinite; + z-index: -1; +} +.v-btn:hover .ts-llm-icon-wrapper::after { + opacity: 0.4; +} </style> diff --git a/timesketch/frontend-ng/src/mixins/snackBar.js b/timesketch/frontend-ng/src/mixins/snackBar.js index 97ac9bc918..efc021709c 100644 --- a/timesketch/frontend-ng/src/mixins/snackBar.js +++ b/timesketch/frontend-ng/src/mixins/snackBar.js @@ -23,32 +23,35 @@ const defaultSnackBar = { "timeout": defaultTimeout } -// These methids will be available to all components without any further imports. +// These methods will be available to all components without any further imports. Vue.mixin({ methods: { - successSnackBar(message) { + successSnackBar(message, timeout) { let snackbar = defaultSnackBar snackbar.message = message snackbar.color = "success" + snackbar.timeout = timeout || defaultTimeout this.$store.dispatch('setSnackBar', snackbar) }, - errorSnackBar(message) { + errorSnackBar(message, timeout) { let snackbar = defaultSnackBar snackbar.message = message snackbar.color = "error" + snackbar.timeout = timeout || defaultTimeout this.$store.dispatch('setSnackBar', snackbar) }, - warningSnackBar(message) { + warningSnackBar(message, timeout) { let snackbar = defaultSnackBar snackbar.message = message snackbar.color = "warning" + snackbar.timeout = timeout || defaultTimeout this.$store.dispatch('setSnackBar', snackbar) }, - infoSnackBar(message) { + infoSnackBar(message, timeout) { let snackbar = defaultSnackBar snackbar.message = message snackbar.color = "info" - snackbar.timeout = 2000 + snackbar.timeout = timeout || defaultTimeout this.$store.dispatch('setSnackBar', snackbar) }, } diff --git a/timesketch/lib/llms/actions.py b/timesketch/lib/llms/actions.py new file mode 100644 index 0000000000..5071b7a7c7 --- /dev/null +++ b/timesketch/lib/llms/actions.py @@ -0,0 +1,57 @@ +# Copyright 2025 Google Inc. All rights reserved. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Actions for LLM features in Timesketch.""" +import json +import logging +import time +from timesketch.models import db_session +from timesketch.models.sketch import Sketch, Story + +logger = logging.getLogger("timesketch.llm.actions") + + +def create_story(sketch: Sketch, content: str, title: str = None) -> int: + """Creates a Timesketch story with the given content. + Args: + sketch: Sketch object. + content: Text content to add to the story. + title: Title for the story. If None, a default title + with timestamp will be used. + Returns: + The ID of the newly created story. + Raises: + ValueError: If there's an error creating the story. 
+ """ + if title is None: + title = f"AI Generated Report - {time.strftime('%Y-%m-%d %H:%M')}" + try: + story = Story(title=title, sketch=sketch, user=sketch.user) + content_blocks = [ + { + "componentName": "", + "componentProps": {}, + "content": content, + "edit": False, + "showPanel": False, + "isActive": False, + } + ] + story.content = json.dumps(content_blocks) + db_session.add(story) + db_session.commit() + logger.debug("Created story with ID %s for sketch %s", story.id, sketch.id) + return story.id + except Exception as e: + logger.error("Error creating story: %s", e) + raise ValueError(f"Error creating story: {e}") from e diff --git a/timesketch/lib/llms/features/llm_forensic_report.py b/timesketch/lib/llms/features/llm_forensic_report.py new file mode 100644 index 0000000000..197ac87eca --- /dev/null +++ b/timesketch/lib/llms/features/llm_forensic_report.py @@ -0,0 +1,303 @@ +# Copyright 2025 Google Inc. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+"""LLM Forensic Report feature.""" +import json +import logging +import time +from typing import Any, Dict, List, Optional + +import pandas as pd +import prometheus_client +from flask import current_app +from opensearchpy import OpenSearch + +from timesketch.lib import utils +from timesketch.api.v1 import export +from timesketch.models.sketch import Sketch +from timesketch.lib.llms import actions +from timesketch.lib.definitions import METRICS_NAMESPACE +from timesketch.lib.llms.features.interface import LLMFeatureInterface + +logger = logging.getLogger("timesketch.llm.forensic_report_feature") + +METRICS = { + "llm_forensic_report_events_processed_total": prometheus_client.Counter( + "llm_forensic_report_events_processed_total", + "Total number of events processed for LLM forensic reports", + ["sketch_id"], + namespace=METRICS_NAMESPACE, + ), + "llm_forensic_report_unique_events_total": prometheus_client.Counter( + "llm_forensic_report_unique_events_total", + "Total number of unique events sent to the LLM for forensic report generation", + ["sketch_id"], + namespace=METRICS_NAMESPACE, + ), + "llm_forensic_report_stories_created_total": prometheus_client.Counter( + "llm_forensic_report_stories_created_total", + "Total number of forensic report stories created", + ["sketch_id"], + namespace=METRICS_NAMESPACE, + ), +} + + +class LLMForensicReportFeature(LLMFeatureInterface): + """LLM Forensic Report feature.""" + + NAME = "llm_forensic_report" + PROMPT_CONFIG_KEY = "PROMPT_LLM_FORENSIC_REPORT" + RESPONSE_SCHEMA = { + "type": "object", + "properties": { + "summary": { + "type": "string", + "description": "Detailed forensic report summary of the events", + } + }, + "required": ["summary"], + } + + def _get_prompt_text(self, events_dict: List[Dict[str, Any]]) -> str: + """Reads the prompt template from file and injects events. + Args: + events_dict: List of event dictionaries to inject into prompt. + Returns: + Complete prompt text with injected events. 
+ Raises: + ValueError: If the prompt path is not configured or placeholder is missing. + FileNotFoundError: If the prompt file cannot be found. + IOError: If there's an error reading the prompt file. + """ + prompt_file_path = current_app.config.get(self.PROMPT_CONFIG_KEY) + if not prompt_file_path: + logger.error("%s config not set", self.PROMPT_CONFIG_KEY) + raise ValueError("LLM forensic report prompt path not configured.") + + try: + with open(prompt_file_path, "r", encoding="utf-8") as file_handle: + prompt_template = file_handle.read() + except FileNotFoundError as exc: + logger.error("Forensic report prompt file not found: %s", prompt_file_path) + raise FileNotFoundError( + f"LLM Prompt file not found: {prompt_file_path}" + ) from exc + except IOError as e: + logger.error("Error reading prompt file: %s", e) + raise IOError("Error reading LLM prompt file.") from e + + if "<EVENTS_JSON>" not in prompt_template: + logger.error("Prompt template is missing the <EVENTS_JSON> placeholder") + raise ValueError( + "LLM forensic report prompt template is missing the " + "required <EVENTS_JSON> placeholder." + ) + + prompt_text = prompt_template.replace("<EVENTS_JSON>", json.dumps(events_dict)) + return prompt_text + + def _run_timesketch_query( + self, + sketch: Sketch, + query_string: str = "*", + query_filter: Optional[Dict] = None, + id_list: Optional[List] = None, + datastore: Optional[OpenSearch] = None, + timeline_ids: Optional[List] = None, + ) -> pd.DataFrame: + """Runs a timesketch query and returns results as a DataFrame. + Args: + sketch: The Sketch object to query. + query_string: Search query string. + query_filter: Dictionary with filter parameters. + id_list: List of event IDs to retrieve. + datastore: OpenSearch instance for querying. + timeline_ids: List of timeline IDs to query. + Returns: + pd.DataFrame: DataFrame containing query results. + Raises: + ValueError: If datastore is not provided or no valid indices are found. 
+ """ + if datastore is None: + raise ValueError("Datastore must be provided.") + + if not query_filter: + query_filter = {} + + if id_list: + id_query = " OR ".join([f'_id:"{event_id}"' for event_id in id_list]) + query_string = id_query + + all_indices = list({t.searchindex.index_name for t in sketch.timelines}) + indices_from_filter = query_filter.get("indices", all_indices) + + if "_all" in indices_from_filter: + indices_from_filter = all_indices + + indices, timeline_ids = utils.get_validated_indices(indices_from_filter, sketch) + + if not indices: + raise ValueError( + "No valid search indices were found to perform the search on." + ) + + result = datastore.search( + sketch_id=sketch.id, + query_string=query_string, + query_filter=query_filter, + query_dsl="", + indices=indices, + timeline_ids=timeline_ids, + ) + + return export.query_results_to_dataframe(result, sketch) + + def generate_prompt(self, sketch: Sketch, **kwargs: Any) -> str: + """Generates the forensic report prompt based on events from a query. + Args: + sketch: The Sketch object containing events to analyze. + **kwargs: Additional arguments including: + - form: Form data containing query and filter information. + - datastore: OpenSearch instance for querying. + - timeline_ids: List of timeline IDs to query. + Returns: + str: Generated prompt text with events to analyze. + Raises: + ValueError: If required parameters are missing or if no events are found. + """ + form = kwargs.get("form") + datastore = kwargs.get("datastore") + timeline_ids = kwargs.get("timeline_ids") + + if not form: + raise ValueError("Missing 'form' data in kwargs") + + query_filter = form.get("filter", {}) + query_string = form.get("query", "*") or "*" + + events_df = self._run_timesketch_query( + sketch, + query_string, + query_filter, + datastore=datastore, + timeline_ids=timeline_ids, + ) + + if events_df is None or events_df.empty: + return "No events to analyze for forensic report." 
+ + events_df["datetime_str"] = events_df["datetime"].astype(str) + events_df["combined_key"] = events_df["datetime_str"] + events_df["message"] + unique_df = events_df.drop_duplicates(subset="combined_key", keep="first") + + events_dict = ( + unique_df[["datetime_str", "message"]] + .rename(columns={"datetime_str": "datetime"}) + .to_dict(orient="records") + ) + + total_events_count = len(events_df) + unique_events_count = len(unique_df) + + METRICS["llm_forensic_report_events_processed_total"].labels( + sketch_id=str(sketch.id) + ).inc(total_events_count) + + METRICS["llm_forensic_report_unique_events_total"].labels( + sketch_id=str(sketch.id) + ).inc(unique_events_count) + + if not events_dict: + return "No events to analyze for forensic report." + + return self._get_prompt_text(events_dict) + + def process_response(self, llm_response: Any, **kwargs: Any) -> Dict[str, Any]: + """Processes the LLM response and creates a Story in the sketch. + Args: + llm_response: The response from the LLM model, expected to be a dictionary. + **kwargs: Additional arguments including: + - sketch_id: ID of the sketch being processed. + - sketch: The Sketch object. + - form: Form data containing query and filter information. + - datastore: OpenSearch instance for querying. + - timeline_ids: List of timeline IDs to query. + Returns: + Dictionary containing the processed response: + - summary: The forensic report text + - summary_event_count: Total number of events analyzed + - summary_unique_event_count: Number of unique events analyzed + - story_id: ID of the created story + Raises: + ValueError: If required parameters are missing or if the LLM response + is not in the expected format. 
+ """ + sketch = kwargs.get("sketch") + form = kwargs.get("form") + datastore = kwargs.get("datastore") + timeline_ids = kwargs.get("timeline_ids") + + if not sketch: + raise ValueError("Missing 'sketch' in kwargs") + + if not form: + raise ValueError("Missing 'form' data in kwargs") + + if not isinstance(llm_response, dict): + raise ValueError("LLM response is expected to be a dictionary") + + summary_text = llm_response.get("summary") + if summary_text is None: + raise ValueError("LLM response missing 'summary' key") + + query_filter = form.get("filter", {}) + query_string = form.get("query", "*") or "*" + + events_df = self._run_timesketch_query( + sketch, + query_string, + query_filter, + datastore=datastore, + timeline_ids=timeline_ids, + ) + + total_events_count = len(events_df) + + events_df["combined_key"] = ( + events_df["datetime"].astype(str) + events_df["message"] + ) + unique_events_count = len( + events_df.drop_duplicates(subset="combined_key", keep="first") + ) + + try: + story_title = f"Forensic Report - {time.strftime('%Y-%m-%d %H:%M')}" + story_id = actions.create_story( + sketch=sketch, content=summary_text, title=story_title + ) + METRICS["llm_forensic_report_stories_created_total"].labels( + sketch_id=str(sketch.id) + ).inc() + except Exception as e: + logger.error("Error creating story for forensic report: %s", e) + raise ValueError( + f"Error creating story to save forensic report: {e}" + ) from e + + return { + "summary": summary_text, + "summary_event_count": total_events_count, + "summary_unique_event_count": unique_events_count, + "story_id": story_id, + }