Add Ollama provider with response schema support & create LLM provider directory (#3306)

itsmvd · web-flow · commit 2c3337c3333b · 2025-02-26T09:55:37.000+01:00
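* Add response_schema support to ollama.py
* Create a separate LLM provider directory (timesketch/lib/llms/providers) and add response_schema support to the Ollama provider
* Update timesketch.conf: rename the 'llm_summarization' feature key to 'llm_summarize' and make Ollama the default provider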
diff --git a/data/timesketch.conf b/data/timesketch.conf
@@ -379,16 +379,16 @@ LLM_PROVIDER_CONFIGS = {
             'project_id': '',
         },
     },
-    'llm_summarization': {
+    'llm_summarize': {
         'aistudio': {
             'model': 'gemini-2.0-flash-exp',
             'project_id': '',
         },
     },
     'default': {
-        'aistudio': {
-             'api_key': '',
-             'model': 'gemini-2.0-flash-exp',
+        'ollama': {
+            'server_url': 'http://ollama:11434',
+            'model': 'gemma:7b',
         },
     }
 }
diff --git a/timesketch/api/v1/resources/llm_summarize.py b/timesketch/api/v1/resources/llm_summarize.py
@@ -28,7 +28,8 @@
 from flask_restful import Resource
 
 from timesketch.api.v1 import resources, export
-from timesketch.lib import definitions, llms, utils
+from timesketch.lib import definitions, utils
+from timesketch.lib.llms.providers import manager as provider_manager
 from timesketch.lib.definitions import METRICS_NAMESPACE
 from timesketch.models.sketch import Sketch
 
@@ -304,8 +305,8 @@ def _get_content(
                 configured LLM provider
         """
         try:
-            feature_name = "llm_summarization"
-            llm = llms.manager.LLMManager.create_provider(feature_name=feature_name)
+            feature_name = "llm_summarize"
+            llm = provider_manager.LLMManager.create_provider(feature_name=feature_name)
         except Exception as e:  # pylint: disable=broad-except
             logger.error("Error LLM Provider: %s", e)
             abort(
diff --git a/timesketch/api/v1/resources/nl2q.py b/timesketch/api/v1/resources/nl2q.py
@@ -26,7 +26,7 @@
 import pandas as pd
 
 from timesketch.api.v1 import utils
-from timesketch.lib.llms import manager
+from timesketch.lib.llms.providers import manager
 from timesketch.lib.definitions import HTTP_STATUS_CODE_BAD_REQUEST
 from timesketch.lib.definitions import HTTP_STATUS_CODE_INTERNAL_SERVER_ERROR
 from timesketch.lib.definitions import HTTP_STATUS_CODE_NOT_FOUND
diff --git a/timesketch/api/v1/resources_test.py b/timesketch/api/v1/resources_test.py
@@ -1198,7 +1198,7 @@ class TestNl2qResource(BaseTest):
 
     resource_url = "/api/v1/sketches/1/nl2q/"
 
-    @mock.patch("timesketch.lib.llms.manager.LLMManager.create_provider")
+    @mock.patch("timesketch.lib.llms.providers.manager.LLMManager.create_provider")
     @mock.patch("timesketch.api.v1.utils.run_aggregator")
     @mock.patch("timesketch.api.v1.resources.OpenSearchDataStore", MockDataStore)
     def test_nl2q_prompt(self, mock_aggregator, mock_create_provider):
@@ -1380,7 +1380,7 @@ def test_nl2q_no_permission(self):
         )
         self.assertEqual(response.status_code, HTTP_STATUS_CODE_FORBIDDEN)
 
-    @mock.patch("timesketch.lib.llms.manager.LLMManager.create_provider")
+    @mock.patch("timesketch.lib.llms.providers.manager.LLMManager.create_provider")
     @mock.patch("timesketch.api.v1.utils.run_aggregator")
     @mock.patch("timesketch.api.v1.resources.OpenSearchDataStore", MockDataStore)
     def test_nl2q_llm_error(self, mock_aggregator, mock_create_provider):
@@ -1584,7 +1584,7 @@ def test_llm_summarize_no_events(self):
         )
 
     @mock.patch("timesketch.api.v1.resources.OpenSearchDataStore", MockDataStore)
-    @mock.patch("timesketch.lib.llms.manager.LLMManager.create_provider")
+    @mock.patch("timesketch.lib.llms.providers.manager.LLMManager.create_provider")
     def test_llm_summarize_with_events(self, mock_create_provider):
         """Test LLM summarizer with events returned and mock LLM."""
         self.login()
diff --git a/timesketch/lib/llms/__init__.py b/timesketch/lib/llms/__init__.py
@@ -1,4 +1,4 @@
-# Copyright 2024 Google Inc. All rights reserved.
+# Copyright 2025 Google Inc. All rights reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -11,8 +11,4 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-"""LLM module for Timesketch."""
-
-from timesketch.lib.llms import ollama
-from timesketch.lib.llms import vertexai
-from timesketch.lib.llms import aistudio
+"""LLM libraries for Timesketch."""
diff --git a/timesketch/lib/llms/ollama.py b/timesketch/lib/llms/ollama.py
diff --git a/timesketch/lib/llms/providers/__init__.py b/timesketch/lib/llms/providers/__init__.py
@@ -0,0 +1,18 @@
+# Copyright 2024 Google Inc. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""LLM providers for Timesketch."""
+
+from timesketch.lib.llms.providers import ollama
+from timesketch.lib.llms.providers import vertexai
+from timesketch.lib.llms.providers import aistudio
diff --git a/timesketch/lib/llms/providers/aistudio.py b/timesketch/lib/llms/providers/aistudio.py
@@ -1,4 +1,4 @@
-# Copyright 2024 Google Inc. All rights reserved.
+# Copyright 2025 Google Inc. All rights reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -15,8 +15,8 @@
 
 import json
 from typing import Optional
-from timesketch.lib.llms import interface
-from timesketch.lib.llms import manager
+from timesketch.lib.llms.providers import interface
+from timesketch.lib.llms.providers import manager
 
 
 # Check if the required dependencies are installed.
diff --git a/timesketch/lib/llms/providers/interface.py b/timesketch/lib/llms/providers/interface.py
diff --git a/timesketch/lib/llms/providers/manager.py b/timesketch/lib/llms/providers/manager.py
@@ -14,7 +14,7 @@
 """This file contains a class for managing Large Language Model (LLM) providers."""
 
 from flask import current_app
-from timesketch.lib.llms.interface import LLMProvider
+from timesketch.lib.llms.providers.interface import LLMProvider
 
 
 class LLMManager:
@@ -80,7 +80,6 @@ def create_provider(cls, feature_name: str = None, **kwargs) -> LLMProvider:
             raise ValueError(
                 "Configuration for the feature must specify exactly one provider."
             )
-
         provider_name = next(iter(config_mapping))
         provider_config = config_mapping[provider_name]
 
diff --git a/timesketch/lib/llms/providers/manager_test.py b/timesketch/lib/llms/providers/manager_test.py
@@ -14,7 +14,7 @@
 """Tests for LLM provider manager."""
 
 from timesketch.lib.testlib import BaseTest
-from timesketch.lib.llms import manager
+from timesketch.lib.llms.providers import manager
 
 
 class MockAistudioProvider:
diff --git a/timesketch/lib/llms/providers/ollama.py b/timesketch/lib/llms/providers/ollama.py
@@ -0,0 +1,113 @@
+# Copyright 2025 Google Inc. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""A LLM provider for the Ollama server."""
+from typing import Optional
+import json
+import requests
+
+from timesketch.lib.llms.providers import interface
+from timesketch.lib.llms.providers import manager
+
+
+class Ollama(interface.LLMProvider):
+    """A LLM provider for the Ollama server."""
+
+    NAME = "ollama"
+
+    # Seconds to wait for the TCP connection to the Ollama server.
+    CONNECT_TIMEOUT = 10
+
+    def _post(self, request_body: str) -> requests.Response:
+        """
+        Make a POST request to the chat endpoint of the Ollama server.
+
+        Args:
+            request_body: The body of the request in JSON format.
+
+        Returns:
+            The response from the server as a requests.Response object.
+
+        Raises:
+            ValueError: If no "server_url" is configured for the provider.
+        """
+        server_url = self.config.get("server_url")
+        if not server_url:
+            raise ValueError("Ollama provider is missing the 'server_url' setting.")
+        # Tolerate a trailing slash in the configured URL.
+        url = server_url.rstrip("/") + "/api/chat"
+        # requests never times out by default. Bound the connection phase so an
+        # unreachable server fails fast; the read phase is only bounded when a
+        # "timeout" (seconds) is configured, because generation can be slow.
+        return requests.post(
+            url,
+            data=request_body,
+            headers={"Content-Type": "application/json"},
+            timeout=(self.CONNECT_TIMEOUT, self.config.get("timeout")),
+        )
+
+    def generate(self, prompt: str, response_schema: Optional[dict] = None) -> str:
+        """
+        Generate text using the Ollama server, optionally with a JSON schema.
+
+        Args:
+            prompt: The prompt to use for the generation.
+            response_schema: An optional JSON schema to define the expected
+                response format.
+
+        Returns:
+            The generated text as a string (or parsed data if
+            response_schema is provided).
+
+        Raises:
+            ValueError: If the request fails or JSON parsing fails.
+        """
+        request_body = {
+            "messages": [{"role": "user", "content": prompt}],
+            "model": self.config.get("model"),
+            # The reply is parsed below as a single JSON document, which a
+            # streamed (multi-object) response is not, so streaming is disabled.
+            "stream": False,
+            "options": {
+                "temperature": self.config.get("temperature"),
+                "num_predict": self.config.get("max_output_tokens"),
+                "top_p": self.config.get("top_p"),
+                "top_k": self.config.get("top_k"),
+            },
+        }
+
+        if response_schema:
+            request_body["format"] = response_schema
+
+        response = self._post(json.dumps(request_body))
+
+        if response.status_code != 200:
+            raise ValueError(f"Error generating text: {response.text}")
+
+        response_data = response.json()
+        # "message" may be absent or null; treat both as an empty reply.
+        message = response_data.get("message") or {}
+        text_response = (message.get("content") or "").strip()
+
+        if response_schema:
+            try:
+                return json.loads(text_response)
+            except json.JSONDecodeError as error:
+                raise ValueError(
+                    f"Error JSON parsing text: {text_response}: {error}"
+                ) from error
+
+        return text_response
+
+
+manager.LLMManager.register_provider(Ollama)
diff --git a/timesketch/lib/llms/providers/vertexai.py b/timesketch/lib/llms/providers/vertexai.py
@@ -16,8 +16,8 @@
 import json
 from typing import Optional
 
-from timesketch.lib.llms import interface
-from timesketch.lib.llms import manager
+from timesketch.lib.llms.providers import interface
+from timesketch.lib.llms.providers import manager
 
 # Check if the required dependencies are installed.
 has_required_deps = True