feat: metrics demo #607

Open

wants to merge 49 commits into base: feat/paper_demo

Changes from all commits (49 commits)
916d499
fix: update .env.example
taprosoft Dec 24, 2024
92517aa
feat: add SSO login
taprosoft Dec 23, 2024
20b3a2b
fix: update flowsetting
taprosoft Dec 23, 2024
76c34af
fix: add requirement
taprosoft Dec 23, 2024
3c7db5d
fix: refine UI
taprosoft Dec 24, 2024
f13dbbf
fix: update group id-based operation
taprosoft Dec 24, 2024
7e6b4a6
fix: improve citation logics
taprosoft Dec 24, 2024
391d4a7
fix: UI enhancement
taprosoft Dec 24, 2024
0f79cc5
fix: user_id to string in models
taprosoft Dec 24, 2024
e2c07c0
fix: improve chat suggestion UI and flow
taprosoft Dec 24, 2024
ba4bae6
fix: improve group id handling
taprosoft Dec 24, 2024
1f57fd4
fix: improve chat suggestion
taprosoft Dec 24, 2024
b4eda5a
fix: secure download for single file
taprosoft Dec 24, 2024
be631e8
fix: file limiting in docstore
taprosoft Dec 25, 2024
583917d
fix: improve chat suggestion logics & language conform
taprosoft Dec 25, 2024
4391201
feat: add markmap and select text to highlight function
taprosoft Dec 30, 2024
e5aa4e2
fix: update Dockerfile
taprosoft Dec 30, 2024
61d3be4
fix: user id auto generate
taprosoft Dec 30, 2024
d8aaf99
fix: default user id
taprosoft Dec 30, 2024
512dd01
feat: add demo mode
taprosoft Dec 31, 2024
42fba01
fix: update flowsetting
taprosoft Dec 31, 2024
fc91326
fix: revise default params for demo
taprosoft Dec 31, 2024
aa1342d
feat: sso_app alternative
taprosoft Dec 31, 2024
9fa2a37
feat: sso login demo
taprosoft Dec 31, 2024
675f7c9
feat: demo specific customization
taprosoft Dec 31, 2024
73f5b52
feat: add login using API key
taprosoft Jan 2, 2025
41ecb36
fix: disable key-based login
taprosoft Jan 2, 2025
d1b2676
fix: optimize duplicate upload
taprosoft Jan 2, 2025
db97709
fix: gradio routing
taprosoft Jan 2, 2025
9d7c897
fix: disable arm build for demo
taprosoft Jan 2, 2025
bcfb87a
fix: revise full-text search js logic
taprosoft Jan 2, 2025
41bf15b
feat: add rate limit
taprosoft Jan 3, 2025
064d762
fix: update Dockerfile with new launch script
taprosoft Jan 3, 2025
d789098
fix: update Dockerfile
taprosoft Jan 3, 2025
28ebb57
fix: update Dockerignore
taprosoft Jan 3, 2025
fdda1c3
fix: update ratelimit logic
taprosoft Jan 3, 2025
38a3968
fix: user_id in user management page
taprosoft Jan 3, 2025
92b2ae2
fix: rename conv logic
taprosoft Jan 3, 2025
166b5aa
feat: update demo hint
taprosoft Jan 3, 2025
6cce22d
fix: minor fix
taprosoft Jan 3, 2025
c855920
fix: highlight on long PDF load
taprosoft Jan 4, 2025
997903f
feat: add HF paper list
taprosoft Jan 5, 2025
5f2c347
fix: update HF papers load logic
taprosoft Jan 5, 2025
4bc3acb
feat: fly config
taprosoft Jan 6, 2025
c3a8f78
fix: update fly config
taprosoft Jan 6, 2025
bc0659a
feat: update prometheus metrics
phv2312 Jan 8, 2025
aab8e2d
refactor: prometheus counter script
phv2312 Jan 8, 2025
3969923
feat: update metrics in fly.toml
phv2312 Jan 8, 2025
e10d344
feat: test my signed commit
phv2312 Feb 28, 2025
2 changes: 2 additions & 0 deletions .dockerignore
@@ -11,3 +11,5 @@ env/
README.md
*.zip
*.sh

!/launch.sh
6 changes: 2 additions & 4 deletions .env.example
@@ -3,8 +3,8 @@
# settings for OpenAI
OPENAI_API_BASE=https://api.openai.com/v1
OPENAI_API_KEY=<YOUR_OPENAI_KEY>
OPENAI_CHAT_MODEL=gpt-3.5-turbo
OPENAI_EMBEDDINGS_MODEL=text-embedding-ada-002
OPENAI_CHAT_MODEL=gpt-4o-mini
OPENAI_EMBEDDINGS_MODEL=text-embedding-3-large

# settings for Azure OpenAI
AZURE_OPENAI_ENDPOINT=
@@ -19,8 +19,6 @@ COHERE_API_KEY=<COHERE_API_KEY>
# settings for local models
LOCAL_MODEL=llama3.1:8b
LOCAL_MODEL_EMBEDDINGS=nomic-embed-text
LOCAL_EMBEDDING_MODEL_DIM = 768
LOCAL_EMBEDDING_MODEL_MAX_TOKENS = 8192

# settings for GraphRAG
GRAPHRAG_API_KEY=<YOUR_OPENAI_KEY>
12 changes: 6 additions & 6 deletions .github/workflows/build-push-docker.yaml
@@ -54,11 +54,11 @@ jobs:
- name: Checkout
uses: actions/checkout@v4

- name: Set up QEMU
uses: docker/setup-qemu-action@v3
with:
image: tonistiigi/binfmt:latest
platforms: arm64,arm
# - name: Set up QEMU
# uses: docker/setup-qemu-action@v3
# with:
# image: tonistiigi/binfmt:latest
# platforms: arm64,arm

- name: Set up Docker Buildx
id: buildx
@@ -94,7 +94,7 @@ jobs:
file: Dockerfile
context: .
push: true
platforms: linux/amd64, linux/arm64
platforms: linux/amd64 #, linux/arm64
tags: |
${{ steps.meta.outputs.tags }}
labels: ${{ steps.meta.outputs.labels }}
18 changes: 18 additions & 0 deletions .github/workflows/fly-deploy.yml
@@ -0,0 +1,18 @@
# See https://fly.io/docs/app-guides/continuous-deployment-with-github-actions/

name: Fly Deploy
on:
push:
branches:
- main
jobs:
deploy:
name: Deploy app
runs-on: ubuntu-latest
concurrency: deploy-group # optional: ensure only one action runs at a time
steps:
- uses: actions/checkout@v4
- uses: superfly/flyctl-actions/setup-flyctl@master
- run: flyctl deploy --remote-only
env:
FLY_API_TOKEN: ${{ secrets.FLY_API_TOKEN }}
7 changes: 3 additions & 4 deletions Dockerfile
@@ -35,13 +35,15 @@ RUN bash scripts/download_pdfjs.sh $PDFJS_PREBUILT_DIR

# Copy contents
COPY . /app
COPY launch.sh /app/launch.sh
COPY .env.example /app/.env

# Install pip packages
RUN --mount=type=ssh \
--mount=type=cache,target=/root/.cache/pip \
pip install -e "libs/kotaemon" \
&& pip install -e "libs/ktem" \
&& pip install "prometheus_client" \
&& pip install "pdfservices-sdk@git+https://github.com/niallcm/pdfservices-python-sdk.git@bump-and-unfreeze-requirements"

RUN --mount=type=ssh \
@@ -97,7 +99,4 @@ RUN apt-get autoremove \
&& rm -rf /var/lib/apt/lists/* \
&& rm -rf ~/.cache

# Download nltk packages as required for unstructured
# RUN python -c "from unstructured.nlp.tokenize import _download_nltk_packages_if_not_present; _download_nltk_packages_if_not_present()"

CMD ["python", "app.py"]
ENTRYPOINT ["sh", "/app/launch.sh"]
4 changes: 2 additions & 2 deletions docs/about.md
@@ -4,8 +4,8 @@ An open-source tool for chatting with your documents. Built with both end users
developers in mind.

[Source Code](https://github.com/Cinnamon/kotaemon) |
[Live Demo](https://huggingface.co/spaces/cin-model/kotaemon-demo)
[HF Space](https://huggingface.co/spaces/cin-model/kotaemon-demo)

[User Guide](https://cinnamon.github.io/kotaemon/) |
[Installation Guide](https://cinnamon.github.io/kotaemon/) |
[Developer Guide](https://cinnamon.github.io/kotaemon/development/) |
[Feedback](https://github.com/Cinnamon/kotaemon/issues)
41 changes: 26 additions & 15 deletions flowsettings.py
@@ -25,7 +25,7 @@
except Exception:
KH_APP_VERSION = "local"

KH_ENABLE_FIRST_SETUP = True
KH_ENABLE_FIRST_SETUP = config("KH_ENABLE_FIRST_SETUP", default=True, cast=bool)
KH_DEMO_MODE = config("KH_DEMO_MODE", default=False, cast=bool)
KH_OLLAMA_URL = config("KH_OLLAMA_URL", default="http://localhost:11434/v1/")

@@ -65,6 +65,8 @@
KH_DOC_DIR = this_dir / "docs"

KH_MODE = "dev"
KH_SSO_ENABLED = config("KH_SSO_ENABLED", default=False, cast=bool)

KH_FEATURE_CHAT_SUGGESTION = config(
"KH_FEATURE_CHAT_SUGGESTION", default=False, cast=bool
)
@@ -137,31 +139,36 @@
"default": False,
}

if config("OPENAI_API_KEY", default=""):
OPENAI_DEFAULT = "<YOUR_OPENAI_KEY>"
OPENAI_API_KEY = config("OPENAI_API_KEY", default=OPENAI_DEFAULT)
GOOGLE_API_KEY = config("GOOGLE_API_KEY", default="your-key")
IS_OPENAI_DEFAULT = len(OPENAI_API_KEY) > 0 and OPENAI_API_KEY != OPENAI_DEFAULT

if OPENAI_API_KEY:
KH_LLMS["openai"] = {
"spec": {
"__type__": "kotaemon.llms.ChatOpenAI",
"temperature": 0,
"base_url": config("OPENAI_API_BASE", default="")
or "https://api.openai.com/v1",
"api_key": config("OPENAI_API_KEY", default=""),
"model": config("OPENAI_CHAT_MODEL", default="gpt-3.5-turbo"),
"api_key": OPENAI_API_KEY,
"model": config("OPENAI_CHAT_MODEL", default="gpt-4o-mini"),
"timeout": 20,
},
"default": True,
"default": IS_OPENAI_DEFAULT,
}
KH_EMBEDDINGS["openai"] = {
"spec": {
"__type__": "kotaemon.embeddings.OpenAIEmbeddings",
"base_url": config("OPENAI_API_BASE", default="https://api.openai.com/v1"),
"api_key": config("OPENAI_API_KEY", default=""),
"api_key": OPENAI_API_KEY,
"model": config(
"OPENAI_EMBEDDINGS_MODEL", default="text-embedding-ada-002"
"OPENAI_EMBEDDINGS_MODEL", default="text-embedding-3-large"
),
"timeout": 10,
"context_length": 8191,
},
"default": True,
"default": IS_OPENAI_DEFAULT,
}

if config("LOCAL_MODEL", default=""):
@@ -205,9 +212,9 @@
"spec": {
"__type__": "kotaemon.llms.chats.LCGeminiChat",
"model_name": "gemini-1.5-flash",
"api_key": config("GOOGLE_API_KEY", default="your-key"),
"api_key": GOOGLE_API_KEY,
},
"default": False,
"default": not IS_OPENAI_DEFAULT,
}
KH_LLMS["groq"] = {
"spec": {
@@ -241,8 +248,9 @@
"spec": {
"__type__": "kotaemon.embeddings.LCGoogleEmbeddings",
"model": "models/text-embedding-004",
"google_api_key": config("GOOGLE_API_KEY", default="your-key"),
}
"google_api_key": GOOGLE_API_KEY,
},
"default": not IS_OPENAI_DEFAULT,
}
# KH_EMBEDDINGS["huggingface"] = {
# "spec": {
@@ -301,9 +309,12 @@

USE_NANO_GRAPHRAG = config("USE_NANO_GRAPHRAG", default=False, cast=bool)
USE_LIGHTRAG = config("USE_LIGHTRAG", default=True, cast=bool)
USE_MS_GRAPHRAG = config("USE_MS_GRAPHRAG", default=False, cast=bool)

GRAPHRAG_INDEX_TYPES = ["ktem.index.file.graph.GraphRAGIndex"]
GRAPHRAG_INDEX_TYPES = []

if USE_MS_GRAPHRAG:
GRAPHRAG_INDEX_TYPES.append("ktem.index.file.graph.GraphRAGIndex")
if USE_NANO_GRAPHRAG:
GRAPHRAG_INDEX_TYPES.append("ktem.index.file.graph.NanoGraphRAGIndex")
if USE_LIGHTRAG:
@@ -323,7 +334,7 @@
".png, .jpeg, .jpg, .tiff, .tif, .pdf, .xls, .xlsx, .doc, .docx, "
".pptx, .csv, .html, .mhtml, .txt, .md, .zip"
),
"private": False,
"private": True,
},
"index_type": graph_type,
}
@@ -338,7 +349,7 @@
".png, .jpeg, .jpg, .tiff, .tif, .pdf, .xls, .xlsx, .doc, .docx, "
".pptx, .csv, .html, .mhtml, .txt, .md, .zip"
),
"private": False,
"private": True,
},
"index_type": "ktem.index.file.FileIndex",
},
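
For readers skimming the scattered hunks above, the new provider-default logic reduces to the following self-contained sketch (reassembled from the diff, not new behaviour; only the final print is illustrative):

# Reassembled from the flowsettings.py hunks above; the print is illustrative only.
from decouple import config

OPENAI_DEFAULT = "<YOUR_OPENAI_KEY>"
OPENAI_API_KEY = config("OPENAI_API_KEY", default=OPENAI_DEFAULT)

# The key counts as configured only when it is non-empty and not the .env.example placeholder.
IS_OPENAI_DEFAULT = len(OPENAI_API_KEY) > 0 and OPENAI_API_KEY != OPENAI_DEFAULT

# OpenAI LLM/embeddings are marked default only with a real key; otherwise the Google
# entries get "default": not IS_OPENAI_DEFAULT, i.e. True.
print("OpenAI is the default provider:", IS_OPENAI_DEFAULT)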
41 changes: 41 additions & 0 deletions fly.toml
@@ -0,0 +1,41 @@
# fly.toml app configuration file generated for kotaemon-kan-test on 2025-01-06T22:43:53+07:00
#
# See https://fly.io/docs/reference/configuration/ for information about how to use this file.
#

app = 'kotaemon-kan-test'
primary_region = 'sin'

[build]

[env]
KH_FEATURE_USER_MANAGEMENT = false
PROMETHEUS_ENABLE = true
PROMETHEUS_PORT = 8000
PROMETHEUS_HOST = '0.0.0.0'

[[mounts]]
source = 'ktem_volume'
destination = '/app/ktem_app_data'

[http_service]
internal_port = 7860
force_https = true
auto_stop_machines = 'suspend'
auto_start_machines = true
min_machines_running = 0
processes = ['app']

[metrics]
port = 8000
path = "/metrics"

[[vm]]
memory = '4gb'
cpu_kind = 'shared'
cpus = 4

[metrics]
port = 8000
path = "/metrics"

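The [env] block above sets PROMETHEUS_ENABLE, PROMETHEUS_PORT, and PROMETHEUS_HOST, and the [metrics] section points Fly's scraper at /metrics on port 8000. The Python side that consumes these variables is not part of this hunk; below is a hedged sketch under the assumption that python-decouple and prometheus_client are used for it (the function name is an assumption):

# Sketch only: consuming the fly.toml [env] values; the function name is an assumption.
from decouple import config
from prometheus_client import start_http_server

PROMETHEUS_ENABLE = config("PROMETHEUS_ENABLE", default=False, cast=bool)
PROMETHEUS_PORT = config("PROMETHEUS_PORT", default=8000, cast=int)
PROMETHEUS_HOST = config("PROMETHEUS_HOST", default="0.0.0.0")

def maybe_start_metrics_server() -> None:
    # Expose /metrics only when the deployment opts in, matching the [metrics] scrape target.
    if PROMETHEUS_ENABLE:
        start_http_server(PROMETHEUS_PORT, addr=PROMETHEUS_HOST)

maybe_start_metrics_server()

Note that [metrics] is declared twice in the file above; strict TOML parsers treat a repeated table as invalid, so the second copy is likely unintended.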
17 changes: 17 additions & 0 deletions launch.sh
@@ -0,0 +1,17 @@
#!/bin/bash

if [ -z "$GRADIO_SERVER_NAME" ]; then
export GRADIO_SERVER_NAME="0.0.0.0"
fi
if [ -z "$GRADIO_SERVER_PORT" ]; then
export GRADIO_SERVER_PORT="7860"
fi

# Check if environment variable KH_DEMO_MODE is set to true
if [ "$KH_DEMO_MODE" = "true" ]; then
echo "KH_DEMO_MODE is true. Launching in demo mode..."
# Command to launch in demo mode
GR_FILE_ROOT_PATH="/app" KH_FEATURE_USER_MANAGEMENT=false USE_LIGHTRAG=false uvicorn sso_app_demo:app --host "$GRADIO_SERVER_NAME" --port "$GRADIO_SERVER_PORT"
else
python app.py
fi
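
In demo mode the script serves sso_app_demo:app with uvicorn instead of running python app.py. The sso_app_demo module itself is not shown in this diff; the sketch below only illustrates the generic pattern of mounting a Gradio UI onto a FastAPI app so uvicorn can serve it (module, object, and path names are assumptions):

# Sketch only: the generic Gradio-on-FastAPI pattern; names and the "/app" path are assumed.
import gradio as gr
from fastapi import FastAPI

app = FastAPI()  # extra routes (SSO callbacks, health checks, ...) could be added here

with gr.Blocks() as demo:
    gr.Markdown("placeholder UI")

# Mount the UI under a sub-path; GR_FILE_ROOT_PATH="/app" in launch.sh hints at such a prefix.
app = gr.mount_gradio_app(app, demo, path="/app")

# Run with: uvicorn sso_app_demo:app --host 0.0.0.0 --port 7860  (module name assumed)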
9 changes: 7 additions & 2 deletions libs/kotaemon/kotaemon/indices/qa/citation_qa.py
@@ -3,6 +3,7 @@
from typing import Generator

import numpy as np
from decouple import config
from theflow.settings import settings as flowsettings

from kotaemon.base import (
@@ -32,7 +33,9 @@

MAX_IMAGES = 10
CITATION_TIMEOUT = 5.0
CONTEXT_RELEVANT_WARNING_SCORE = 0.7
CONTEXT_RELEVANT_WARNING_SCORE = config(
"CONTEXT_RELEVANT_WARNING_SCORE", 0.3, cast=float
)

DEFAULT_QA_TEXT_PROMPT = (
"Use the following pieces of context to answer the question at the end in detail with clear explanation. " # noqa: E501
@@ -385,7 +388,9 @@ def prepare_citations(self, answer, docs) -> tuple[list[Document], list[Document
doc = id2docs[id_]
doc_score = doc.metadata.get("llm_trulens_score", 0.0)
is_open = not has_llm_score or (
doc_score > CONTEXT_RELEVANT_WARNING_SCORE and len(with_citation) == 0
doc_score
> CONTEXT_RELEVANT_WARNING_SCORE
# and len(with_citation) == 0
)
without_citation.append(
Document(
7 changes: 6 additions & 1 deletion libs/kotaemon/kotaemon/indices/qa/utils.py
@@ -2,6 +2,8 @@


def find_text(search_span, context, min_length=5):
search_span, context = search_span.lower(), context.lower()

sentence_list = search_span.split("\n")
context = context.replace("\n", " ")

@@ -18,7 +20,7 @@ def find_text(search_span, context, min_length=5):

matched_blocks = []
for _, start, length in match_results:
if length > max(len(sentence) * 0.2, min_length):
if length > max(len(sentence) * 0.25, min_length):
matched_blocks.append((start, start + length))

if matched_blocks:
Expand All @@ -42,6 +44,9 @@ def find_text(search_span, context, min_length=5):
def find_start_end_phrase(
start_phrase, end_phrase, context, min_length=5, max_excerpt_length=300
):
start_phrase, end_phrase = start_phrase.lower(), end_phrase.lower()
context = context.lower()

context = context.replace("\n", " ")

matches = []
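
The hunks above lowercase both the search span and the context and raise the match-length threshold from 0.2 to 0.25 of the sentence length. The code that produces match_results sits outside this hunk; here is a sketch of the difflib-style block matching such a threshold would gate (SequenceMatcher is an assumption, not confirmed by the diff):

# Sketch only: difflib-based fuzzy matching with the 0.25-of-length threshold from the diff.
from difflib import SequenceMatcher

def fuzzy_blocks(sentence: str, context: str, min_length: int = 5):
    sentence, context = sentence.lower(), context.lower()  # case-insensitive, as in the diff
    match_results = SequenceMatcher(None, sentence, context).get_matching_blocks()
    blocks = []
    for _, start, length in match_results:  # (pos in sentence, pos in context, size)
        if length > max(len(sentence) * 0.25, min_length):
            blocks.append((start, start + length))
    return blocks

print(fuzzy_blocks("improve chat suggestion", "this PR aims to improve chat suggestion UI and flow"))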
13 changes: 9 additions & 4 deletions libs/kotaemon/kotaemon/indices/vectorindex.py
@@ -177,7 +177,11 @@ def run(
]
elif self.retrieval_mode == "text":
query = text.text if isinstance(text, Document) else text
docs = self.doc_store.query(query, top_k=top_k_first_round, doc_ids=scope)
docs = []
if scope:
docs = self.doc_store.query(
query, top_k=top_k_first_round, doc_ids=scope
)
result = [RetrievedDocument(**doc.to_dict(), score=-1.0) for doc in docs]
elif self.retrieval_mode == "hybrid":
# similarity search section
@@ -206,9 +210,10 @@ def query_docstore():

assert self.doc_store is not None
query = text.text if isinstance(text, Document) else text
ds_docs = self.doc_store.query(
query, top_k=top_k_first_round, doc_ids=scope
)
if scope:
ds_docs = self.doc_store.query(
query, top_k=top_k_first_round, doc_ids=scope
)

vs_query_thread = threading.Thread(target=query_vectorstore)
ds_query_thread = threading.Thread(target=query_docstore)
5 changes: 4 additions & 1 deletion libs/kotaemon/kotaemon/loaders/pdf_loader.py
@@ -3,15 +3,18 @@
from pathlib import Path
from typing import Dict, List, Optional

from decouple import config
from fsspec import AbstractFileSystem
from llama_index.readers.file import PDFReader
from PIL import Image

from kotaemon.base import Document

PDF_LOADER_DPI = config("PDF_LOADER_DPI", default=40, cast=int)


def get_page_thumbnails(
file_path: Path, pages: list[int], dpi: int = 80
file_path: Path, pages: list[int], dpi: int = PDF_LOADER_DPI
) -> List[Image.Image]:
"""Get image thumbnails of the pages in the PDF file.
