Skip to content

Commit

Permalink
feat: allow using a customized GraphRAG settings.yaml
Browse files Browse the repository at this point in the history
  • Loading branch information
amaler committed Oct 14, 2024
1 parent 6906ab2 commit 046d24f
Show file tree
Hide file tree
Showing 2 changed files with 22 additions and 6 deletions.
26 changes: 21 additions & 5 deletions libs/ktem/ktem/index/file/graph/pipelines.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,10 @@
from typing import Generator
from uuid import uuid4

import dotenv
import pandas as pd
import tiktoken
import yaml
from decouple import config
from ktem.db.models import engine
from sqlalchemy.orm import Session
from theflow.settings import settings
Expand Down Expand Up @@ -119,8 +120,7 @@ def call_graphrag_index(self, input_path: str):
command = command[:-1]

# copy customized GraphRAG config file if it exists
setting = dotenv.dotenv_values(".env")
if setting.get("USE_CUSTOMIZED_GRAPHRAG_SETTING") in ["true", "True"]:
if config("USE_CUSTOMIZED_GRAPHRAG_SETTING", default="value").lower() == "true":
setting_file_path = os.path.join(os.getcwd(), "settings.yaml.example")
destination_file_path = os.path.join(input_path, "settings.yaml")
try:
Expand Down Expand Up @@ -234,12 +234,28 @@ def _build_graph_search(self):
text_unit_df = pd.read_parquet(f"{INPUT_DIR}/{TEXT_UNIT_TABLE}.parquet")
text_units = read_indexer_text_units(text_unit_df)

# initialize default settings
embedding_model = os.getenv(
"GRAPHRAG_EMBEDDING_MODEL", "text-embedding-3-small"
)
embedding_api_key = os.getenv("GRAPHRAG_API_KEY")
embedding_api_base = None

# use customized GraphRAG settings if the flag is set
if config("USE_CUSTOMIZED_GRAPHRAG_SETTING", default="value").lower() == "true":
settings_yaml_path = Path(root_path) / "settings.yaml"
with open(settings_yaml_path, "r") as f:
settings = yaml.safe_load(f)
if settings["embeddings"]["llm"]["model"]:
embedding_model = settings["embeddings"]["llm"]["model"]
if settings["embeddings"]["llm"]["api_key"]:
embedding_api_key = settings["embeddings"]["llm"]["api_key"]
if settings["embeddings"]["llm"]["api_base"]:
embedding_api_base = settings["embeddings"]["llm"]["api_base"]

text_embedder = OpenAIEmbedding(
api_key=os.getenv("GRAPHRAG_API_KEY"),
api_base=None,
api_key=embedding_api_key,
api_base=embedding_api_base,
api_type=OpenaiApiType.OpenAI,
model=embedding_model,
deployment_name=embedding_model,
Expand Down
2 changes: 1 addition & 1 deletion settings.yaml.example
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# This is a sample GraphRAG settings.yaml file that allows users to run the GraphRAG index process with their customized parameters.
# The parameters in this file will only take effect when the USE_CUSTOMIZED_GRAPHRAG_SETTING is true.
# The parameters in this file only take effect when USE_CUSTOMIZED_GRAPHRAG_SETTING is set to true in the .env file.
# For a comprehensive understanding of GraphRAG parameters, please refer to: https://microsoft.github.io/graphrag/config/json_yaml/.

encoding_model: cl100k_base
Expand Down

0 comments on commit 046d24f

Please sign in to comment.