From 282eaa07fccf25870b2c5b357cb134ab60428bab Mon Sep 17 00:00:00 2001 From: Huu Le <39040748+leehuwuj@users.noreply.github.com> Date: Wed, 13 Nov 2024 18:47:28 +0700 Subject: [PATCH] Fix: ts upload file does not create index and document store (#422) --- .changeset/big-turtles-own.md | 5 +++++ helpers/env-variables.ts | 8 +++++++- .../llamaindex/typescript/documents/pipeline.ts | 14 ++++++++++++-- .../vectordbs/typescript/none/generate.ts | 7 +++++-- .../components/vectordbs/typescript/none/index.ts | 7 +++++-- .../components/vectordbs/typescript/none/shared.ts | 1 - 6 files changed, 34 insertions(+), 8 deletions(-) create mode 100644 .changeset/big-turtles-own.md delete mode 100644 templates/components/vectordbs/typescript/none/shared.ts diff --git a/.changeset/big-turtles-own.md b/.changeset/big-turtles-own.md new file mode 100644 index 000000000..ed9c194b4 --- /dev/null +++ b/.changeset/big-turtles-own.md @@ -0,0 +1,5 @@ +--- +"create-llama": patch +--- + +Ensure that the index and document store are created when uploading a file with no available index. diff --git a/helpers/env-variables.ts b/helpers/env-variables.ts index 07ae88e02..4a554ff04 100644 --- a/helpers/env-variables.ts +++ b/helpers/env-variables.ts @@ -217,7 +217,13 @@ Otherwise, use CHROMA_HOST and CHROMA_PORT config above`, }, ]; default: - return []; + return [ + { + name: "STORAGE_CACHE_DIR", + description: "The directory to store the local storage cache.", + value: ".cache", + }, + ]; } }; diff --git a/templates/components/llamaindex/typescript/documents/pipeline.ts b/templates/components/llamaindex/typescript/documents/pipeline.ts index 01b52fd5d..cd4d6d092 100644 --- a/templates/components/llamaindex/typescript/documents/pipeline.ts +++ b/templates/components/llamaindex/typescript/documents/pipeline.ts @@ -3,6 +3,7 @@ import { IngestionPipeline, Settings, SimpleNodeParser, + storageContextFromDefaults, VectorStoreIndex, } from "llamaindex"; @@ -28,11 +29,20 @@ export async function runPipeline( return documents.map((document) => document.id_); } else { // Initialize a new index with the documents - const newIndex = await VectorStoreIndex.fromDocuments(documents); - newIndex.storageContext.docStore.persist(); console.log( "Got empty index, created new index with the uploaded documents", ); + const persistDir = process.env.STORAGE_CACHE_DIR; + if (!persistDir) { + throw new Error("STORAGE_CACHE_DIR environment variable is required!"); + } + const storageContext = await storageContextFromDefaults({ + persistDir, + }); + const newIndex = await VectorStoreIndex.fromDocuments(documents, { + storageContext, + }); + await newIndex.storageContext.docStore.persist(); return documents.map((document) => document.id_); } } diff --git a/templates/components/vectordbs/typescript/none/generate.ts b/templates/components/vectordbs/typescript/none/generate.ts index 595b27df1..4647361a3 100644 --- a/templates/components/vectordbs/typescript/none/generate.ts +++ b/templates/components/vectordbs/typescript/none/generate.ts @@ -5,7 +5,6 @@ import * as dotenv from "dotenv"; import { getDocuments } from "./loader"; import { initSettings } from "./settings"; -import { STORAGE_CACHE_DIR } from "./shared"; // Load environment variables from local .env file dotenv.config(); @@ -20,9 +19,13 @@ async function getRuntime(func: any) { async function generateDatasource() { console.log(`Generating storage context...`); // Split documents, create embeddings and store them in the storage context + const persistDir = process.env.STORAGE_CACHE_DIR; + if (!persistDir) { + throw new Error("STORAGE_CACHE_DIR environment variable is required!"); + } const ms = await getRuntime(async () => { const storageContext = await storageContextFromDefaults({ - persistDir: STORAGE_CACHE_DIR, + persistDir, }); const documents = await getDocuments(); diff --git a/templates/components/vectordbs/typescript/none/index.ts b/templates/components/vectordbs/typescript/none/index.ts index fecc76f45..d38ea6001 100644 --- a/templates/components/vectordbs/typescript/none/index.ts +++ b/templates/components/vectordbs/typescript/none/index.ts @@ -1,10 +1,13 @@ import { SimpleDocumentStore, VectorStoreIndex } from "llamaindex"; import { storageContextFromDefaults } from "llamaindex/storage/StorageContext"; -import { STORAGE_CACHE_DIR } from "./shared"; export async function getDataSource(params?: any) { + const persistDir = process.env.STORAGE_CACHE_DIR; + if (!persistDir) { + throw new Error("STORAGE_CACHE_DIR environment variable is required!"); + } const storageContext = await storageContextFromDefaults({ - persistDir: `${STORAGE_CACHE_DIR}`, + persistDir, }); const numberOfDocs = Object.keys( diff --git a/templates/components/vectordbs/typescript/none/shared.ts b/templates/components/vectordbs/typescript/none/shared.ts deleted file mode 100644 index e7736e5b3..000000000 --- a/templates/components/vectordbs/typescript/none/shared.ts +++ /dev/null @@ -1 +0,0 @@ -export const STORAGE_CACHE_DIR = "./cache";