Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix: TypeScript templates do not create a new LlamaCloud index or upload a file to an existing index. #356

Merged
merged 14 commits into from
Oct 10, 2024
Merged
2 changes: 1 addition & 1 deletion .github/workflows/e2e.yml
Original file line number Diff line number Diff line change
Expand Up @@ -86,7 +86,7 @@ jobs:
python-version: ["3.11"]
os: [macos-latest, windows-latest, ubuntu-22.04]
frameworks: ["nextjs", "express"]
datasources: ["--no-files", "--example-file"]
datasources: ["--no-files", "--example-file", "--llamacloud"]
defaults:
run:
shell: bash
Expand Down
7 changes: 7 additions & 0 deletions e2e/shared/streaming_template.spec.ts
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,13 @@ const userMessage =
dataSource !== "--no-files" ? "Physical standard for letters" : "Hello";

test.describe(`Test streaming template ${templateFramework} ${dataSource} ${templateUI} ${appType} ${templatePostInstallAction}`, async () => {
const isNode18 = process.version.startsWith("v18");
const isLlamaCloud = dataSource === "--llamacloud";
  // LlamaCloud uses the File API, which is not supported on Node 18
if (isNode18 && isLlamaCloud) {
test.skip(true, "Skipping tests for Node 18 and LlamaCloud data source");
}

let port: number;
let externalPort: number;
let cwd: string;
Expand Down
28 changes: 20 additions & 8 deletions templates/components/llamaindex/typescript/documents/upload.ts
Original file line number Diff line number Diff line change
Expand Up @@ -16,14 +16,26 @@ export async function uploadDocument(
// trigger LlamaCloudIndex API to upload the file and run the pipeline
const projectId = await index.getProjectId();
const pipelineId = await index.getPipelineId();
return [
await LLamaCloudFileService.addFileToPipeline(
projectId,
pipelineId,
new File([fileBuffer], filename, { type: mimeType }),
{ private: "true" },
),
];
try {
return [
await LLamaCloudFileService.addFileToPipeline(
projectId,
pipelineId,
new File([fileBuffer], filename, { type: mimeType }),
{ private: "true" },
),
];
} catch (error) {
if (
error instanceof ReferenceError &&
error.message.includes("File is not defined")
) {
throw new Error(
"File class is not supported in the current Node.js version. Please use Node.js 20 or higher.",
);
}
throw error;
}
}

// run the pipeline for other vector store indexes
Expand Down
18 changes: 6 additions & 12 deletions templates/components/vectordbs/python/llamacloud/generate.py
Original file line number Diff line number Diff line change
@@ -1,20 +1,18 @@
# flake8: noqa: E402
import os

from dotenv import load_dotenv

load_dotenv()

from llama_cloud import PipelineType

from app.settings import init_settings
from llama_index.core.settings import Settings

import logging

from app.engine.index import get_client, get_index

import logging
from llama_index.core.readers import SimpleDirectoryReader
from app.engine.service import LLamaCloudFileService
from app.settings import init_settings
from llama_cloud import PipelineType
from llama_index.core.readers import SimpleDirectoryReader
from llama_index.core.settings import Settings

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger()
Expand Down Expand Up @@ -83,10 +81,6 @@ def generate_datasource():
project_id,
pipeline_id,
f,
custom_metadata={
# Set private=false to mark the document as public (required for filtering)
"private": "false",
},
)

logger.info("Finished generating the index")
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ def generate_filters(doc_ids):
"""
Generate public/private document filters based on the doc_ids and the vector store.
"""
# Using the "is_empty" filter to include documents that don't have the "private" key because they were uploaded in the LlamaCloud UI
# public documents (ingested by "poetry run generate" or in the LlamaCloud UI) don't have the "private" field
public_doc_filter = MetadataFilter(
key="private",
value=None,
Expand Down
23 changes: 19 additions & 4 deletions templates/components/vectordbs/typescript/llamacloud/generate.ts
Original file line number Diff line number Diff line change
Expand Up @@ -25,17 +25,32 @@ async function* walk(dir: string): AsyncGenerator<string> {

async function loadAndIndex() {
const index = await getDataSource();
// ensure the index is available or create a new one
await index.ensureIndex();
const projectId = await index.getProjectId();
const pipelineId = await index.getPipelineId();

// walk through the data directory and upload each file to LlamaCloud
for await (const filePath of walk(DATA_DIR)) {
const buffer = await fs.readFile(filePath);
const filename = path.basename(filePath);
const file = new File([buffer], filename);
await LLamaCloudFileService.addFileToPipeline(projectId, pipelineId, file, {
private: "false",
});
try {
await LLamaCloudFileService.addFileToPipeline(
projectId,
pipelineId,
new File([buffer], filename),
);
} catch (error) {
if (
error instanceof ReferenceError &&
error.message.includes("File is not defined")
) {
throw new Error(
"File class is not supported in the current Node.js version. Please use Node.js 20 or higher.",
);
}
throw error;
}
}

console.log(`Successfully uploaded documents to LlamaCloud!`);
Expand Down
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import { CloudRetrieveParams, MetadataFilter } from "llamaindex";

export function generateFilters(documentIds: string[]) {
// public documents don't have the "private" field or it's set to "false"
// public documents (ingested by "npm run generate" or in the LlamaCloud UI) don't have the "private" field
const publicDocumentsFilter: MetadataFilter = {
key: "private",
operator: "is_empty",
Expand Down
Original file line number Diff line number Diff line change
@@ -1,11 +1,10 @@
import logging
import os

from fastapi import APIRouter
from fastapi import APIRouter, HTTPException

from app.api.routers.models import ChatConfig


config_router = r = APIRouter()

logger = logging.getLogger("uvicorn")
Expand All @@ -27,6 +26,10 @@ async def chat_config() -> ChatConfig:

@r.get("/llamacloud")
async def chat_llama_cloud_config():
if not os.getenv("LLAMA_CLOUD_API_KEY"):
raise HTTPException(
status_code=500, detail="LlamaCloud API KEY is not configured"
)
projects = LLamaCloudFileService.get_all_projects_with_pipelines()
pipeline = os.getenv("LLAMA_CLOUD_INDEX_NAME")
project = os.getenv("LLAMA_CLOUD_PROJECT_NAME")
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,16 @@ export function LlamaCloudSelector({
useEffect(() => {
if (process.env.NEXT_PUBLIC_USE_LLAMACLOUD === "true" && !config) {
fetch(`${backend}/api/chat/config/llamacloud`)
.then((response) => response.json())
.then((response) => {
if (!response.ok) {
return response.json().then((errorData) => {
window.alert(
`Error: ${JSON.stringify(errorData) || "Unknown error occurred"}`,
);
});
}
return response.json();
})
.then((data) => {
const pipeline = defaultPipeline ?? data.pipeline; // defaultPipeline will override pipeline in .env
setConfig({ ...data, pipeline });
Expand Down
Loading