From 023abdfa2a9ec9789499067cc09dd5e4fc82e8c4 Mon Sep 17 00:00:00 2001
From: timothycarambat
Date: Tue, 29 Apr 2025 11:34:33 -0700
Subject: [PATCH 1/2] Remove `@google/generative-ai` from AnythingLLM
 Migrate to full use of OpenAI Compatible endpoints

resolves #3618
---
 server/package.json                           |  3 +-
 server/utils/EmbeddingEngines/gemini/index.js | 94 +++++++++++++++----
 server/yarn.lock                              |  5 -
 3 files changed, 75 insertions(+), 27 deletions(-)

diff --git a/server/package.json b/server/package.json
index db153eb214..1312dc689d 100644
--- a/server/package.json
+++ b/server/package.json
@@ -23,7 +23,6 @@
     "@aws-sdk/client-bedrock-runtime": "^3.775.0",
     "@azure/openai": "1.0.0-beta.10",
     "@datastax/astra-db-ts": "^0.1.3",
-    "@google/generative-ai": "^0.7.1",
     "@ladjs/graceful": "^3.2.2",
     "@lancedb/lancedb": "0.15.0",
     "@langchain/anthropic": "0.1.16",
@@ -101,4 +100,4 @@
     "nodemon": "^2.0.22",
     "prettier": "^3.0.3"
   }
-}
+}
\ No newline at end of file
diff --git a/server/utils/EmbeddingEngines/gemini/index.js b/server/utils/EmbeddingEngines/gemini/index.js
index 4c60501a88..629d0818ef 100644
--- a/server/utils/EmbeddingEngines/gemini/index.js
+++ b/server/utils/EmbeddingEngines/gemini/index.js
@@ -1,21 +1,21 @@
+const { toChunks } = require("../../helpers");
+
 class GeminiEmbedder {
   constructor() {
-    if (!process.env.GEMINI_EMBEDDING_API_KEY)
-      throw new Error("No Gemini API key was set.");
+    if (!process.env.GEMINI_EMBEDDING_API_KEY) throw new Error("No Gemini API key was set.");
 
-    // TODO: Deprecate this and use OpenAI interface instead - after which, remove the @google/generative-ai dependency
-    const { GoogleGenerativeAI } = require("@google/generative-ai");
-    const genAI = new GoogleGenerativeAI(process.env.GEMINI_EMBEDDING_API_KEY);
+    const { OpenAI: OpenAIApi } = require("openai");
     this.model = process.env.EMBEDDING_MODEL_PREF || "text-embedding-004";
-    this.gemini = genAI.getGenerativeModel({ model: this.model });
+    this.openai = new OpenAIApi({
+      apiKey: process.env.GEMINI_EMBEDDING_API_KEY,
+      // Even models that are v1 in gemini API can be used with v1beta/openai/ endpoint and nobody knows why.
+      baseURL: "https://generativelanguage.googleapis.com/v1beta/openai/",
+    });
 
-    // This property is disabled as it causes issues when sending multiple chunks at once
-    // since when given 4 chunks at once, the gemini api returns 1 embedding for all 4 chunks
-    // instead of 4 embeddings - no idea why this is the case, but it is not how the results are
-    // expected to be returned.
-    // this.maxConcurrentChunks = 1;
+    this.maxConcurrentChunks = 4;
 
     // https://ai.google.dev/gemini-api/docs/models/gemini#text-embedding-and-embedding
+    // TODO: May need to make this dynamic based on the model
     this.embeddingMaxChunkLength = 2_048;
     this.log(`Initialized with ${this.model}`);
   }
@@ -30,8 +30,10 @@
    * @returns {Promise<Array<number>>} The embedding values
    */
   async embedTextInput(textInput) {
-    const result = await this.gemini.embedContent(textInput);
-    return result.embedding.values || [];
+    const result = await this.embedChunks(
+      Array.isArray(textInput) ? textInput : [textInput]
+    );
+    return result?.[0] || [];
   }
 
   /**
@@ -40,14 +42,66 @@
    * @returns {Promise<Array<Array<number>>>} The embedding values
    */
   async embedChunks(textChunks = []) {
-    let embeddings = [];
-    for (const chunk of textChunks) {
-      const results = await this.gemini.embedContent(chunk);
-      if (!results.embedding || !results.embedding.values)
-        throw new Error("No embedding values returned from gemini");
-      embeddings.push(results.embedding.values);
+    this.log(`Embedding ${textChunks.length} chunks...`);
+
+    // Because there is a hard POST limit on how many chunks can be sent at once to OpenAI (~8mb)
+    // we concurrently execute each max batch of text chunks possible.
+    // Refer to constructor maxConcurrentChunks for more info.
+    const embeddingRequests = [];
+    for (const chunk of toChunks(textChunks, this.maxConcurrentChunks)) {
+      embeddingRequests.push(
+        new Promise((resolve) => {
+          this.openai.embeddings
+            .create({
+              model: this.model,
+              input: chunk,
+            })
+            .then((result) => {
+              resolve({ data: result?.data, error: null });
+            })
+            .catch((e) => {
+              e.type =
+                e?.response?.data?.error?.code ||
+                e?.response?.status ||
+                "failed_to_embed";
+              e.message = e?.response?.data?.error?.message || e.message;
+              resolve({ data: [], error: e });
+            });
+        })
+      );
     }
-    return embeddings;
+
+    const { data = [], error = null } = await Promise.all(
+      embeddingRequests
+    ).then((results) => {
+      // If any errors were returned from OpenAI abort the entire sequence because the embeddings
+      // will be incomplete.
+      const errors = results
+        .filter((res) => !!res.error)
+        .map((res) => res.error)
+        .flat();
+      if (errors.length > 0) {
+        let uniqueErrors = new Set();
+        errors.map((error) =>
+          uniqueErrors.add(`[${error.type}]: ${error.message}`)
+        );
+
+        return {
+          data: [],
+          error: Array.from(uniqueErrors).join(", "),
+        };
+      }
+      return {
+        data: results.map((res) => res?.data || []).flat(),
+        error: null,
+      };
+    });
+
+    if (!!error) throw new Error(`OpenAI Failed to embed: ${error}`);
+    return data.length > 0 &&
+      data.every((embd) => embd.hasOwnProperty("embedding"))
+      ? data.map((embd) => embd.embedding)
+      : null;
   }
 }
diff --git a/server/yarn.lock b/server/yarn.lock
index d263a84b1b..907f3d2f1c 100644
--- a/server/yarn.lock
+++ b/server/yarn.lock
@@ -1240,11 +1240,6 @@
   resolved "https://registry.npmjs.org/@google/generative-ai/-/generative-ai-0.1.3.tgz"
   integrity sha512-Cm4uJX1sKarpm1mje/MiOIinM7zdUUrQp/5/qGPAgznbdd/B9zup5ehT6c1qGqycFcSopTA1J1HpqHS5kJR8hQ==
 
-"@google/generative-ai@^0.7.1":
-  version "0.7.1"
-  resolved "https://registry.npmjs.org/@google/generative-ai/-/generative-ai-0.7.1.tgz"
-  integrity sha512-WTjMLLYL/xfA5BW6xAycRPiAX7FNHKAxrid/ayqC1QMam0KAK0NbMeS9Lubw80gVg5xFMLE+H7pw4wdNzTOlxw==
-
 "@graphql-typed-document-node/core@^3.1.1":
   version "3.2.0"
   resolved "https://registry.npmjs.org/@graphql-typed-document-node/core/-/core-3.2.0.tgz"

From f0ad10c48c13454fcb79e1d88aba1c9adfa69def Mon Sep 17 00:00:00 2001
From: timothycarambat
Date: Tue, 29 Apr 2025 11:36:30 -0700
Subject: [PATCH 2/2] lint

---
 frontend/src/index.css                        | 2 +-
 server/utils/EmbeddingEngines/gemini/index.js | 3 ++-
 2 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/frontend/src/index.css b/frontend/src/index.css
index c75b98d284..fa88feab6b 100644
--- a/frontend/src/index.css
+++ b/frontend/src/index.css
@@ -117,7 +117,7 @@
   --theme-chat-input-border: #cccccc;
   --theme-action-menu-bg: #eaeaea;
   --theme-action-menu-item-hover: rgba(0, 0, 0, 0.1);
-  --theme-settings-input-bg: #EDF2FA;
+  --theme-settings-input-bg: #edf2fa;
   --theme-settings-input-placeholder: rgba(0, 0, 0, 0.5);
   --theme-settings-input-active: rgb(0 0 0 / 0.2);
   --theme-settings-input-text: #0e0f0f;
diff --git a/server/utils/EmbeddingEngines/gemini/index.js b/server/utils/EmbeddingEngines/gemini/index.js
index 629d0818ef..a7d50c9066 100644
--- a/server/utils/EmbeddingEngines/gemini/index.js
+++ b/server/utils/EmbeddingEngines/gemini/index.js
@@ -2,7 +2,8 @@ const { toChunks } = require("../../helpers");
 
 class GeminiEmbedder {
   constructor() {
-    if (!process.env.GEMINI_EMBEDDING_API_KEY) throw new Error("No Gemini API key was set.");
+    if (!process.env.GEMINI_EMBEDDING_API_KEY)
+      throw new Error("No Gemini API key was set.");
 
     const { OpenAI: OpenAIApi } = require("openai");
     this.model = process.env.EMBEDDING_MODEL_PREF || "text-embedding-004";
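
Note: the core of this migration is that Gemini embedding models are reachable through Google's OpenAI-compatible surface, so the stock `openai` npm client (already a server dependency) can replace the removed SDK. Below is a minimal standalone sketch of the call pattern the patched embedder uses; the model name and env variable mirror the patch's defaults, while the `embedGeminiTexts` wrapper is purely illustrative and not part of the codebase.

// sketch.js — illustrative only; mirrors the patched GeminiEmbedder setup.
const { OpenAI } = require("openai");

const client = new OpenAI({
  apiKey: process.env.GEMINI_EMBEDDING_API_KEY,
  // Gemini serves OpenAI-compatible routes under /v1beta/openai/, as the patch notes.
  baseURL: "https://generativelanguage.googleapis.com/v1beta/openai/",
});

// Embed one batch of texts (the patch caps batches at 4 via maxConcurrentChunks).
async function embedGeminiTexts(texts) {
  const res = await client.embeddings.create({
    model: "text-embedding-004", // the patch's default when EMBEDDING_MODEL_PREF is unset
    input: texts,
  });
  // Each element of res.data carries an `embedding` array, which is what the
  // hasOwnProperty("embedding") check in the patched embedChunks() verifies.
  return res.data.map((d) => d.embedding);
}

embedGeminiTexts(["hello world"]).then((vectors) =>
  console.log(`${vectors.length} embedding(s), dim ${vectors[0]?.length}`)
);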
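The patch also leans on the `toChunks` helper imported from `../../helpers` to split the input into batches of `maxConcurrentChunks` before fanning the requests out in parallel. Its implementation is not shown in this diff; assuming it simply partitions an array into fixed-size groups (which is how embedChunks uses it), a plausible stand-in looks like:

// Hypothetical stand-in for the toChunks helper referenced by the patch:
// partitions `arr` into consecutive groups of at most `size` elements.
function toChunks(arr, size) {
  const out = [];
  for (let i = 0; i < arr.length; i += size) out.push(arr.slice(i, i + size));
  return out;
}

// e.g. toChunks(["a", "b", "c", "d", "e"], 4) => [["a","b","c","d"], ["e"]]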