
Commit 023abdf

Remove @google/generative-ai from AnythingLLM
Migrate to full use of OpenAI Compatible endpoints; resolves #3618

1 parent edaddd4 · commit 023abdf

File tree

3 files changed: +75 -27 lines changed


server/package.json (+1 -2)

@@ -23,7 +23,6 @@
     "@aws-sdk/client-bedrock-runtime": "^3.775.0",
     "@azure/openai": "1.0.0-beta.10",
     "@datastax/astra-db-ts": "^0.1.3",
-    "@google/generative-ai": "^0.7.1",
     "@ladjs/graceful": "^3.2.2",
     "@lancedb/lancedb": "0.15.0",
     "@langchain/anthropic": "0.1.16",
@@ -101,4 +100,4 @@
     "nodemon": "^2.0.22",
     "prettier": "^3.0.3"
   }
-}
+}
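With the Gemini SDK removed, embeddings are requested through Google's OpenAI-compatible endpoint using the already-present openai package. For reference, a minimal standalone sketch of the equivalent call (the base URL, env var, and default model are taken from the diff below; the snippet itself is illustrative, not part of this commit):

    // Illustrative only: the same embeddings call the new GeminiEmbedder makes,
    // using the openai package against Google's OpenAI-compatible base URL.
    const { OpenAI } = require("openai");

    const client = new OpenAI({
      apiKey: process.env.GEMINI_EMBEDDING_API_KEY,
      baseURL: "https://generativelanguage.googleapis.com/v1beta/openai/",
    });

    client.embeddings
      .create({ model: "text-embedding-004", input: ["Hello world"] })
      .then((res) => console.log(res.data[0].embedding.length));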

server/utils/EmbeddingEngines/gemini/index.js (+74 -20)

@@ -1,21 +1,21 @@
+const { toChunks } = require("../../helpers");
+
 class GeminiEmbedder {
   constructor() {
-    if (!process.env.GEMINI_EMBEDDING_API_KEY)
-      throw new Error("No Gemini API key was set.");
+    if (!process.env.GEMINI_EMBEDDING_API_KEY) throw new Error("No Gemini API key was set.");

-    // TODO: Deprecate this and use OpenAI interface instead - after which, remove the @google/generative-ai dependency
-    const { GoogleGenerativeAI } = require("@google/generative-ai");
-    const genAI = new GoogleGenerativeAI(process.env.GEMINI_EMBEDDING_API_KEY);
+    const { OpenAI: OpenAIApi } = require("openai");
     this.model = process.env.EMBEDDING_MODEL_PREF || "text-embedding-004";
-    this.gemini = genAI.getGenerativeModel({ model: this.model });
+    this.openai = new OpenAIApi({
+      apiKey: process.env.GEMINI_EMBEDDING_API_KEY,
+      // Even models that are v1 in gemini API can be used with v1beta/openai/ endpoint and nobody knows why.
+      baseURL: "https://generativelanguage.googleapis.com/v1beta/openai/",
+    });

-    // This property is disabled as it causes issues when sending multiple chunks at once
-    // since when given 4 chunks at once, the gemini api returns 1 embedding for all 4 chunks
-    // instead of 4 embeddings - no idea why this is the case, but it is not how the results are
-    // expected to be returned.
-    // this.maxConcurrentChunks = 1;
+    this.maxConcurrentChunks = 4;

     // https://ai.google.dev/gemini-api/docs/models/gemini#text-embedding-and-embedding
+    // TODO: May need to make this dynamic based on the model
     this.embeddingMaxChunkLength = 2_048;
     this.log(`Initialized with ${this.model}`);
   }
@@ -30,8 +30,10 @@ class GeminiEmbedder {
    * @returns {Promise<Array<number>>} The embedding values
    */
   async embedTextInput(textInput) {
-    const result = await this.gemini.embedContent(textInput);
-    return result.embedding.values || [];
+    const result = await this.embedChunks(
+      Array.isArray(textInput) ? textInput : [textInput]
+    );
+    return result?.[0] || [];
   }

   /**
@@ -40,14 +42,66 @@ class GeminiEmbedder {
    * @returns {Promise<Array<Array<number>>>} The embedding values
    */
   async embedChunks(textChunks = []) {
-    let embeddings = [];
-    for (const chunk of textChunks) {
-      const results = await this.gemini.embedContent(chunk);
-      if (!results.embedding || !results.embedding.values)
-        throw new Error("No embedding values returned from gemini");
-      embeddings.push(results.embedding.values);
+    this.log(`Embedding ${textChunks.length} chunks...`);
+
+    // Because there is a hard POST limit on how many chunks can be sent at once to OpenAI (~8mb)
+    // we concurrently execute each max batch of text chunks possible.
+    // Refer to constructor maxConcurrentChunks for more info.
+    const embeddingRequests = [];
+    for (const chunk of toChunks(textChunks, this.maxConcurrentChunks)) {
+      embeddingRequests.push(
+        new Promise((resolve) => {
+          this.openai.embeddings
+            .create({
+              model: this.model,
+              input: chunk,
+            })
+            .then((result) => {
+              resolve({ data: result?.data, error: null });
+            })
+            .catch((e) => {
+              e.type =
+                e?.response?.data?.error?.code ||
+                e?.response?.status ||
+                "failed_to_embed";
+              e.message = e?.response?.data?.error?.message || e.message;
+              resolve({ data: [], error: e });
+            });
+        })
+      );
     }
-    return embeddings;
+
+    const { data = [], error = null } = await Promise.all(
+      embeddingRequests
+    ).then((results) => {
+      // If any errors were returned from OpenAI abort the entire sequence because the embeddings
+      // will be incomplete.
+      const errors = results
+        .filter((res) => !!res.error)
+        .map((res) => res.error)
+        .flat();
+      if (errors.length > 0) {
+        let uniqueErrors = new Set();
+        errors.map((error) =>
+          uniqueErrors.add(`[${error.type}]: ${error.message}`)
+        );
+
+        return {
+          data: [],
+          error: Array.from(uniqueErrors).join(", "),
+        };
+      }
+      return {
+        data: results.map((res) => res?.data || []).flat(),
+        error: null,
+      };
+    });
+
+    if (!!error) throw new Error(`OpenAI Failed to embed: ${error}`);
+    return data.length > 0 &&
+      data.every((embd) => embd.hasOwnProperty("embedding"))
+      ? data.map((embd) => embd.embedding)
+      : null;
   }
 }
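The batching above depends on a toChunks helper imported from ../../helpers, which this diff does not show. A minimal sketch of what such a helper plausibly does, assuming it splits an array into consecutive slices of at most `size` elements (the real helper in server/utils/helpers may differ):

    // Hypothetical stand-in for the imported toChunks helper: split an array
    // into consecutive slices of at most `size` elements each.
    function toChunks(arr, size) {
      return Array.from({ length: Math.ceil(arr.length / size) }, (_, i) =>
        arr.slice(i * size, i * size + size)
      );
    }

    // toChunks(["a", "b", "c", "d", "e"], 2) => [["a", "b"], ["c", "d"], ["e"]]

With maxConcurrentChunks set to 4, embedChunks therefore issues one embeddings request per group of four chunks and runs the groups concurrently via Promise.all, aborting with a combined error message if any request fails.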

server/yarn.lock (-5)

@@ -1240,11 +1240,6 @@
   resolved "https://registry.npmjs.org/@google/generative-ai/-/generative-ai-0.1.3.tgz"
   integrity sha512-Cm4uJX1sKarpm1mje/MiOIinM7zdUUrQp/5/qGPAgznbdd/B9zup5ehT6c1qGqycFcSopTA1J1HpqHS5kJR8hQ==

-"@google/generative-ai@^0.7.1":
-  version "0.7.1"
-  resolved "https://registry.npmjs.org/@google/generative-ai/-/generative-ai-0.7.1.tgz"
-  integrity sha512-WTjMLLYL/xfA5BW6xAycRPiAX7FNHKAxrid/ayqC1QMam0KAK0NbMeS9Lubw80gVg5xFMLE+H7pw4wdNzTOlxw==
-
 "@graphql-typed-document-node/core@^3.1.1":
   version "3.2.0"
   resolved "https://registry.npmjs.org/@graphql-typed-document-node/core/-/core-3.2.0.tgz"
