-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathDataCollection.js
63 lines (41 loc) · 1.15 KB
/
DataCollection.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
require('dotenv').config();
require('fs')
const { Configuration, OpenAIApi } = require("openai");
// Shared OpenAI client for this module. The API key is read from the
// OPENAI_API_KEY environment variable at load time.
const configuration = new Configuration({ apiKey: process.env.OPENAI_API_KEY });
const openai = new OpenAIApi(configuration);
/**
 * Embeds `text` using the document embeddings model.
 *
 * @param {*} text - Value forwarded unchanged to `get_embedding`
 *   (presumably a string of document content; confirm against callers).
 * @returns {*} Whatever `get_embedding(text, DOC_EMBEDDINGS_MODEL)` returns.
 */
function get_doc_embedding(text) {
  const model = DOC_EMBEDDINGS_MODEL;
  return get_embedding(text, model);
}
/**
 * Embeds `text` using the query embeddings model.
 *
 * @param {*} text - Value forwarded unchanged to `get_embedding`
 *   (presumably a search query string; confirm against callers).
 * @returns {*} Whatever `get_embedding(text, QUERY_EMBEDDINGS_MODEL)` returns.
 */
function get_query_embedding(text) {
  const model = QUERY_EMBEDDINGS_MODEL;
  return get_embedding(text, model);
}
/**
 * Computes a document embedding for every entry of `df`.
 *
 * Entries are processed one at a time: each `get_doc_embedding` call is
 * awaited before the next one starts.
 *
 * @param {Object|Array|null|undefined} df - Collection (array or keyed
 *   object) whose entries each expose a "Content" property. `null` or
 *   `undefined` produces an empty result.
 * @returns {Promise<Object>} Plain object mapping each own key of `df` to the
 *   value resolved by `get_doc_embedding` for that entry's "Content".
 */
async function compute_doc_embeddings(df) {
  const embeddings = {};
  // A null/undefined `df` is treated as empty rather than throwing.
  const source = df == null ? {} : df;
  // Destructured `const` bindings keep the loop variable local; assigning to
  // an undeclared loop variable would leak a global (and throw in strict mode).
  for (const [key, row] of Object.entries(source)) {
    embeddings[key] = await get_doc_embedding(row["Content"]);
  }
  return embeddings;
}
/**
 * Builds the list of `{ Header, Vectors }` records for `df`.
 *
 * @param {Object|Array} df - Collection whose entries each expose "Header"
 *   and "Content" properties; it is passed as-is to `compute_doc_embeddings`.
 * @returns {Promise<Array>} Array in which, for every key returned by
 *   `compute_doc_embeddings`, the slot at that key holds
 *   `{ Header: df[key].Header, Vectors: <embedding for key> }`.
 */
async function create_embeddings(df) {
  const docEmbeddings = await compute_doc_embeddings(df);
  // Local to this call: a shared global accumulator would be overwritten by
  // overlapping calls and is a ReferenceError in strict mode.
  const records = [];
  for (const [key, vectors] of Object.entries(docEmbeddings)) {
    // Keys are used as array indices. A non-numeric key becomes a named
    // property on the array instead of an element.
    records[key] = {
      Header: df[key]["Header"],
      Vectors: vectors,
    };
  }
  return records;
}
// Embedding model identifiers. Declared with `const` so they are module-scoped
// bindings rather than implicit globals (assignment to an undeclared name
// throws in strict mode). The functions in this file read them at call time.
const MODEL_NAME = "davinci";
const DOC_EMBEDDINGS_MODEL = `text-search-${MODEL_NAME}-doc-001`;
const QUERY_EMBEDDINGS_MODEL = `text-search-${MODEL_NAME}-query-001`;
/**
 * Requests an embedding from the module's `openai` client.
 *
 * @param {*} text - Sent as the request's `input` field.
 * @param {*} model - Sent as the request's `model` field.
 * @returns {Promise<*>} The `embedding` field of the first item in the
 *   response's `data.data` list.
 */
async function get_embedding(text, model) {
  const request = { model, input: text };
  const response = await openai.createEmbedding(request);
  const items = response.data.data;
  return items[0].embedding;
}