2
2
3
3
from pydantic import BaseModel
4
4
5
+ from minds .knowledge_bases .preprocessing import PreprocessingConfig
5
6
from minds .rest_api import RestAPI
6
7
7
8
@@ -25,6 +26,8 @@ class KnowledgeBaseConfig(BaseModel):
25
26
description : str
26
27
vector_store_config : Optional [VectorStoreConfig ] = None
27
28
embedding_config : Optional [EmbeddingConfig ] = None
29
+ # Params to apply to retrieval pipeline.
30
+ params : Optional [Dict ] = None
28
31
29
32
30
33
class KnowledgeBaseDocument (BaseModel ):
@@ -39,7 +42,7 @@ def __init__(self, name, api: RestAPI):
39
42
self .name = name
40
43
self .api = api
41
44
42
- def insert_from_select (self , query : str ):
45
+ def insert_from_select (self , query : str , preprocessing_config : PreprocessingConfig = None ):
43
46
'''
44
47
Inserts select content of a connected datasource into this knowledge base
45
48
@@ -48,9 +51,11 @@ def insert_from_select(self, query: str):
48
51
update_request = {
49
52
'query' : query
50
53
}
54
+ if preprocessing_config is not None :
55
+ update_request ['preprocessing' ] = preprocessing_config .model_dump ()
51
56
_ = self .api .put (f'/knowledge_bases/{ self .name } ' , data = update_request )
52
57
53
- def insert_documents (self , documents : List [KnowledgeBaseDocument ]):
58
+ def insert_documents (self , documents : List [KnowledgeBaseDocument ], preprocessing_config : PreprocessingConfig = None ):
54
59
'''
55
60
Inserts documents directly into this knowledge base
56
61
@@ -59,9 +64,11 @@ def insert_documents(self, documents: List[KnowledgeBaseDocument]):
59
64
update_request = {
60
65
'rows' : [d .model_dump () for d in documents ]
61
66
}
67
+ if preprocessing_config is not None :
68
+ update_request ['preprocessing' ] = preprocessing_config .model_dump ()
62
69
_ = self .api .put (f'/knowledge_bases/{ self .name } ' , data = update_request )
63
70
64
- def insert_urls (self , urls : List [str ]):
71
+ def insert_urls (self , urls : List [str ], preprocessing_config : PreprocessingConfig = None ):
65
72
'''
66
73
Crawls URLs & inserts the retrieved webpages into this knowledge base
67
74
@@ -70,9 +77,11 @@ def insert_urls(self, urls: List[str]):
70
77
update_request = {
71
78
'urls' : urls
72
79
}
80
+ if preprocessing_config is not None :
81
+ update_request ['preprocessing' ] = preprocessing_config .model_dump ()
73
82
_ = self .api .put (f'/knowledge_bases/{ self .name } ' , data = update_request )
74
83
75
- def insert_files (self , files : List [str ]):
84
+ def insert_files (self , files : List [str ], preprocessing_config : PreprocessingConfig = None ):
76
85
'''
77
86
Inserts files that have already been uploaded to MindsDB into this knowledge base
78
87
@@ -81,6 +90,8 @@ def insert_files(self, files: List[str]):
81
90
update_request = {
82
91
'files' : files
83
92
}
93
+ if preprocessing_config is not None :
94
+ update_request ['preprocessing' ] = preprocessing_config .model_dump ()
84
95
_ = self .api .put (f'/knowledge_bases/{ self .name } ' , data = update_request )
85
96
86
97
@@ -117,6 +128,8 @@ def create(self, config: KnowledgeBaseConfig) -> KnowledgeBase:
117
128
if config .embedding_config .params is not None :
118
129
embedding_data .update (config .embedding_config .params )
119
130
create_request ['embedding_model' ] = embedding_data
131
+ if config .params is not None :
132
+ create_request ['params' ] = config .params
120
133
121
134
_ = self .api .post ('/knowledge_bases' , data = create_request )
122
135
return self .get (config .name )
0 commit comments