Skip to content

Commit 0714e32

Browse files
authored
Make files in knowledge bases visible in the file manager (infiniflow#714)
### What problem does this PR solve?

Makes files in knowledge bases visible in the file manager. infiniflow#162

### Type of change

- [x] New Feature (non-breaking change which adds functionality)
1 parent 41debb6 commit 0714e32

10 files changed

+187
-64
lines changed

api/apps/document_app.py

+29-14
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@
2323
from flask import request
2424
from flask_login import login_required, current_user
2525

26-
from api.db.db_models import Task
26+
from api.db.db_models import Task, File
2727
from api.db.services.file2document_service import File2DocumentService
2828
from api.db.services.file_service import FileService
2929
from api.db.services.task_service import TaskService, queue_tasks
@@ -33,7 +33,7 @@
3333
from api.db.services.knowledgebase_service import KnowledgebaseService
3434
from api.utils.api_utils import server_error_response, get_data_error_result, validate_request
3535
from api.utils import get_uuid
36-
from api.db import FileType, TaskStatus, ParserType
36+
from api.db import FileType, TaskStatus, ParserType, FileSource
3737
from api.db.services.document_service import DocumentService
3838
from api.settings import RetCode
3939
from api.utils.api_utils import get_json_result
@@ -59,12 +59,19 @@ def upload():
5959
return get_json_result(
6060
data=False, retmsg='No file selected!', retcode=RetCode.ARGUMENT_ERROR)
6161

62+
e, kb = KnowledgebaseService.get_by_id(kb_id)
63+
if not e:
64+
raise LookupError("Can't find this knowledgebase!")
65+
66+
root_folder = FileService.get_root_folder(current_user.id)
67+
pf_id = root_folder["id"]
68+
FileService.init_knowledgebase_docs(pf_id, current_user.id)
69+
kb_root_folder = FileService.get_kb_folder(current_user.id)
70+
kb_folder = FileService.new_a_file_from_kb(kb.tenant_id, kb.name, kb_root_folder["id"])
71+
6272
err = []
6373
for file in file_objs:
6474
try:
65-
e, kb = KnowledgebaseService.get_by_id(kb_id)
66-
if not e:
67-
raise LookupError("Can't find this knowledgebase!")
6875
MAX_FILE_NUM_PER_USER = int(os.environ.get('MAX_FILE_NUM_PER_USER', 0))
6976
if MAX_FILE_NUM_PER_USER > 0 and DocumentService.get_doc_count(kb.tenant_id) >= MAX_FILE_NUM_PER_USER:
7077
raise RuntimeError("Exceed the maximum file number of a free user!")
@@ -99,6 +106,8 @@ def upload():
99106
if re.search(r"\.(ppt|pptx|pages)$", filename):
100107
doc["parser_id"] = ParserType.PRESENTATION.value
101108
DocumentService.insert(doc)
109+
110+
FileService.add_file_from_kb(doc, kb_folder["id"], kb.tenant_id)
102111
except Exception as e:
103112
err.append(file.filename + ": " + str(e))
104113
if err:
@@ -228,11 +237,13 @@ def rm():
228237
req = request.json
229238
doc_ids = req["doc_id"]
230239
if isinstance(doc_ids, str): doc_ids = [doc_ids]
240+
root_folder = FileService.get_root_folder(current_user.id)
241+
pf_id = root_folder["id"]
242+
FileService.init_knowledgebase_docs(pf_id, current_user.id)
231243
errors = ""
232244
for doc_id in doc_ids:
233245
try:
234246
e, doc = DocumentService.get_by_id(doc_id)
235-
236247
if not e:
237248
return get_data_error_result(retmsg="Document not found!")
238249
tenant_id = DocumentService.get_tenant_id(doc_id)
@@ -241,21 +252,25 @@ def rm():
241252

242253
ELASTICSEARCH.deleteByQuery(
243254
Q("match", doc_id=doc.id), idxnm=search.index_name(tenant_id))
244-
DocumentService.increment_chunk_num(
245-
doc.id, doc.kb_id, doc.token_num * -1, doc.chunk_num * -1, 0)
255+
256+
DocumentService.clear_chunk_num(doc_id)
257+
b, n = File2DocumentService.get_minio_address(doc_id=doc_id)
258+
246259
if not DocumentService.delete(doc):
247260
return get_data_error_result(
248261
retmsg="Database error (Document removal)!")
249262

250-
informs = File2DocumentService.get_by_document_id(doc_id)
251-
if not informs:
252-
MINIO.rm(doc.kb_id, doc.location)
253-
else:
254-
File2DocumentService.delete_by_document_id(doc_id)
263+
f2d = File2DocumentService.get_by_document_id(doc_id)
264+
FileService.filter_delete([File.source_type == FileSource.KNOWLEDGEBASE, File.id == f2d[0].file_id])
265+
File2DocumentService.delete_by_document_id(doc_id)
266+
267+
MINIO.rm(b, n)
255268
except Exception as e:
256269
errors += str(e)
257270

258-
if errors: return server_error_response(e)
271+
if errors:
272+
return get_json_result(data=False, retmsg=errors, retcode=RetCode.SERVER_ERROR)
273+
259274
return get_json_result(data=True)
260275

261276

api/apps/file_app.py

+8-5
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@
2626
from api.db.services.file2document_service import File2DocumentService
2727
from api.utils.api_utils import server_error_response, get_data_error_result, validate_request
2828
from api.utils import get_uuid
29-
from api.db import FileType
29+
from api.db import FileType, FileSource
3030
from api.db.services import duplicate_name
3131
from api.db.services.file_service import FileService
3232
from api.settings import RetCode
@@ -45,7 +45,7 @@ def upload():
4545

4646
if not pf_id:
4747
root_folder = FileService.get_root_folder(current_user.id)
48-
pf_id = root_folder.id
48+
pf_id = root_folder["id"]
4949

5050
if 'file' not in request.files:
5151
return get_json_result(
@@ -132,7 +132,7 @@ def create():
132132
input_file_type = request.json.get("type")
133133
if not pf_id:
134134
root_folder = FileService.get_root_folder(current_user.id)
135-
pf_id = root_folder.id
135+
pf_id = root_folder["id"]
136136

137137
try:
138138
if not FileService.is_parent_folder_exist(pf_id):
@@ -176,7 +176,8 @@ def list():
176176
desc = request.args.get("desc", True)
177177
if not pf_id:
178178
root_folder = FileService.get_root_folder(current_user.id)
179-
pf_id = root_folder.id
179+
pf_id = root_folder["id"]
180+
FileService.init_knowledgebase_docs(pf_id, current_user.id)
180181
try:
181182
e, file = FileService.get_by_id(pf_id)
182183
if not e:
@@ -199,7 +200,7 @@ def list():
199200
def get_root_folder():
200201
try:
201202
root_folder = FileService.get_root_folder(current_user.id)
202-
return get_json_result(data={"root_folder": root_folder.to_json()})
203+
return get_json_result(data={"root_folder": root_folder})
203204
except Exception as e:
204205
return server_error_response(e)
205206

@@ -250,6 +251,8 @@ def rm():
250251
return get_data_error_result(retmsg="File or Folder not found!")
251252
if not file.tenant_id:
252253
return get_data_error_result(retmsg="Tenant not found!")
254+
if file.source_type == FileSource.KNOWLEDGEBASE:
255+
continue
253256

254257
if file.type == FileType.FOLDER.value:
255258
file_id_list = FileService.get_all_innermost_file_ids(file_id, [])

api/db/__init__.py

+8
Original file line numberDiff line numberDiff line change
@@ -83,3 +83,11 @@ class ParserType(StrEnum):
8383
NAIVE = "naive"
8484
PICTURE = "picture"
8585
ONE = "one"
86+
87+
88+
class FileSource(StrEnum):
89+
LOCAL = ""
90+
KNOWLEDGEBASE = "knowledgebase"
91+
S3 = "s3"
92+
93+
KNOWLEDGEBASE_FOLDER_NAME=".knowledgebase"

api/db/db_models.py

+21-5
Original file line numberDiff line numberDiff line change
@@ -21,14 +21,13 @@
2121
from functools import wraps
2222
from itsdangerous.url_safe import URLSafeTimedSerializer as Serializer
2323
from flask_login import UserMixin
24-
24+
from playhouse.migrate import MySQLMigrator, migrate
2525
from peewee import (
26-
BigAutoField, BigIntegerField, BooleanField, CharField,
27-
CompositeKey, Insert, IntegerField, TextField, FloatField, DateTimeField,
26+
BigIntegerField, BooleanField, CharField,
27+
CompositeKey, IntegerField, TextField, FloatField, DateTimeField,
2828
Field, Model, Metadata
2929
)
3030
from playhouse.pool import PooledMySQLDatabase
31-
3231
from api.db import SerializedType, ParserType
3332
from api.settings import DATABASE, stat_logger, SECRET_KEY
3433
from api.utils.log_utils import getLogger
@@ -344,7 +343,7 @@ class Meta:
344343

345344

346345
@DB.connection_context()
347-
def init_database_tables():
346+
def init_database_tables(alter_fields=[]):
348347
members = inspect.getmembers(sys.modules[__name__], inspect.isclass)
349348
table_objs = []
350349
create_failed_list = []
@@ -361,6 +360,7 @@ def init_database_tables():
361360
if create_failed_list:
362361
LOGGER.info(f"create tables failed: {create_failed_list}")
363362
raise Exception(f"create tables failed: {create_failed_list}")
363+
migrate_db()
364364

365365

366366
def fill_db_model_object(model_object, human_model_dict):
@@ -699,6 +699,11 @@ class File(DataBaseModel):
699699
help_text="where dose it store")
700700
size = IntegerField(default=0)
701701
type = CharField(max_length=32, null=False, help_text="file extension")
702+
source_type = CharField(
703+
max_length=128,
704+
null=False,
705+
default="",
706+
help_text="where dose this document come from")
702707

703708
class Meta:
704709
db_table = "file"
@@ -817,3 +822,14 @@ class API4Conversation(DataBaseModel):
817822

818823
class Meta:
819824
db_table = "api_4_conversation"
825+
826+
827+
def migrate_db():
828+
try:
829+
with DB.transaction():
830+
migrator = MySQLMigrator(DB)
831+
migrate(
832+
migrator.add_column('file', 'source_type', CharField(max_length=128, null=False, default="", help_text="where dose this document come from"))
833+
)
834+
except Exception as e:
835+
pass

api/db/services/document_service.py

+16
Original file line numberDiff line numberDiff line change
@@ -150,6 +150,22 @@ def increment_chunk_num(cls, doc_id, kb_id, token_num, chunk_num, duation):
150150
Knowledgebase.id == kb_id).execute()
151151
return num
152152

153+
@classmethod
154+
@DB.connection_context()
155+
def clear_chunk_num(cls, doc_id):
156+
doc = cls.model.get_by_id(doc_id)
157+
assert doc, "Can't fine document in database."
158+
159+
num = Knowledgebase.update(
160+
token_num=Knowledgebase.token_num -
161+
doc.token_num,
162+
chunk_num=Knowledgebase.chunk_num -
163+
doc.chunk_num,
164+
doc_num=Knowledgebase.doc_num-1
165+
).where(
166+
Knowledgebase.id == doc.kb_id).execute()
167+
return num
168+
153169
@classmethod
154170
@DB.connection_context()
155171
def get_tenant_id(cls, doc_id):

api/db/services/file2document_service.py

+14-12
Original file line numberDiff line numberDiff line change
@@ -15,12 +15,12 @@
1515
#
1616
from datetime import datetime
1717

18+
from api.db import FileSource
1819
from api.db.db_models import DB
19-
from api.db.db_models import File, Document, File2Document
20+
from api.db.db_models import File, File2Document
2021
from api.db.services.common_service import CommonService
2122
from api.db.services.document_service import DocumentService
22-
from api.db.services.file_service import FileService
23-
from api.utils import current_timestamp, datetime_format
23+
from api.utils import current_timestamp, datetime_format, get_uuid
2424

2525

2626
class File2DocumentService(CommonService):
@@ -71,13 +71,15 @@ def update_by_file_id(cls, file_id, obj):
7171
@DB.connection_context()
7272
def get_minio_address(cls, doc_id=None, file_id=None):
7373
if doc_id:
74-
ids = File2DocumentService.get_by_document_id(doc_id)
74+
f2d = cls.get_by_document_id(doc_id)
7575
else:
76-
ids = File2DocumentService.get_by_file_id(file_id)
77-
if ids:
78-
e, file = FileService.get_by_id(ids[0].file_id)
79-
return file.parent_id, file.location
80-
else:
81-
assert doc_id, "please specify doc_id"
82-
e, doc = DocumentService.get_by_id(doc_id)
83-
return doc.kb_id, doc.location
76+
f2d = cls.get_by_file_id(file_id)
77+
if f2d:
78+
file = File.get_by_id(f2d[0].file_id)
79+
if file.source_type == FileSource.LOCAL:
80+
return file.parent_id, file.location
81+
doc_id = f2d[0].document_id
82+
83+
assert doc_id, "please specify doc_id"
84+
e, doc = DocumentService.get_by_id(doc_id)
85+
return doc.kb_id, doc.location

0 commit comments

Comments
 (0)