@@ -13,6 +13,7 @@
 from middlewares.cors import add_cors_middleware
 from models.chats import ChatMessage
 from models.users import User
+from parsers.github import process_github
 from utils.file import convert_bytes, get_file_size
 from utils.processors import filter_file
 from utils.vectors import (CommonsDep, create_user, similarity_search,
@@ -114,18 +115,40 @@ async def chat_endpoint(commons: CommonsDep, chat_message: ChatMessage, credenti
 
 @app.post("/crawl/", dependencies=[Depends(JWTBearer())])
 async def crawl_endpoint(commons: CommonsDep, crawl_website: CrawlWebsite, enable_summarization: bool = False, credentials: dict = Depends(JWTBearer())):
+    max_brain_size = os.getenv("MAX_BRAIN_SIZE")  # total bytes a user may store; assumed to be set
+
     user = User(email=credentials.get('email', 'none'))
-    file_path, file_name = crawl_website.process()
+    user_vectors_response = commons['supabase'].table("vectors").select(
+        "name:metadata->>file_name, size:metadata->>file_size", count="exact") \
+        .filter("user_id", "eq", user.email) \
+        .execute()
+    documents = user_vectors_response.data  # rows of {name, size} for this user
+    # Deduplicate rows: convert each dict to a tuple of items, collapse through a set, convert back
+    user_unique_vectors = [dict(t) for t in set(tuple(d.items()) for d in documents)]
 
-    # Create a SpooledTemporaryFile from the file_path
-    spooled_file = SpooledTemporaryFile()
-    with open(file_path, 'rb') as f:
-        shutil.copyfileobj(f, spooled_file)
+    current_brain_size = sum(float(doc['size']) for doc in user_unique_vectors)
 
-    # Pass the SpooledTemporaryFile to UploadFile
-    file = UploadFile(file=spooled_file, filename=file_name)
-    message = await filter_file(file, enable_summarization, commons['supabase'], user=user)
-    return message
+    file_size = 1000000  # flat estimate of the crawled content size, in bytes
+
+    remaining_free_space = float(max_brain_size) - current_brain_size
+
+    if remaining_free_space - file_size < 0:
+        message = {"message": f"❌ User's brain will exceed maximum capacity with this upload. Maximum file size allowed is: {convert_bytes(remaining_free_space)}", "type": "error"}
+    elif not crawl_website.checkGithub():
+        file_path, file_name = crawl_website.process()
+
+        # Create a SpooledTemporaryFile from the file_path
+        spooled_file = SpooledTemporaryFile()
+        with open(file_path, 'rb') as f:
+            shutil.copyfileobj(f, spooled_file)
+
+        # Pass the SpooledTemporaryFile to UploadFile
+        file = UploadFile(file=spooled_file, filename=file_name)
+        message = await filter_file(file, enable_summarization, commons['supabase'], user=user)
+    else:
+        message = await process_github(crawl_website.url, "false", user=user, supabase=commons['supabase'])
+
+    return message
 
 
 @app.get("/explore", dependencies=[Depends(JWTBearer())])
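Below the diff, a minimal standalone sketch of the new quota check, tracing the deduplication and size arithmetic outside the endpoint. The sample rows and the 50 MB fallback for `MAX_BRAIN_SIZE` are illustrative assumptions, not values from this PR:

```python
import os

# Illustrative rows standing in for the Supabase "vectors" query result;
# each row exposes the file name and size (in bytes) pulled from the metadata.
documents = [
    {"name": "notes.txt", "size": "1048576"},
    {"name": "notes.txt", "size": "1048576"},  # duplicate row for the same file
    {"name": "paper.pdf", "size": "2097152"},
]

# Dicts are unhashable, so deduplicate by converting each row to a tuple of
# items, collapsing through a set, and converting back to dicts, as the
# endpoint does with user_vectors_response.data.
unique_docs = [dict(t) for t in set(tuple(d.items()) for d in documents)]

current_brain_size = sum(float(doc["size"]) for doc in unique_docs)
file_size = 1000000  # the endpoint's flat estimate for a crawl, in bytes

max_brain_size = float(os.getenv("MAX_BRAIN_SIZE", "52428800"))  # assumed 50 MB default
remaining_free_space = max_brain_size - current_brain_size

if remaining_free_space - file_size < 0:
    print(f"rejected: only {remaining_free_space:.0f} bytes free")
else:
    print(f"accepted: {remaining_free_space:.0f} bytes free")
```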
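For the non-GitHub branch, a sketch of the file hand-off pattern: the crawled file on disk is copied into a `SpooledTemporaryFile` and wrapped in FastAPI's `UploadFile`. The explicit `seek(0)` is a defensive addition that is not in the endpoint itself, which leaves rewinding to the downstream consumer:

```python
import shutil
from tempfile import SpooledTemporaryFile

from fastapi import UploadFile


def to_upload_file(file_path: str, file_name: str) -> UploadFile:
    # Copy the on-disk crawl result into a SpooledTemporaryFile, which stays
    # in memory until it grows large and then spills to disk.
    spooled_file = SpooledTemporaryFile()
    with open(file_path, "rb") as f:
        shutil.copyfileobj(f, spooled_file)
    spooled_file.seek(0)  # defensive rewind so consumers read from the start
    return UploadFile(file=spooled_file, filename=file_name)
```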