Skip to content

Commit d7e4783

Browse files
authored
Add endpoint for batch uploading document metadata (#404)
1 parent e397acb commit d7e4783

File tree

3 files changed

+139
-0
lines changed

3 files changed

+139
-0
lines changed

ragna/deploy/_api/core.py

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -160,6 +160,33 @@ async def create_document_upload_info(
160160
)
161161
return schemas.DocumentUpload(parameters=parameters, document=document)
162162

163+
# TODO: Add UI support and documentation for this endpoint (#406)
164+
@app.post("/documents")
async def create_documents_upload_info(
    user: UserDependency,
    names: Annotated[list[str], Body(..., embed=True)],
) -> list[schemas.DocumentUpload]:
    # Batch counterpart of the single-document upload-info endpoint: one new
    # document is created per requested name, its upload info is fetched from
    # `config.document.get_upload_info`, and all documents are written to the
    # database through a single `database.add_documents` call.
    with get_session() as session:
        # (document, metadata) pairs destined for the database.
        pending_rows = []
        # Upload descriptions handed back to the client, in the order of `names`.
        uploads = []

        for name in names:
            document = schemas.Document(name=name)
            upload_info = await config.document.get_upload_info(
                config=config, user=user, id=document.id, name=document.name
            )
            metadata, parameters = upload_info

            pending_rows.append((document, metadata))
            uploads.append(
                schemas.DocumentUpload(parameters=parameters, document=document)
            )

        database.add_documents(
            session,
            user=user,
            document_metadata_collection=pending_rows,
        )
        return uploads
188+
189+
# TODO: Add new endpoint for batch uploading documents (#407)
163190
@app.put("/document")
164191
async def upload_document(
165192
token: Annotated[str, Form()], file: UploadFile

ragna/deploy/_api/database.py

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,32 @@ def add_document(
5555
session.commit()
5656

5757

58+
def add_documents(
    session: Session,
    *,
    user: str,
    document_metadata_collection: list[tuple[schemas.Document, dict[str, Any]]],
) -> None:
    """
    Persist a batch of documents for a user with a single commit.

    Each `(document, metadata)` pair is turned into an `orm.Document` row owned by
    the user. All rows are handed to the session through `add_all` and committed
    once, which matters when every database operation carries non-negligible
    latency.
    """
    owner_id = _get_user_id(session, user)

    rows = []
    for document, metadata in document_metadata_collection:
        row = orm.Document(
            id=document.id,
            user_id=owner_id,
            name=document.name,
            metadata_=metadata,
        )
        rows.append(row)

    session.add_all(rows)
    session.commit()
82+
83+
5884
def _orm_to_schema_document(document: orm.Document) -> schemas.Document:
    """Convert a database document row into its schema representation.

    Only the `id` and `name` of the ORM object are carried over.
    """
    document_id = document.id
    document_name = document.name
    return schemas.Document(id=document_id, name=document_name)
6086

Lines changed: 86 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,86 @@
1+
from fastapi import status
2+
from fastapi.testclient import TestClient
3+
4+
from ragna.deploy import Config
5+
from ragna.deploy._api import app
6+
7+
from .utils import authenticate
8+
9+
10+
def test_batch_sequential_upload_equivalence(tmp_local_root):
    """Check that uploading documents sequentially and in batch gives the same result.

    Upload information is requested for two documents, once per document through
    `POST /document` and once for both through `POST /documents`. The returned
    upload URLs and document names must match pairwise, and performing the
    upload with either set of parameters must succeed and report the same
    document name.
    """
    config = Config(local_root=tmp_local_root)

    document_root = config.local_root / "documents"
    document_root.mkdir()
    document_path1 = document_root / "test1.txt"
    with open(document_path1, "w") as file:
        file.write("!\n")
    document_path2 = document_root / "test2.txt"
    with open(document_path2, "w") as file:
        file.write("?\n")

    # Start the API with the configuration rooted in the temporary directory
    # created above rather than a fresh `Config()`, so the server under test
    # uses the same local root this test prepared.
    with TestClient(
        app(config=config, ignore_unavailable_components=False)
    ) as client:
        authenticate(client)

        document1_upload = (
            client.post("/document", json={"name": document_path1.name})
            .raise_for_status()
            .json()
        )
        document2_upload = (
            client.post("/document", json={"name": document_path2.name})
            .raise_for_status()
            .json()
        )

        documents_upload = (
            client.post(
                "/documents", json={"names": [document_path1.name, document_path2.name]}
            )
            .raise_for_status()
            .json()
        )

        assert (
            document1_upload["parameters"]["url"]
            == documents_upload[0]["parameters"]["url"]
        )
        assert (
            document2_upload["parameters"]["url"]
            == documents_upload[1]["parameters"]["url"]
        )

        assert (
            document1_upload["document"]["name"]
            == documents_upload[0]["document"]["name"]
        )
        assert (
            document2_upload["document"]["name"]
            == documents_upload[1]["document"]["name"]
        )

        # assuming that if test passes for first document it will also pass for the other
        with open(document_path1, "rb") as file:
            response_sequential_upload1 = client.request(
                document1_upload["parameters"]["method"],
                document1_upload["parameters"]["url"],
                data=document1_upload["parameters"]["data"],
                files={"file": file},
            )
            # The handle was already read by the first request. Rewind it
            # explicitly so the second upload sends the same content without
            # depending on the HTTP client resetting the file position.
            file.seek(0)
            response_batch_upload1 = client.request(
                documents_upload[0]["parameters"]["method"],
                documents_upload[0]["parameters"]["url"],
                data=documents_upload[0]["parameters"]["data"],
                files={"file": file},
            )

        assert response_sequential_upload1.status_code == status.HTTP_200_OK
        assert response_batch_upload1.status_code == status.HTTP_200_OK

        assert (
            response_sequential_upload1.json()["name"]
            == response_batch_upload1.json()["name"]
        )

0 commit comments

Comments
 (0)