Skip to content

Commit 7a13617

Browse files
authored
chore: add integration tests to s3 source (#447)
1 parent 92306b5 commit 7a13617

File tree

4 files changed

+144
-1
lines changed

4 files changed

+144
-1
lines changed

packages/ragbits-document-search/src/ragbits/document_search/documents/sources/s3.py

+1-1
Original file line number | Diff line number | Diff line change
@@ -116,7 +116,7 @@ async def list_sources(cls, bucket_name: str, prefix: str) -> Sequence["S3Source
116116
cls._set_client(bucket_name)
117117
if cls._s3_client is None:
118118
raise RuntimeError("S3 client is not initialized.")
119-
with trace(bucket=cls.bucket_name, key=cls.key) as outputs:
119+
with trace(bucket=bucket_name, key=prefix) as outputs:
120120
try:
121121
aws_sources_list = []
122122
paginator = cls._s3_client.get_paginator("list_objects_v2")
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,85 @@
1+
import os
2+
from collections.abc import Generator
3+
from pathlib import Path
4+
5+
import boto3
6+
import pytest
7+
from moto import mock_s3
8+
9+
from ragbits.document_search.documents.sources import S3Source
10+
from ragbits.document_search.documents.sources.base import LOCAL_STORAGE_DIR_ENV
11+
12+
os.environ[LOCAL_STORAGE_DIR_ENV] = Path(__file__).parent.as_posix()
13+
14+
TEST_BUCKET = "test-bucket"
15+
TEST_KEY = "test-file.txt"
16+
TEST_CONTENT = "Hello, this is a test file!"
17+
TEST_REGION = "us-east-1"
18+
19+
20+
@pytest.fixture
def s3_mock() -> Generator[boto3.client, None, None]:
    """Yield an S3 client created inside an active ``mock_s3`` context.

    Before yielding, the fixture creates ``TEST_BUCKET`` and stores one object
    (``TEST_KEY`` with body ``TEST_CONTENT``) in it. The client is yielded from
    inside the ``with`` block, so the mock remains active while the test runs.
    """
    with mock_s3():
        client = boto3.client("s3", region_name=TEST_REGION)
        client.create_bucket(Bucket=TEST_BUCKET)
        client.put_object(Bucket=TEST_BUCKET, Key=TEST_KEY, Body=TEST_CONTENT)
        yield client
28+
29+
30+
async def test_s3_source_fetch(s3_mock: boto3.client):
    """Test fetching a file from S3.

    Fetches ``TEST_KEY`` from ``TEST_BUCKET`` and checks that the returned path
    is an existing file whose text equals ``TEST_CONTENT``. The fetched file is
    removed afterwards whether or not the assertions pass.
    """
    source = S3Source(bucket_name=TEST_BUCKET, key=TEST_KEY)
    path = await source.fetch()

    try:
        assert path.is_file()
        assert path.read_text() == TEST_CONTENT
    finally:
        # Clean up even when an assertion fails, so a broken run does not leave
        # the downloaded file behind. missing_ok keeps a failed is_file() check
        # from being masked by a FileNotFoundError raised during cleanup.
        path.unlink(missing_ok=True)
38+
39+
40+
async def test_s3_source_fetch_not_found(s3_mock: boto3.client):
    """Check that fetching a key absent from the bucket raises ``FileNotFoundError``."""
    missing = S3Source(bucket_name=TEST_BUCKET, key="non-existent.txt")

    with pytest.raises(FileNotFoundError) as excinfo:
        await missing.fetch()

    # The error text is inspected after the context manager has captured it.
    message = str(excinfo.value)
    assert "The object does not exist" in message
48+
49+
50+
async def test_s3_source_list_sources(s3_mock: boto3.client):
    """Check that listing ``folder1/`` yields the two objects uploaded under it."""
    uploads = (
        ("folder1/file1.txt", "test1"),
        ("folder1/file2.txt", "test2"),
    )
    for object_key, body in uploads:
        s3_mock.put_object(Bucket=TEST_BUCKET, Key=object_key, Body=body)

    listed = await S3Source.list_sources(bucket_name=TEST_BUCKET, prefix="folder1/")

    assert len(listed) == 2
    listed_ids = {item.id for item in listed}
    expected_ids = {
        f"s3://{TEST_BUCKET}/folder1/file1.txt",
        f"s3://{TEST_BUCKET}/folder1/file2.txt",
    }
    # Subset check: both expected identifiers must be among the listed ones.
    assert expected_ids <= listed_ids
61+
62+
63+
async def test_s3_source_from_uri():
    """Check ``S3Source.from_uri`` against accepted and rejected URI forms."""
    # Accepted forms, in order: the s3:// URI, then the https:// URI.
    # Each must resolve to exactly one source for TEST_BUCKET / TEST_KEY.
    single_object_uris = (
        f"s3://{TEST_BUCKET}/{TEST_KEY}",
        f"https://{TEST_BUCKET}.s3.amazonaws.com/{TEST_KEY}",
    )
    for uri in single_object_uris:
        parsed = await S3Source.from_uri(uri)
        assert len(parsed) == 1
        only = parsed[0]
        assert only.bucket_name == TEST_BUCKET
        assert only.key == TEST_KEY

    # Rejected forms, in order: the wildcard pattern, then the invalid URI.
    # Each must raise ValueError containing the paired message fragment.
    rejected = (
        (f"s3://{TEST_BUCKET}/**/file.txt", "only supports '*' at the end of path"),
        ("invalid-uri", "Invalid AWS Source URI format"),
    )
    for uri, fragment in rejected:
        with pytest.raises(ValueError) as excinfo:
            await S3Source.from_uri(uri)
        assert fragment in str(excinfo.value)

pyproject.toml

+1
Original file line number | Diff line number | Diff line change
@@ -32,6 +32,7 @@ dev-dependencies = [
3232
"griffe-typingdoc>=0.2.7",
3333
"types-PyYAML>=6.0.2",
3434
"mypy>=1.13.0",
35+
"moto~=4.2.7",
3536
]
3637

3738
[tool.uv.sources]

uv.lock

+57
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments
 (0)