|
| 1 | +import os |
| 2 | +from collections.abc import Generator |
| 3 | +from pathlib import Path |
| 4 | + |
| 5 | +import boto3 |
| 6 | +import pytest |
| 7 | +from moto import mock_s3 |
| 8 | + |
| 9 | +from ragbits.document_search.documents.sources import S3Source |
| 10 | +from ragbits.document_search.documents.sources.base import LOCAL_STORAGE_DIR_ENV |
| 11 | + |
# Fixture data shared by the tests in this module.
TEST_REGION = "us-east-1"
TEST_BUCKET = "test-bucket"
TEST_KEY = "test-file.txt"
TEST_CONTENT = "Hello, this is a test file!"

# Set the local-storage environment variable to the directory containing this
# test module. NOTE(review): presumably this is where fetched sources are
# written -- confirm against ragbits' sources.base.
os.environ[LOCAL_STORAGE_DIR_ENV] = Path(__file__).parent.as_posix()
| 18 | + |
| 19 | + |
@pytest.fixture
def s3_mock() -> Generator[boto3.client, None, None]:
    """Yield a boto3 S3 client that runs inside moto's ``mock_s3`` context.

    Before yielding, the fixture creates ``TEST_BUCKET`` and uploads one
    object at ``TEST_KEY`` whose body is ``TEST_CONTENT``. The mock context
    stays open for the duration of the requesting test and is closed when the
    generator resumes afterwards.
    """
    with mock_s3():
        client = boto3.client("s3", region_name=TEST_REGION)
        client.create_bucket(Bucket=TEST_BUCKET)
        client.put_object(Bucket=TEST_BUCKET, Key=TEST_KEY, Body=TEST_CONTENT)
        yield client
| 28 | + |
| 29 | + |
async def test_s3_source_fetch(s3_mock: boto3.client):
    """Fetch the seeded object and check the local file holds its content.

    The ``s3_mock`` argument is not referenced in the body; requesting it
    activates the mocked S3 environment containing ``TEST_KEY``.
    """
    source = S3Source(bucket_name=TEST_BUCKET, key=TEST_KEY)
    path = await source.fetch()

    try:
        assert path.is_file()
        assert path.read_text() == TEST_CONTENT
    finally:
        # Remove the fetched file even when an assertion fails, so a failing
        # run does not leave it behind on disk. ``missing_ok`` keeps the
        # original assertion error visible if the file was never created.
        path.unlink(missing_ok=True)
| 38 | + |
| 39 | + |
async def test_s3_source_fetch_not_found(s3_mock: boto3.client):
    """Fetching a key that was never uploaded must raise ``FileNotFoundError``.

    The ``s3_mock`` argument is not referenced in the body; requesting it
    activates the mocked S3 environment with ``TEST_BUCKET`` present.
    """
    missing = S3Source(bucket_name=TEST_BUCKET, key="non-existent.txt")

    # ``match`` searches str(exception); the message contains no regex
    # metacharacters, so this is a plain substring check.
    with pytest.raises(FileNotFoundError, match="The object does not exist"):
        await missing.fetch()
| 48 | + |
| 49 | + |
async def test_s3_source_list_sources(s3_mock: boto3.client):
    """List sources under a prefix and check exactly the two uploaded keys appear."""
    seeded_objects = (
        ("folder1/file1.txt", "test1"),
        ("folder1/file2.txt", "test2"),
    )
    for object_key, object_body in seeded_objects:
        s3_mock.put_object(Bucket=TEST_BUCKET, Key=object_key, Body=object_body)

    listed = await S3Source.list_sources(bucket_name=TEST_BUCKET, prefix="folder1/")

    # Exactly two results are expected for the "folder1/" prefix.
    assert len(listed) == 2
    listed_ids = {item.id for item in listed}
    expected_ids = {
        f"s3://{TEST_BUCKET}/folder1/file1.txt",
        f"s3://{TEST_BUCKET}/folder1/file2.txt",
    }
    assert expected_ids <= listed_ids
| 61 | + |
| 62 | + |
async def test_s3_source_from_uri():
    """Check ``S3Source.from_uri`` on valid and invalid URIs.

    Covers an ``s3://`` URI, a virtual-hosted ``https://`` URI, a wildcard
    that is not at the end of the path, and a string that is not a URI.
    """
    # Both URI spellings must yield one source with the same bucket and key.
    valid_uris = (
        f"s3://{TEST_BUCKET}/{TEST_KEY}",
        f"https://{TEST_BUCKET}.s3.amazonaws.com/{TEST_KEY}",
    )
    for uri in valid_uris:
        parsed = await S3Source.from_uri(uri)
        assert len(parsed) == 1
        assert parsed[0].bucket_name == TEST_BUCKET
        assert parsed[0].key == TEST_KEY

    # Wildcard pattern in the middle of the path.
    with pytest.raises(ValueError) as wildcard_err:
        await S3Source.from_uri(f"s3://{TEST_BUCKET}/**/file.txt")
    # Checked after the block exits; inside it, this line would never run.
    assert "only supports '*' at the end of path" in str(wildcard_err.value)

    # Invalid URI.
    with pytest.raises(ValueError) as format_err:
        await S3Source.from_uri("invalid-uri")
    assert "Invalid AWS Source URI format" in str(format_err.value)
0 commit comments