+import os
 import pytest
 import duckdb
-import os
 import boto3
-from moto import mock_aws
+from testcontainers.localstack import LocalStackContainer
 from botocore.exceptions import ClientError
 
 
-
-
-os.environ["AWS_DEFAULT_REGION"] = "eu-west-2"
-os.environ["AWS_ACCESS_KEY_ID"] = "testing"
-os.environ["AWS_SECRET_ACCESS_KEY"] = "testing"
-os.environ["AWS_SECURITY_TOKEN"] = "testing"
-os.environ["AWS_SESSION_TOKEN"] = "testing"
+os.environ["AWS_ACCESS_KEY_ID"] = "test"
+os.environ["AWS_SECRET_ACCESS_KEY"] = "test"
+os.environ["AWS_DEFAULT_REGION"] = "us-east-1"
 os.environ["COLLECTION_BUCKET"] = "test-bucket"
 os.environ["ISSUES_BASE_PATH"] = "test/path"
-os.environ["USE_AWS_CREDENTIAL_CHAIN"] = "false"
-
-
-
 
 @pytest.fixture(scope="module")
-def duckdb_connection():
-    """
-    Fixture to provide a DuckDB in-memory database connection.
-    """
-    conn = duckdb.connect(":memory:")  # In-memory database for testing
-    conn.execute(f"SET s3_endpoint = 'localstack:4566';")
-    conn.execute(f"SET s3_access_key_id = '{os.getenv('AWS_ACCESS_KEY_ID')}';")
-    conn.execute(f"SET s3_secret_access_key = '{os.getenv('AWS_SECRET_ACCESS_KEY')}';")
-    conn.execute(f"SET s3_region = '{os.getenv('AWS_DEFAULT_REGION')}';")
-    conn.execute("SET s3_use_ssl=false;")
-    conn.execute("SET s3_url_style = 'path';")
-    yield conn
-    conn.close()
-
-
+def localstack_container():
+    # Start LocalStack container
+    with LocalStackContainer(image="localstack/localstack:2.0.1") as localstack:
+        # Wait for the service to be ready
+        yield localstack
 
 @pytest.fixture(scope="module")
-def s3_client():
-    """
-    Fixture to provide an S3 client connected to LocalStack.
-    """
-    # Create an S3 client with the LocalStack endpoint
+def s3_client(localstack_container):
+    # Create an S3 client using the LocalStack endpoint
     s3 = boto3.client(
         "s3",
-        endpoint_url="http://localstack:4566",  # LocalStack S3 endpoint
+        endpoint_url=localstack_container.get_url(),
         region_name=os.environ["AWS_DEFAULT_REGION"],
-        aws_access_key_id=os.getenv('AWS_ACCESS_KEY_ID'),
-        aws_secret_access_key=os.getenv('AWS_SECRET_ACCESS_KEY'),
+        aws_access_key_id=os.environ["AWS_ACCESS_KEY_ID"],
+        aws_secret_access_key=os.environ["AWS_SECRET_ACCESS_KEY"]
     )
     return s3
 
+@pytest.fixture(scope="module")
+def duckdb_connection(localstack_container):
+    # Reuse the module-scoped LocalStack container so DuckDB talks to the same
+    # instance as the boto3 client, on the host port mapped by testcontainers
+    host = localstack_container.get_container_host_ip()
+    port = localstack_container.get_exposed_port(4566)
+    # Set up a DuckDB in-memory database
+    conn = duckdb.connect(":memory:")
+    # Configure DuckDB to connect to S3 via LocalStack
+    conn.execute(f"SET s3_endpoint = '{host}:{port}';")
+    conn.execute(f"SET s3_access_key_id = '{os.getenv('AWS_ACCESS_KEY_ID')}';")
+    conn.execute(f"SET s3_secret_access_key = '{os.getenv('AWS_SECRET_ACCESS_KEY')}';")
+    conn.execute(f"SET s3_region = '{os.getenv('AWS_DEFAULT_REGION')}';")
+    conn.execute("SET s3_use_ssl = FALSE;")
+    conn.execute("SET s3_url_style = 'path';")
+    yield conn
+    conn.close()
 
 @pytest.fixture(scope="module")
 def s3_bucket(s3_client):
-    """
-    Fixture to set up a test bucket and upload a Parquet file in LocalStack.
-    """
+    # Create a bucket in LocalStack for the test
     bucket_name = os.environ["COLLECTION_BUCKET"]
-    parquet_file = "tests/files/issues.parquet"
-
-    # Check if the bucket exists
-    existing_buckets = s3_client.list_buckets().get("Buckets", [])
-    if not any(bucket["Name"] == bucket_name for bucket in existing_buckets):
-        # Create the bucket if it doesn't exist
-        s3_client.create_bucket(
-            Bucket=bucket_name,
-            CreateBucketConfiguration={"LocationConstraint": os.environ["AWS_DEFAULT_REGION"]},
-        )
-
-    # Delete any pre-existing objects in the bucket
-    objects = s3_client.list_objects_v2(Bucket=bucket_name).get("Contents", [])
-    for obj in objects:
-        s3_client.delete_object(Bucket=bucket_name, Key=obj["Key"])
-
-    # Upload the Parquet file
-    with open(parquet_file, "rb") as file:
-        s3_client.put_object(
-            Bucket=bucket_name,
-            Key=f"{os.environ['ISSUES_BASE_PATH']}/issues.parquet",
-            Body=file,
-        )
-
+    try:
+        s3_client.create_bucket(Bucket=bucket_name)
+    except ClientError:
+        pass  # Ignore if bucket already exists
+
+    # Upload a Parquet file to the bucket, under the configured base path
+    parquet_file = "tests/files/issues.parquet"
+    with open(parquet_file, "rb") as f:
+        s3_client.put_object(Bucket=bucket_name, Key=f"{os.environ['ISSUES_BASE_PATH']}/issues.parquet", Body=f)
+
     yield s3_client
-
-@pytest.fixture
-def s3_uri():
-    """
-    Fixture that provides the s3 URI for use in tests.
-    """
-    bucket_name = os.environ["COLLECTION_BUCKET"]
-    base_path = os.environ["ISSUES_BASE_PATH"]
-    s3_uri = f"s3://{bucket_name}/{base_path}/**/*.parquet"
-    return s3_uri
-
-
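
For reference, a minimal sketch of how these fixtures might be exercised together, assuming they live in the suite's conftest.py, that DuckDB's httpfs extension is available to the connection, and that tests/files/issues.parquet contains at least one row; the test name and query below are illustrative only and not part of this commit:

import os


def test_issues_parquet_readable_from_localstack(duckdb_connection, s3_bucket):
    # Hypothetical test: query the uploaded Parquet file through DuckDB's S3 support
    s3_uri = f"s3://{os.environ['COLLECTION_BUCKET']}/{os.environ['ISSUES_BASE_PATH']}/issues.parquet"
    row_count = duckdb_connection.execute(
        f"SELECT COUNT(*) FROM read_parquet('{s3_uri}')"
    ).fetchone()[0]
    assert row_count >= 1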