diff --git a/README.md b/README.md
index ecff8dd..24e92d7 100644
--- a/README.md
+++ b/README.md
@@ -19,7 +19,7 @@ In order to build and test the software outside of Docker, you will need
 
 You can run the API locally by running either `make compose-up` or `docker compose up -d --build`.
 
-The docker compose setup runs the S3 locally using Localstack as well as the API. An S3 bucket called local-collection-data is created and seeded with example issue log data.
+The Docker Compose setup runs a local S3 (via LocalStack) alongside the API. An S3 bucket called local-collection-data is created and seeded with the example files in the collection-data directory.
 
 ## Swagger UI
 
@@ -69,3 +69,28 @@ Request for issues for a specific dataset and resource:
 curl http://localhost:8000/log/issue?dataset=border&resource=4a57239e3c1174c80b6d4a0278ab386a7c3664f2e985b2e07a66bbec84988b30&field=geometry
 ```
 
+### provision_summary endpoint
+
+Can be accessed via:
+```
+http://localhost:8000/performance/provision_summary?organisation=local-authority:LBH&offset=50&limit=100
+```
+
+Optional query parameters:
+ * `offset`
+ * `limit`
+ * `organisation`
+ * `dataset`
+
+
+### specification endpoint
+
+Can be accessed via:
+```
+http://localhost:8000/specification/specification?offset=0&limit=10
+```
+
+Optional query parameters:
+ * `offset`
+ * `limit`
+ * `dataset`
\ No newline at end of file
diff --git a/src/db.py b/src/db.py
index 671ae1e..b953627 100644
--- a/src/db.py
+++ b/src/db.py
@@ -3,7 +3,7 @@
 from schema import IssuesParams, ProvisionParams, SpecificationsParams
 from pagination_model import PaginationParams, PaginatedResult
 from config import config
-
+import json
 
 logger = get_logger(__name__)
 
@@ -56,19 +56,23 @@ def search_issues(params: IssuesParams):
 
 def search_provision_summary(params: ProvisionParams):
     s3_uri = f"s3://{config.collection_bucket}/{config.performance_base_path}/*.parquet"  # noqa
-    pagination = f"LIMIT {params.limit} OFFSET {params.offset}"
 
     where_clause = ""
+    query_params = []
+
     if params.dataset:
-        where_clause += _add_condition(where_clause, f"dataset = '{params.dataset}'")
+        where_clause += _add_condition(where_clause, "dataset = ?")
+        query_params.append(params.dataset)
+
     if params.organisation:
-        where_clause += _add_condition(
-            where_clause, f"organisation = '{params.organisation}'"
-        )
+        where_clause += _add_condition(where_clause, "organisation = ?")
+        query_params.append(params.organisation)
 
     sql_count = f"SELECT COUNT(*) FROM '{s3_uri}' {where_clause}"
+    sql_results = f"SELECT * FROM '{s3_uri}' {where_clause} LIMIT ? OFFSET ?"
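+    # "?" placeholders are bound positionally: WHERE values first, then limit and offset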
+
     logger.debug(sql_count)
-    sql_results = f"SELECT * FROM '{s3_uri}' {where_clause} {pagination}"
     logger.debug(sql_results)
 
     with duckdb.connect() as conn:
@@ -80,33 +83,53 @@ def search_provision_summary(params: ProvisionParams):
                 ).fetchall()
             )
             logger.debug(conn.execute("FROM duckdb_secrets();").fetchall())
-            count = conn.execute(sql_count).fetchone()[
-                0
-            ]  # Count is first item in Tuple
-            results = conn.execute(sql_results).arrow().to_pylist()
+
+            # Execute parameterized queries
+            count = conn.execute(sql_count, query_params).fetchone()[0]
+            results = (
+                conn.execute(sql_results, query_params + [params.limit, params.offset])
+                .arrow()
+                .to_pylist()
+            )
+
             return PaginatedResult(
                 params=PaginationParams(offset=params.offset, limit=params.limit),
                 total_results_available=count,
                 data=results,
             )
         except Exception as e:
-            logger.exception(
-                "Failure executing DuckDB queries",
-            )
+            logger.exception("Failure executing DuckDB queries")
             raise e
 
 
 def get_specification(params: SpecificationsParams):
     s3_uri = f"s3://{config.collection_bucket}/{config.specification_base_path}/*.parquet"  # noqa
-    pagination = f"LIMIT {params.limit} OFFSET {params.offset}"
 
     where_clause = ""
+    query_params = {}
+
     if params.dataset:
-        where_clause += _add_condition(where_clause, f"dataset = '{params.dataset}'")
+        where_clause += _add_condition(
+            where_clause,
+            "TRIM(BOTH '\"' FROM json_extract(json(value), '$.dataset')) = ?",
+        )
+        query_params["dataset"] = params.dataset
+
+    sql_count = f"""
+        SELECT COUNT(*) FROM (
+            SELECT unnest(CAST(json AS VARCHAR[])) AS value
+            FROM '{s3_uri}') AS parsed_json {where_clause}
+    """
+
+    sql_results = f"""
+        SELECT value AS json FROM (
+            SELECT unnest(CAST(json AS VARCHAR[])) AS value
+            FROM '{s3_uri}') AS parsed_json {where_clause}
+        LIMIT ? OFFSET ?
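+        -- the two trailing "?" are bound to limit and offset at execution time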
+ """ - sql_count = f"SELECT COUNT(*) FROM '{s3_uri}' {where_clause}" logger.debug(sql_count) - sql_results = f"SELECT * FROM '{s3_uri}' {where_clause} {pagination}" logger.debug(sql_results) with duckdb.connect() as conn: @@ -118,19 +141,36 @@ def get_specification(params: SpecificationsParams): ).fetchall() ) logger.debug(conn.execute("FROM duckdb_secrets();").fetchall()) - count = conn.execute(sql_count).fetchone()[ - 0 - ] # Count is first item in Tuple - results = conn.execute(sql_results).arrow().to_pylist() + + # Execute queries with parameters + count = conn.execute( + sql_count, [*query_params.values(), params.limit, params.offset] + ).fetchone()[0] + results = ( + conn.execute( + sql_results, [*query_params.values(), params.limit, params.offset] + ) + .arrow() + .to_pylist() + ) + + # Convert JSON strings to actual JSON objects + json_results = [] + for item in results: + if "json" in item and isinstance(item["json"], str): + try: + parsed_json = json.loads(item["json"]) + json_results.append(parsed_json) + except json.JSONDecodeError: + logger.warning(f"Invalid JSON format in row: {item['json']}") + return PaginatedResult( params=PaginationParams(offset=params.offset, limit=params.limit), total_results_available=count, - data=results, + data=json_results, ) except Exception as e: - logger.exception( - "Failure executing DuckDB queries", - ) + logger.exception("Failure executing DuckDB queries") raise e diff --git a/tests/integration/test_main.py b/tests/integration/test_main.py index 1c32878..286b3dd 100644 --- a/tests/integration/test_main.py +++ b/tests/integration/test_main.py @@ -88,8 +88,31 @@ def test_specification(s3_bucket): response_data = response.json() assert "X-Pagination-Total-Results" in response.headers - assert response.headers["X-Pagination-Total-Results"] == str(16) + assert response.headers["X-Pagination-Total-Results"] == str(36) assert response.headers["X-Pagination-Limit"] == "8" assert len(response_data) > 0 - assert response_data[0]["name"] == "Article 4 direction" + + +def test_specification_with_dataset(s3_bucket): + # Prepare test params + params = { + "offset": 0, + "limit": 8, + "dataset": "article-4-direction-area", + } + + response = client.get("/specification/specification", params=params) + + # Validate the results from the search + assert response.status_code == 200 + + response_data = response.json() + assert "X-Pagination-Total-Results" in response.headers + assert response.headers["X-Pagination-Total-Results"] == str(1) + assert response.headers["X-Pagination-Limit"] == "8" + + assert len(response_data) > 0 + assert response_data[0]["dataset"] == "article-4-direction-area" + assert response_data[0]["fields"] + assert len(response_data[0]["fields"]) > 1