Commit 27b67a2

Merge pull request #4 from digital-land/new_APIs

Specification API

2 parents 7e6050a + d9cc01f

File tree: 3 files changed, +117 −29 lines changed


README.md (+26 −1)

@@ -19,7 +19,7 @@ In order to build and test the software outside of Docker, you will need
 
 You can run the API locally by running either `make compose-up` or `docker compose up -d --build`.
 
-The docker compose setup runs the S3 locally using Localstack as well as the API. An S3 bucket called local-collection-data is created and seeded with example issue log data.
+The docker compose setup runs the S3 locally using Localstack as well as the API. An S3 bucket called local-collection-data is created and seeded with example files in the collection-data directory.
 
 
 ## Swagger UI
@@ -69,3 +69,28 @@ Request for issues for a specific dataset and resource:
 curl http://localhost:8000/log/issue?dataset=border&resource=4a57239e3c1174c80b6d4a0278ab386a7c3664f2e985b2e07a66bbec84988b30&field=geometry
 ```
 
+### provision_summary endpoint
+
+can be accessed via
+```
+http://localhost:8000/performance/provision_summary?organisation=local-authority:LBH&offset=50&limit=100
+```
+
+Optional Parameters:
+* Offset
+* Limit
+* Organisation
+* Dataset
+
+
+### specification endpoint
+
+can be accessed via
+```
+http://localhost:8000/specification/specification?offset=0&limit=10
+```
+
+Optional Parameters:
+* Offset
+* Limit
+* Dataset
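For example, the optional Dataset parameter on the new specification endpoint can be combined with paging. The dataset value below is illustrative, borrowed from the integration test added later in this commit:

```
http://localhost:8000/specification/specification?dataset=article-4-direction-area&offset=0&limit=10
```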

src/db.py (+66 −26)

@@ -3,7 +3,7 @@
 from schema import IssuesParams, ProvisionParams, SpecificationsParams
 from pagination_model import PaginationParams, PaginatedResult
 from config import config
-
+import json
 
 logger = get_logger(__name__)
 
@@ -56,19 +56,22 @@ def search_issues(params: IssuesParams):
 
 def search_provision_summary(params: ProvisionParams):
     s3_uri = f"s3://{config.collection_bucket}/{config.performance_base_path}/*.parquet"  # noqa
-    pagination = f"LIMIT {params.limit} OFFSET {params.offset}"
 
     where_clause = ""
+    query_params = []
+
     if params.dataset:
-        where_clause += _add_condition(where_clause, f"dataset = '{params.dataset}'")
+        where_clause += _add_condition(where_clause, "dataset = ?")
+        query_params.append(params.dataset)
+
     if params.organisation:
-        where_clause += _add_condition(
-            where_clause, f"organisation = '{params.organisation}'"
-        )
+        where_clause += _add_condition(where_clause, "organisation = ?")
+        query_params.append(params.organisation)
 
     sql_count = f"SELECT COUNT(*) FROM '{s3_uri}' {where_clause}"
+    sql_results = f"SELECT * FROM '{s3_uri}' {where_clause} LIMIT ? OFFSET ?"
+
     logger.debug(sql_count)
-    sql_results = f"SELECT * FROM '{s3_uri}' {where_clause} {pagination}"
     logger.debug(sql_results)
 
     with duckdb.connect() as conn:
@@ -80,33 +83,53 @@ def search_provision_summary(params: ProvisionParams):
                 ).fetchall()
             )
             logger.debug(conn.execute("FROM duckdb_secrets();").fetchall())
-            count = conn.execute(sql_count).fetchone()[
-                0
-            ]  # Count is first item in Tuple
-            results = conn.execute(sql_results).arrow().to_pylist()
+
+            # Execute parameterized queries
+            count = conn.execute(sql_count, query_params).fetchone()[0]
+            results = (
+                conn.execute(sql_results, query_params + [params.limit, params.offset])
+                .arrow()
+                .to_pylist()
+            )
+
             return PaginatedResult(
                 params=PaginationParams(offset=params.offset, limit=params.limit),
                 total_results_available=count,
                 data=results,
             )
         except Exception as e:
-            logger.exception(
-                "Failure executing DuckDB queries",
-            )
+            logger.exception("Failure executing DuckDB queries")
             raise e
 
 
 def get_specification(params: SpecificationsParams):
     s3_uri = f"s3://{config.collection_bucket}/{config.specification_base_path}/*.parquet"  # noqa
-    pagination = f"LIMIT {params.limit} OFFSET {params.offset}"
 
     where_clause = ""
+    query_params = {}
+
     if params.dataset:
-        where_clause += _add_condition(where_clause, f"dataset = '{params.dataset}'")
+        where_clause += _add_condition(
+            where_clause,
+            "TRIM(BOTH '\"' FROM json_extract(json(value), '$.dataset')) = ?",
+        )
+        query_params["dataset"] = params.dataset
+
+    sql_count = f"""
+        SELECT COUNT(*) FROM (
+            SELECT unnest(CAST(json AS VARCHAR[])) AS value
+            FROM '{s3_uri}') AS parsed_json {where_clause}
+        LIMIT ? OFFSET ?
+    """
+
+    sql_results = f"""
+        SELECT value AS json FROM (
+            SELECT unnest(CAST(json AS VARCHAR[])) AS value
+            FROM '{s3_uri}') AS parsed_json {where_clause}
+        LIMIT ? OFFSET ?
+    """
 
-    sql_count = f"SELECT COUNT(*) FROM '{s3_uri}' {where_clause}"
     logger.debug(sql_count)
-    sql_results = f"SELECT * FROM '{s3_uri}' {where_clause} {pagination}"
     logger.debug(sql_results)
 
     with duckdb.connect() as conn:
@@ -118,19 +141,36 @@ def get_specification(params: SpecificationsParams):
                 ).fetchall()
             )
             logger.debug(conn.execute("FROM duckdb_secrets();").fetchall())
-            count = conn.execute(sql_count).fetchone()[
-                0
-            ]  # Count is first item in Tuple
-            results = conn.execute(sql_results).arrow().to_pylist()
+
+            # Execute queries with parameters
+            count = conn.execute(
+                sql_count, [*query_params.values(), params.limit, params.offset]
+            ).fetchone()[0]
+            results = (
+                conn.execute(
+                    sql_results, [*query_params.values(), params.limit, params.offset]
+                )
+                .arrow()
+                .to_pylist()
+            )
+
+            # Convert JSON strings to actual JSON objects
+            json_results = []
+            for item in results:
+                if "json" in item and isinstance(item["json"], str):
+                    try:
+                        parsed_json = json.loads(item["json"])
+                        json_results.append(parsed_json)
+                    except json.JSONDecodeError:
+                        logger.warning(f"Invalid JSON format in row: {item['json']}")
+
             return PaginatedResult(
                 params=PaginationParams(offset=params.offset, limit=params.limit),
                 total_results_available=count,
-                data=results,
+                data=json_results,
             )
         except Exception as e:
-            logger.exception(
-                "Failure executing DuckDB queries",
-            )
+            logger.exception("Failure executing DuckDB queries")
             raise e
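The substantive change in both functions is that user-supplied filter values are moved out of the SQL strings and passed as bound `?` parameters (only the parquet path, which comes from config, is still interpolated). `_add_condition` is an existing helper not shown in this diff. Below is a minimal self-contained sketch of that pattern, with an in-memory table standing in for the project's parquet files on S3 and purely illustrative values:

```python
import duckdb

dataset = "article-4-direction-area"  # example user input

where_clause = ""
query_params = []
if dataset:
    where_clause = "WHERE dataset = ?"  # `?` placeholder, not an f-string
    query_params.append(dataset)

with duckdb.connect() as conn:
    # Stand-in for the parquet files the real code reads from S3.
    conn.execute(
        "CREATE TABLE provision AS SELECT * FROM (VALUES "
        "('article-4-direction-area', 'local-authority:LBH'), "
        "('border', 'local-authority:XYZ')) AS t(dataset, organisation)"
    )
    # User input travels only as bound parameters, never interpolated into SQL.
    count = conn.execute(
        f"SELECT COUNT(*) FROM provision {where_clause}", query_params
    ).fetchone()[0]
    rows = conn.execute(
        f"SELECT * FROM provision {where_clause} LIMIT ? OFFSET ?",
        query_params + [10, 0],
    ).arrow().to_pylist()
    print(count, rows)  # 1 [{'dataset': 'article-4-direction-area', ...}]
```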

tests/integration/test_main.py (+25 −2)

@@ -88,8 +88,31 @@ def test_specification(s3_bucket):
 
     response_data = response.json()
     assert "X-Pagination-Total-Results" in response.headers
-    assert response.headers["X-Pagination-Total-Results"] == str(16)
+    assert response.headers["X-Pagination-Total-Results"] == str(36)
     assert response.headers["X-Pagination-Limit"] == "8"
 
     assert len(response_data) > 0
-    assert response_data[0]["name"] == "Article 4 direction"
+
+
+def test_specification_with_dataset(s3_bucket):
+    # Prepare test params
+    params = {
+        "offset": 0,
+        "limit": 8,
+        "dataset": "article-4-direction-area",
+    }
+
+    response = client.get("/specification/specification", params=params)
+
+    # Validate the results from the search
+    assert response.status_code == 200
+
+    response_data = response.json()
+    assert "X-Pagination-Total-Results" in response.headers
+    assert response.headers["X-Pagination-Total-Results"] == str(1)
+    assert response.headers["X-Pagination-Limit"] == "8"
+
+    assert len(response_data) > 0
+    assert response_data[0]["dataset"] == "article-4-direction-area"
+    assert response_data[0]["fields"]
+    assert len(response_data[0]["fields"]) > 1
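Note that the new test only pins down part of the response shape: each item returned by the specification endpoint is a parsed JSON object with at least a `dataset` key and a `fields` list containing more than one entry. An item consistent with those assertions might look like this (the entries inside `fields` are invented for illustration):

```python
# Illustrative only: inferred from the assertions above, not from real data.
example_item = {
    "dataset": "article-4-direction-area",
    "fields": [
        {"field": "geometry"},  # hypothetical entries; the test only
        {"field": "name"},      # checks that len(fields) > 1
    ],
}
```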
