Skip to content

Commit 37669f0

Browse files
committed
Adding issue-type_summary API
1 parent d9cc01f commit 37669f0

File tree

6 files changed

+268
-10
lines changed

6 files changed

+268
-10
lines changed

src/db.py

+174-3
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,11 @@
11
import duckdb
22
from log import get_logger
3-
from schema import IssuesParams, ProvisionParams, SpecificationsParams
3+
from schema import (
4+
CommonParams,
5+
IssuesParams,
6+
SpecificationsParams,
7+
IssueTypeSummaryParams,
8+
)
49
from pagination_model import PaginationParams, PaginatedResult
510
from config import config
611
import json
@@ -54,8 +59,174 @@ def search_issues(params: IssuesParams):
5459
raise e
5560

5661

57-
def search_provision_summary(params: ProvisionParams):
58-
s3_uri = f"s3://{config.collection_bucket}/{config.performance_base_path}/*.parquet" # noqa
62+
def search_provision_summary(params: CommonParams):
    """Return one page of rows from the provision summary parquet file.

    The optional ``dataset`` and ``organisation`` values on *params* are
    applied as bound (``?``) query parameters, and ``limit``/``offset``
    select the page.

    Returns a ``PaginatedResult`` carrying the page of rows, the paging
    parameters that were used, and the total number of matching rows.
    Any exception raised while running the DuckDB queries is logged and
    re-raised to the caller.
    """
    s3_uri = f"s3://{config.collection_bucket}/{config.performance_base_path}/provision_summary.parquet"  # noqa

    # Each optional filter contributes one SQL condition and one bound value;
    # the clause text itself is produced by the module's _add_condition helper.
    optional_filters = (
        ("dataset = ?", params.dataset),
        ("organisation = ?", params.organisation),
    )
    where_clause = ""
    query_params = []
    for condition, value in optional_filters:
        if value:
            where_clause += _add_condition(where_clause, condition)
            query_params.append(value)

    # NOTE(review): LIMIT/OFFSET is used without ORDER BY, so page contents
    # may not be stable between calls -- confirm whether ordering is needed.
    sql_count = f"SELECT COUNT(*) FROM '{s3_uri}' {where_clause}"
    sql_results = f"SELECT * FROM '{s3_uri}' {where_clause} LIMIT ? OFFSET ?"

    for statement in (sql_count, sql_results):
        logger.debug(statement)

    with duckdb.connect() as conn:
        try:
            if config.use_aws_credential_chain:
                # Register S3 credentials from the AWS credential chain on
                # this connection, then log the secrets DuckDB reports.
                secret_rows = conn.execute(
                    "CREATE SECRET aws (TYPE S3, PROVIDER CREDENTIAL_CHAIN);"
                ).fetchall()
                logger.debug(secret_rows)
                logger.debug(conn.execute("FROM duckdb_secrets();").fetchall())

            # Execute parameterized queries
            total = conn.execute(sql_count, query_params).fetchone()[0]
            page_args = query_params + [params.limit, params.offset]
            rows = conn.execute(sql_results, page_args).arrow().to_pylist()

            return PaginatedResult(
                params=PaginationParams(offset=params.offset, limit=params.limit),
                total_results_available=total,
                data=rows,
            )
        except Exception as exc:
            logger.exception("Failure executing DuckDB queries")
            raise exc
108+
109+
110+
def search_issue_type_summary(params: IssueTypeSummaryParams):
    """Return one page of rows from the issue-type summary parquet file.

    Every optional filter set on *params* (``dataset``, ``organisation``,
    ``issueType``, ``issueField``, ``severity``, ``responsibility``) is
    applied as a bound (``?``) query parameter, and ``limit``/``offset``
    select the page.

    Returns a ``PaginatedResult`` carrying the page of rows, the paging
    parameters that were used, and the total number of matching rows.
    Any exception raised while running the DuckDB queries is logged and
    re-raised to the caller.
    """
    s3_uri = f"s3://{config.collection_bucket}/{config.performance_base_path}/endpoint_dataset_issue_type_summary.parquet"  # noqa

    # (SQL condition, request value) pairs; a pair is only applied when the
    # request actually supplied a truthy value for it.
    # NOTE(review): the filter targets column ``issue_field`` while the
    # SELECT below returns ``fields`` -- confirm both exist in the parquet.
    optional_filters = (
        ("dataset = ?", params.dataset),
        ("organisation = ?", params.organisation),
        ("issue_type = ?", params.issueType),
        ("issue_field = ?", params.issueField),
        ("severity = ?", params.severity),
        ("responsibility = ?", params.responsibility),
    )
    where_clause = ""
    query_params = []
    for condition, value in optional_filters:
        if value:
            where_clause += _add_condition(where_clause, condition)
            query_params.append(value)

    # NOTE(review): LIMIT/OFFSET is used without ORDER BY, so page contents
    # may not be stable between calls -- confirm whether ordering is needed.
    sql_count = f"SELECT COUNT(*) FROM '{s3_uri}' {where_clause}"
    sql_results = f"""
        SELECT organisation, organisation_name, dataset, issue_type, fields,
        count_issues, severity, responsibility FROM '{s3_uri}' {where_clause}
        LIMIT ? OFFSET ?"""

    for statement in (sql_count, sql_results):
        logger.debug(statement)

    with duckdb.connect() as conn:
        try:
            if config.use_aws_credential_chain:
                # Register S3 credentials from the AWS credential chain on
                # this connection, then log the secrets DuckDB reports.
                secret_rows = conn.execute(
                    "CREATE SECRET aws (TYPE S3, PROVIDER CREDENTIAL_CHAIN);"
                ).fetchall()
                logger.debug(secret_rows)
                logger.debug(conn.execute("FROM duckdb_secrets();").fetchall())

            # Execute parameterized queries
            total = conn.execute(sql_count, query_params).fetchone()[0]
            page_args = query_params + [params.limit, params.offset]
            rows = conn.execute(sql_results, page_args).arrow().to_pylist()

            return PaginatedResult(
                params=PaginationParams(offset=params.offset, limit=params.limit),
                total_results_available=total,
                data=rows,
            )
        except Exception as exc:
            logger.exception("Failure executing DuckDB queries")
            raise exc
175+
176+
177+
def search_dataset_resource_mapping(params: CommonParams):
    """Return one page of rows from the dataset/resource summary parquet file.

    The optional ``dataset`` and ``organisation`` values on *params* are
    applied as bound (``?``) query parameters, and ``limit``/``offset``
    select the page.  Only the ``organisation``, ``dataset``, ``resource``
    and ``mapped_fields`` columns are selected.

    Returns a ``PaginatedResult`` carrying the page of rows, the paging
    parameters that were used, and the total number of matching rows.
    Any exception raised while running the DuckDB queries is logged and
    re-raised to the caller.
    """
    s3_uri = f"s3://{config.collection_bucket}/{config.performance_base_path}/endpoint_dataset_resource_summary.parquet"  # noqa

    # Each optional filter contributes one SQL condition and one bound value;
    # the clause text itself is produced by the module's _add_condition helper.
    optional_filters = (
        ("dataset = ?", params.dataset),
        ("organisation = ?", params.organisation),
    )
    where_clause = ""
    query_params = []
    for condition, value in optional_filters:
        if value:
            where_clause += _add_condition(where_clause, condition)
            query_params.append(value)

    # NOTE(review): LIMIT/OFFSET is used without ORDER BY, so page contents
    # may not be stable between calls -- confirm whether ordering is needed.
    sql_count = f"SELECT COUNT(*) FROM '{s3_uri}' {where_clause}"
    sql_results = f"""
        SELECT organisation, dataset, resource, mapped_fields FROM
        '{s3_uri}' {where_clause}
        LIMIT ? OFFSET ?"""

    for statement in (sql_count, sql_results):
        logger.debug(statement)

    with duckdb.connect() as conn:
        try:
            if config.use_aws_credential_chain:
                # Register S3 credentials from the AWS credential chain on
                # this connection, then log the secrets DuckDB reports.
                secret_rows = conn.execute(
                    "CREATE SECRET aws (TYPE S3, PROVIDER CREDENTIAL_CHAIN);"
                ).fetchall()
                logger.debug(secret_rows)
                logger.debug(conn.execute("FROM duckdb_secrets();").fetchall())

            # Execute parameterized queries
            total = conn.execute(sql_count, query_params).fetchone()[0]
            page_args = query_params + [params.limit, params.offset]
            rows = conn.execute(sql_results, page_args).arrow().to_pylist()

            return PaginatedResult(
                params=PaginationParams(offset=params.offset, limit=params.limit),
                total_results_available=total,
                data=rows,
            )
        except Exception as exc:
            logger.exception("Failure executing DuckDB queries")
            raise exc
226+
227+
228+
def search_endpoint_dataset_summary(params: CommonParams):
229+
s3_uri = f"s3://{config.collection_bucket}/{config.performance_base_path}/endpoint_dataset_summary.parquet" # noqa
59230

60231
where_clause = ""
61232
query_params = []

src/main.py

+50-2
Original file line numberDiff line numberDiff line change
@@ -8,9 +8,10 @@
88
from fastapi.responses import JSONResponse
99
from schema import (
1010
HealthCheckResponse,
11+
CommonParams,
1112
IssuesParams,
12-
ProvisionParams,
1313
SpecificationsParams,
14+
IssueTypeSummaryParams,
1415
)
1516
from log import get_logger
1617

@@ -58,7 +59,7 @@ def issues(http_response: Response, params: IssuesParams = Depends()):
5859

5960

6061
@app.get("/performance/provision_summary", tags=["provision_summary"])
61-
def provision_summary(http_response: Response, params: ProvisionParams = Depends()):
62+
def provision_summary(http_response: Response, params: CommonParams = Depends()):
6263
paginated_result = db.search_provision_summary(params)
6364
http_response.headers["X-Pagination-Total-Results"] = str(
6465
paginated_result.total_results_available
@@ -72,6 +73,53 @@ def provision_summary(http_response: Response, params: ProvisionParams = Depends
7273
)
7374

7475

76+
@app.get("/performance/issue_type_summary", tags=["issue_type_summary"])
def issue_type_summary(
    http_response: Response, params: IssueTypeSummaryParams = Depends()
):
    """Serve one page of issue-type summary rows as a JSON array.

    The rows come from ``db.search_issue_type_summary``.  The total match
    count and the offset/limit that were applied are reported through the
    ``X-Pagination-*`` response headers rather than in the body.
    """
    page = db.search_issue_type_summary(params)

    pagination_headers = {
        "X-Pagination-Total-Results": str(page.total_results_available),
        "X-Pagination-Offset": str(page.params.offset),
        "X-Pagination-Limit": str(page.params.limit),
    }
    for header_name, header_value in pagination_headers.items():
        http_response.headers[header_name] = header_value

    # The body is serialised by hand and returned in a fresh Response that
    # reuses the headers collected on the injected response object.
    body = json.dumps(page.data)
    return Response(
        content=body,
        media_type="application/json",
        headers=http_response.headers,
    )
91+
92+
93+
@app.get("/performance/dataset_resource_mapping", tags=["dataset_resource_mapping"])
def dataset_resource_mapping(
    http_response: Response, params: CommonParams = Depends()
):
    """Serve one page of dataset/resource mapping rows as a JSON array.

    The rows come from ``db.search_dataset_resource_mapping``.  The total
    match count and the offset/limit that were applied are reported through
    the ``X-Pagination-*`` response headers rather than in the body.
    """
    page = db.search_dataset_resource_mapping(params)

    pagination_headers = {
        "X-Pagination-Total-Results": str(page.total_results_available),
        "X-Pagination-Offset": str(page.params.offset),
        "X-Pagination-Limit": str(page.params.limit),
    }
    for header_name, header_value in pagination_headers.items():
        http_response.headers[header_name] = header_value

    # The body is serialised by hand and returned in a fresh Response that
    # reuses the headers collected on the injected response object.
    body = json.dumps(page.data)
    return Response(
        content=body,
        media_type="application/json",
        headers=http_response.headers,
    )
106+
107+
108+
@app.get("/performance/endpoint_dataset_summary", tags=["endpoint_dataset_summary"])
def endpoint_dataset_summary(
    http_response: Response, params: CommonParams = Depends()
):
    """Serve one page of endpoint/dataset summary rows as a JSON array.

    The rows come from ``db.search_endpoint_dataset_summary``.  The total
    match count and the offset/limit that were applied are reported through
    the ``X-Pagination-*`` response headers rather than in the body.
    """
    page = db.search_endpoint_dataset_summary(params)

    pagination_headers = {
        "X-Pagination-Total-Results": str(page.total_results_available),
        "X-Pagination-Offset": str(page.params.offset),
        "X-Pagination-Limit": str(page.params.limit),
    }
    for header_name, header_value in pagination_headers.items():
        http_response.headers[header_name] = header_value

    # The body is serialised by hand and returned in a fresh Response that
    # reuses the headers collected on the injected response object.
    body = json.dumps(page.data)
    return Response(
        content=body,
        media_type="application/json",
        headers=http_response.headers,
    )
121+
122+
75123
@app.get("/specification/specification", tags=["specification"])
76124
def get_specification(
77125
http_response: Response, params: SpecificationsParams = Depends()

src/schema.py

+10-1
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,11 @@ class BaseParams(BaseModel):
99
limit: int = Field(50, ge=1, le=100)
1010

1111

12+
class CommonParams(BaseParams):
    # Query parameters shared by the performance endpoints that can only be
    # narrowed by dataset and/or organisation.  Both filters are optional and
    # default to None, meaning "do not filter on this column".
    dataset: Optional[str] = Field(default=None)
    organisation: Optional[str] = Field(default=None)
15+
16+
1217
class IssuesParams(BaseParams):
1318
dataset: Optional[str] = Field(None)
1419
resource: Optional[str] = Field(None)
@@ -37,9 +42,13 @@ class Issue(BaseModel):
3742
message: str
3843

3944

40-
class ProvisionParams(BaseParams):
45+
class IssueTypeSummaryParams(BaseParams):
    # Query parameters accepted by the issue-type summary endpoint.  Every
    # filter is optional and defaults to None, meaning "do not filter on
    # this column".  The camelCase names are the public query-string names.
    dataset: Optional[str] = Field(default=None)
    organisation: Optional[str] = Field(default=None)
    issueType: Optional[str] = Field(default=None)
    issueField: Optional[str] = Field(default=None)
    severity: Optional[str] = Field(default=None)
    responsibility: Optional[str] = Field(default=None)
4352

4453

4554
class SpecificationsParams(BaseParams):

tests/integration/test_main.py

+34-4
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
from fastapi.testclient import TestClient
22
from main import app
3+
from unittest.mock import patch
34
import json
45

56
# Create a test client for the FastAPI app
@@ -74,14 +75,15 @@ def test_provision_summary(s3_bucket):
7475
), "Expected active endpoint count to be 1"
7576

7677

77-
def test_specification(s3_bucket):
78+
def test_specification(s3_bucket, duckdb_connection):
7879
# Prepare test params
7980
params = {
8081
"offset": 0,
8182
"limit": 8,
8283
}
8384

84-
response = client.get("/specification/specification", params=params)
85+
with patch("db.duckdb.connect", return_value=duckdb_connection):
86+
response = client.get("/specification/specification", params=params)
8587

8688
# Validate the results from the search
8789
assert response.status_code == 200
@@ -94,15 +96,16 @@ def test_specification(s3_bucket):
9496
assert len(response_data) > 0
9597

9698

97-
def test_specification_with_dataset(s3_bucket):
99+
def test_specification_with_dataset(s3_bucket, duckdb_connection):
98100
# Prepare test params
99101
params = {
100102
"offset": 0,
101103
"limit": 8,
102104
"dataset": "article-4-direction-area",
103105
}
104106

105-
response = client.get("/specification/specification", params=params)
107+
with patch("db.duckdb.connect", return_value=duckdb_connection):
108+
response = client.get("/specification/specification", params=params)
106109

107110
# Validate the results from the search
108111
assert response.status_code == 200
@@ -116,3 +119,30 @@ def test_specification_with_dataset(s3_bucket):
116119
assert response_data[0]["dataset"] == "article-4-direction-area"
117120
assert response_data[0]["fields"]
118121
assert len(response_data[0]["fields"]) > 1
122+
123+
def test_issue_type_summary(s3_bucket):
    """Query the issue-type summary endpoint for one organisation/dataset.

    Checks the status code, the pagination headers, and the per-resource
    breakdown of the rows that come back.
    """
    # Prepare test params
    query = {
        "organisation": "local-authority:BUC",
        "dataset": "brownfield-land",
        "offset": 0,
        "limit": 8,
    }
    response = client.get("/performance/issue_type_summary", params=query)

    # Validate the results from the search
    assert response.status_code == 200

    response_data = response.json()
    assert "X-Pagination-Total-Results" in response.headers
    assert response.headers["X-Pagination-Total-Results"] == str(11)
    assert response.headers["X-Pagination-Limit"] == "8"

    assert len(response_data) > 0

    # NOTE(review): the SELECT in db.search_issue_type_summary shown in this
    # commit does not list a ``resource`` column, so this filter may match
    # nothing -- confirm the endpoint is expected to return ``resource``.
    target_resource = (
        "8c61c7b72902daeaaa462002e62d840ce3916defacd54db97986654b180ce250"
    )
    filtered_rows = [
        row for row in response_data if row.get("resource") == target_resource
    ]
    assert len(filtered_rows) == 5

    # Ensure 3 have issue_type as "patch"
    patch_rows = [row for row in filtered_rows if row.get("issue_type") == "patch"]
    assert len(patch_rows) == 3

0 commit comments

Comments
 (0)