Skip to content

Commit 3e37bd5

Browse files
Adds check for performance DB (#56)
* adds check_performance * adds rule for check * adds check rule * refactors error message * change print statements to logging * add conn.close() * adds finally block
1 parent 4c74616 commit 3e37bd5

File tree

6 files changed

+50
-0
lines changed

6 files changed

+50
-0
lines changed

.github/workflows/run.yml

+3
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,9 @@ jobs:
4949
- name: Build dataset
5050
run: make
5151

52+
- name: Check performance database
53+
run: make check-performance
54+
5255
# Development
5356
- name: Configure Development AWS Credentials
5457
uses: aws-actions/configure-aws-credentials@v1-node16

.github/workflows/run_dev.yml

+3
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,9 @@ jobs:
4747
- name: Build dataset
4848
run: make
4949

50+
- name: Check performance database
51+
run: make check-performance
52+
5053
# Development
5154
- name: Configure Development AWS Credentials
5255
uses: aws-actions/configure-aws-credentials@v1-node16

.github/workflows/run_performance.yml

+3
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,9 @@ jobs:
4949
- name: Build performance dataset
5050
run: make third-pass
5151

52+
- name: Check performance database
53+
run: make check-performance
54+
5255
# Development
5356
- name: Configure Development AWS Credentials
5457
uses: aws-actions/configure-aws-credentials@v1-node16

.github/workflows/run_performance_dev.yml

+3
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,9 @@ jobs:
4949
- name: Build performance dataset
5050
run: make third-pass
5151

52+
- name: Check performance database
53+
run: make check-performance
54+
5255
# Development
5356
- name: Configure Development AWS Credentials
5457
uses: aws-actions/configure-aws-credentials@v1-node16

Makefile

+3
Original file line numberDiff line numberDiff line change
@@ -97,3 +97,6 @@ specification::
9797
curl -qfsL '$(SOURCE_URL)/specification/main/specification/provision.csv' > specification/provision.csv
9898
curl -qfsL '$(SOURCE_URL)/specification/main/specification/provision-rule.csv' > specification/provision-rule.csv
9999
curl -qfsL '$(SOURCE_URL)/specification/main/specification/provision-reason.csv' > specification/provision-reason.csv
100+
101+
check-performance::
102+
python check/performance.py

check/performance.py

+35
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,35 @@
1+
import logging
2+
import sqlite3
3+
from digital_land.expectations.operation import (
4+
check_columns
5+
)
6+
7+
# Module-level logger. Pass the *variable* __name__ (not the literal string
# "__name__") so records are attributed to this module's real import name.
logger = logging.getLogger(__name__)
8+
9+
# Columns each table of the performance database is expected to contain,
# keyed by table name. Passed unchanged to check_columns().
EXPECTED = {
    "endpoint_dataset_issue_type_summary": [
        "organisation",
        "organisation_name",
        "cohort",
        "dataset",
        "collection",
        "pipeline",
        "endpoint",
        "endpoint_url",
        "resource",
        "resource_start_date",
        "resource_end_date",
        "latest_log_entry_date",
        "count_issues",
        "date",
        "issue_type",
        "severity",
        "responsibility",
        "fields",
    ],
    "endpoint_dataset_resource_summary": [
        "organisation",
        "organisation_name",
        "cohort",
        "dataset",
        "collection",
        "pipeline",
        "endpoint",
        "endpoint_url",
        "resource",
        "resource_start_date",
        "resource_end_date",
        "latest_log_entry_date",
        "mapping_field",
        "non_mapping_field",
    ],
    "endpoint_dataset_summary": [
        "organisation",
        "dataset",
        "endpoint",
        "endpoint_url",
        "resource",
        "latest_status",
        "latest_exception",
        "latest_log_entry_date",
        "entry_date",
        "end_date",
        "latest_resource_start_date",
        "resource_end_date",
    ],
    "provision_summary": [
        "organisation",
        "organisation_name",
        "dataset",
        "provision_reason",
        "active_endpoint_count",
        "error_endpoint_count",
        "count_issue_error_internal",
        "count_issue_error_external",
        "count_issue_warning_internal",
        "count_issue_warning_external",
        "count_issue_notice_internal",
        "count_issue_notice_external",
    ],
    "reporting_historic_endpoints": [
        "organisation",
        "name",
        "organisation_name",
        "dataset",
        "collection",
        "pipeline",
        "endpoint",
        "endpoint_url",
        "licence",
        "latest_status",
        "latest_exception",
        "resource",
        "latest_log_entry_date",
        "endpoint_entry_date",
        "endpoint_end_date",
        "resource_start_date",
        "resource_end_date",
    ],
    "reporting_latest_endpoints": [
        "organisation",
        "name",
        "organisation_name",
        "dataset",
        "collection",
        "pipeline",
        "endpoint",
        "endpoint_url",
        "licence",
        "latest_status",
        "days_since_200",
        "latest_exception",
        "resource",
        "latest_log_entry_date",
        "endpoint_entry_date",
        "endpoint_end_date",
        "resource_start_date",
        "resource_end_date",
        "rn",
    ],
}
17+
18+
def check_performance_columns():
    """Verify that the performance database has every expected column.

    Opens ``dataset/performance.sqlite3``, runs ``check_columns`` against the
    ``EXPECTED`` table/column mapping and, if the check does not succeed,
    logs the overall message plus the missing and actual columns of every
    failing table before raising.

    Raises:
        Exception: if ``check_columns`` reports a failure; the message is
            ``"Performance DB check failed: <message>"``.
    """
    # Bind the connection itself before entering the try block: if connect()
    # raises there is nothing to clean up, and the original error is not
    # masked by an unbound name in the finally clause.
    connection = sqlite3.connect("dataset/performance.sqlite3")
    try:
        # check_columns is handed a cursor, as before.
        cursor = connection.cursor()
        result, message, details = check_columns(cursor, EXPECTED)
        if not result:
            logger.error("Column check failed for performance DB")
            logger.error(message)
            for item in details:
                if item["success"]:
                    continue
                logger.error(
                    "%s did not have all expected columns. Missing columns: %s",
                    item["table"],
                    item["missing"],
                )
                logger.error("Columns found: %s", item["actual"])
            raise Exception(f"Performance DB check failed: {message}")
    finally:
        # Close the connection (not just the cursor) so the database file
        # handle is released on both the success and the failure path.
        connection.close()


if __name__ == "__main__":
    # Install the default root handler explicitly so the error records above
    # are emitted with their level and logger name when run as a script.
    logging.basicConfig()
    check_performance_columns()

0 commit comments

Comments
 (0)