def check_columns(conn, expected: dict):
    """Check that the database contains the given tables with the expected columns.

    Args:
        conn: a sqlite3 connection or cursor. Only ``execute(sql).fetchall()``
            is called on it.
        expected: a dictionary with table names as keys and a list of the
            columns each table is expected to have as values.

    Returns:
        A ``(result, message, details)`` tuple:

        * ``result`` is ``True`` only when every table passed.
        * ``message`` summarises how many tables passed.
        * ``details`` holds one dict per table, in the order of ``expected``,
          with the keys ``table``, ``success``, ``missing``, ``actual`` and
          ``expected``.
    """
    details = []
    for table_name, expected_columns in expected.items():
        # PRAGMA arguments cannot be bound as parameters, so quote the name as
        # an identifier (doubling embedded quotes) instead of pasting it raw.
        quoted_table = '"' + str(table_name).replace('"', '""') + '"'
        rows = conn.execute(f"PRAGMA table_info({quoted_table})").fetchall()

        # table_info rows are (cid, name, type, notnull, dflt_value, pk)
        actual = [row[1] for row in rows]
        actual_set = set(actual)

        # keep the caller's column order (de-duplicated) so output is stable
        missing = [
            column
            for column in dict.fromkeys(expected_columns)
            if column not in actual_set
        ]

        # table_info returns no rows for a table that does not exist, so an
        # empty result must fail even when no columns were requested
        success = bool(actual) and not missing

        details.append(
            {
                "table": table_name,
                "success": success,
                "missing": missing,
                "actual": actual,
                "expected": expected_columns,
            }
        )

    success_count = sum(1 for detail in details if detail["success"])
    result = success_count == len(details)
    message = f"{success_count} out of {len(details)} tables had expected columns"

    return result, message, details
def test_check_columns(dataset_path):
    """check_columns passes when every expected column exists in each table."""
    expected = {
        "entity": [
            "dataset",
            "end_date",
            "entity",
            "entry_date",
            "geojson",
            "geometry",
            "json",
            "name",
            "organisation_entity",
            "point",
            "prefix",
            "reference",
            "start_date",
            "typology",
        ],
        "old_entity": ["old_entity", "entity"],
    }

    # sqlite3's connection context manager only commits or rolls back; close
    # explicitly so the test does not leak the database handle.
    conn = sqlite3.connect(dataset_path)
    try:
        result, message, details = check_columns(conn.cursor(), expected)
    finally:
        conn.close()

    assert result
    assert "2 out of 2 tables had expected columns" in message

    assert details[0]["table"] == "entity"
    # every expected column must be reported, not just one of them
    assert all(x in details[0]["actual"] for x in expected["entity"])
    assert all(x in details[0]["expected"] for x in expected["entity"])


def test_check_columns_failure(dataset_path):
    """check_columns fails and lists the columns a table does not have."""
    expected = {
        "entity": [
            "missing",
            "columns",
            "dataset",
            "end_date",
            "entity",
            "entry_date",
            "geojson",
            "geometry",
            "json",
            "name",
            "organisation_entity",
            "point",
            "prefix",
            "reference",
            "start_date",
            "typology",
        ],
        "old_entity": ["old_entity", "entity"],
    }

    conn = sqlite3.connect(dataset_path)
    try:
        result, message, details = check_columns(conn.cursor(), expected)
    finally:
        conn.close()

    assert not result
    assert "1 out of 2 tables had expected columns" in message
    assert not details[0]["success"]
    assert "missing" in details[0]["missing"]
    assert "columns" in details[0]["missing"]