From 257c7bc20b41fa8903eeb5118d0892297e84f442 Mon Sep 17 00:00:00 2001 From: Samriti Sadhu Date: Tue, 11 Mar 2025 12:02:31 +0000 Subject: [PATCH] Adding provision_end_date in provision_summary table --- bin/load_performance.py | 4 ++-- check/performance.py | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/bin/load_performance.py b/bin/load_performance.py index d8dbab8..b05deef 100755 --- a/bin/load_performance.py +++ b/bin/load_performance.py @@ -18,7 +18,7 @@ def fetch_provision_data(db_path): conn = sqlite3.connect(db_path) query = """ - select p.organisation, o.name as organisation_name, p.cohort, p.dataset,p.provision_reason from provision p + select p.organisation, o.name as organisation_name, p.cohort, p.dataset,p.provision_reason, p.end_date as provision_end_date from provision p inner join organisation o on o.organisation = p.organisation order by p.organisation """ @@ -131,7 +131,7 @@ def create_performance_tables(merged_data, cf_merged_data, endpoint_summary_data endpoint_summary_table_name, conn, if_exists='replace', index=False) # Filter out endpoints with an end date as we don't want to count them in provision summary - final_result = merged_data.groupby(['organisation', 'organisation_name', 'dataset', 'provision_reason']).agg( + final_result = merged_data.groupby(['organisation', 'organisation_name', 'dataset', 'provision_reason', 'provision_end_date']).agg( active_endpoint_count=pd.NamedAgg( column='endpoint', aggfunc=lambda x: x[(merged_data.loc[x.index, diff --git a/check/performance.py b/check/performance.py index f13ff03..ab4a537 100644 --- a/check/performance.py +++ b/check/performance.py @@ -10,7 +10,7 @@ "endpoint_dataset_issue_type_summary": ['organisation', 'organisation_name', 'cohort', 'dataset', 'collection', 'pipeline', 'endpoint', 'endpoint_url', 'resource', 'resource_start_date', 'resource_end_date', 'latest_log_entry_date', 'count_issues', 'date', 'issue_type', 'severity', 'responsibility', 'field'], "endpoint_dataset_resource_summary": ['organisation', 'organisation_name', 'cohort', 'dataset', 'collection', 'pipeline', 'endpoint', 'endpoint_url', 'resource', 'resource_start_date', 'resource_end_date', 'latest_log_entry_date', 'mapping_field', 'non_mapping_field'], "endpoint_dataset_summary": ['organisation', 'dataset', 'endpoint', 'endpoint_url', 'documentation_url', 'resource', 'latest_status', 'latest_exception', 'latest_log_entry_date', 'entry_date', 'end_date', 'latest_resource_start_date', 'resource_end_date'], - "provision_summary": ['organisation', 'organisation_name', 'dataset', 'provision_reason', 'active_endpoint_count', 'error_endpoint_count', 'count_issue_error_internal', 'count_issue_error_external', 'count_issue_warning_internal', 'count_issue_warning_external', 'count_issue_notice_internal', 'count_issue_notice_external'], + "provision_summary": ['organisation', 'organisation_name', 'dataset', 'provision_reason', 'provision_end_date', 'active_endpoint_count', 'error_endpoint_count', 'count_issue_error_internal', 'count_issue_error_external', 'count_issue_warning_internal', 'count_issue_warning_external', 'count_issue_notice_internal', 'count_issue_notice_external'], "reporting_historic_endpoints": ['organisation', 'name', 'organisation_name', 'dataset', 'collection', 'pipeline', 'endpoint', 'endpoint_url', 'documentation_url', 'licence', 'latest_status', 'latest_exception', 'resource', 'latest_log_entry_date', 'endpoint_entry_date', 'endpoint_end_date', 'resource_start_date', 'resource_end_date'], "reporting_latest_endpoints": ['organisation', 'name', 'organisation_name', 'dataset', 'collection', 'pipeline', 'endpoint', 'endpoint_url', 'licence', 'latest_status', 'days_since_200', 'latest_exception', 'resource', 'latest_log_entry_date', 'endpoint_entry_date', 'endpoint_end_date', 'resource_start_date', 'resource_end_date', 'rn'] }