From 0f5cba982a41333dabfba99ae7f42c05ae54d0ac Mon Sep 17 00:00:00 2001 From: Colton Loftus <70598503+C-Loftus@users.noreply.github.com> Date: Wed, 5 Mar 2025 17:06:43 -0500 Subject: [PATCH] add prov test (#124) Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> --- tests/helpers.py | 2 +- tests/main_test.py | 35 ++++++++++++++++++++++++----------- 2 files changed, 25 insertions(+), 12 deletions(-) diff --git a/tests/helpers.py b/tests/helpers.py index 8e47d5ee..49d0e937 100644 --- a/tests/helpers.py +++ b/tests/helpers.py @@ -11,7 +11,7 @@ class SparqlClient: - def __init__(self, repository: Literal["iow", "prov"] = "iow"): + def __init__(self, repository: Literal["iow", "iowprov"] = "iow"): GRAPH_URL_IN_TESTING = "http://localhost:7200/repositories/" self.url = GRAPH_URL_IN_TESTING + repository diff --git a/tests/main_test.py b/tests/main_test.py index 269beff7..b97d4cd1 100644 --- a/tests/main_test.py +++ b/tests/main_test.py @@ -54,7 +54,8 @@ def assert_rclone_is_installed_properly(): def test_e2e(): """Run the e2e test on the entire geoconnex graph""" - SparqlClient().clear_graph() + SparqlClient("iow").clear_graph() + SparqlClient("iowprov").clear_graph() # insert a dummy graph before running that should be dropped after syncing ref_mainstems_mainstems__0 SparqlClient().insert_triples_as_graph( "urn:iow:summoned:ref_mainstems_mainstems__0:DUMMY_PREFIX_TO_DROP", @@ -81,18 +82,16 @@ def test_e2e(): ) assert all_graphs.success - resolved_job = definitions.get_job_def("harvest_source") - all_partitions = sources_partitions_def.get_partition_keys( dynamic_partitions_store=instance ) assert len(all_partitions) > 0, "Partitions were not generated" - all_graphs = resolved_job.execute_in_process( - instance=instance, partition_key="ref_mainstems_mainstems__0" - ) + harvest_job = definitions.get_job_def("harvest_source") - assert all_graphs.success, "Job execution failed for partition 'mainstems__0'" + assert 
harvest_job.execute_in_process( + instance=instance, partition_key="ref_mainstems_mainstems__0" + ).success, "Job execution failed for partition 'ref_mainstems_mainstems__0'" objects_query = """ select * where { @@ -105,10 +104,9 @@ def test_e2e(): "Florida River" in resultDict["o"] ), "The Florida River Mainstem was not found in the graph" - all_graphs = resolved_job.execute_in_process( + assert harvest_job.execute_in_process( instance=instance, partition_key="cdss_co_gages__0" - ) - assert all_graphs.success, "Job execution failed for partition 'cdss_co_gages__0'" + ).success, "Job execution failed for partition 'cdss_co_gages__0'" assert_data_is_linked_in_graph() # Don't want to actually transfer the file but should check it is installed @@ -130,11 +128,26 @@ def test_e2e(): """) # make sure we have 2 orgs graphs since we crawled 2 sources so far # urn:iow:orgs is nabu's way of serializing the s3 prefix 'orgs/' - assert sum("urn:iow:orgs" in g for g in all_graphs["g"]) == 2 + NUM_ORG_GRAPHS = sum("urn:iow:orgs" in g for g in all_graphs["g"]) + assert NUM_ORG_GRAPHS == 2 assert not any( "DUMMY_PREFIX_TO_DROP" in g for g in all_graphs["g"] ), "The dummy graph we inserted before crawling was not dropped correctly" + # make sure that prov graphs were generated for the mainstem run + mainstem_prov_graphs = SparqlClient(repository="iowprov").execute_sparql(""" + SELECT DISTINCT ?g + WHERE { + GRAPH ?g { + ?s ?p ?o . + } + FILTER(CONTAINS(STR(?g), "urn:iow:prov:ref_mainstems_mainstems__0")) + } + """) + assert ( + len(mainstem_prov_graphs["g"]) > 0 + ), "prov graphs were not generated for the mainstem run" + def test_dynamic_partitions(): """Make sure that a new materialization of the gleaner config will create new partitions"""