Skip to content

Commit

Permalink
add prov test (#124)
Browse files Browse the repository at this point in the history
Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
  • Loading branch information
C-Loftus and pre-commit-ci[bot] authored Mar 5, 2025
1 parent b9819d6 commit 0f5cba9
Show file tree
Hide file tree
Showing 2 changed files with 25 additions and 12 deletions.
2 changes: 1 addition & 1 deletion tests/helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@


class SparqlClient:
def __init__(self, repository: Literal["iow", "prov"] = "iow"):
def __init__(self, repository: Literal["iow", "iowprov"] = "iow"):
GRAPH_URL_IN_TESTING = "http://localhost:7200/repositories/"
self.url = GRAPH_URL_IN_TESTING + repository

Expand Down
35 changes: 24 additions & 11 deletions tests/main_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,8 @@ def assert_rclone_is_installed_properly():

def test_e2e():
"""Run the e2e test on the entire geoconnex graph"""
SparqlClient().clear_graph()
SparqlClient("iow").clear_graph()
SparqlClient("iowprov").clear_graph()
# insert a dummy graph before running that should be dropped after syncing ref_mainstems_mainstems__0
SparqlClient().insert_triples_as_graph(
"urn:iow:summoned:ref_mainstems_mainstems__0:DUMMY_PREFIX_TO_DROP",
Expand All @@ -81,18 +82,16 @@ def test_e2e():
)
assert all_graphs.success

resolved_job = definitions.get_job_def("harvest_source")

all_partitions = sources_partitions_def.get_partition_keys(
dynamic_partitions_store=instance
)
assert len(all_partitions) > 0, "Partitions were not generated"

all_graphs = resolved_job.execute_in_process(
instance=instance, partition_key="ref_mainstems_mainstems__0"
)
harvest_job = definitions.get_job_def("harvest_source")

assert all_graphs.success, "Job execution failed for partition 'mainstems__0'"
assert harvest_job.execute_in_process(
instance=instance, partition_key="ref_mainstems_mainstems__0"
).success, "Job execution failed for partition 'mainstems__0'"

objects_query = """
select * where {
Expand All @@ -105,10 +104,9 @@ def test_e2e():
"Florida River" in resultDict["o"]
), "The Florida River Mainstem was not found in the graph"

all_graphs = resolved_job.execute_in_process(
assert harvest_job.execute_in_process(
instance=instance, partition_key="cdss_co_gages__0"
)
assert all_graphs.success, "Job execution failed for partition 'cdss_co_gages__0'"
).success, "Job execution failed for partition 'cdss_co_gages__0'"

assert_data_is_linked_in_graph()
# Don't want to actually transfer the file but should check it is installed
Expand All @@ -130,11 +128,26 @@ def test_e2e():
""")
# make sure we have 2 orgs graphs since we crawled 2 sources so far
# urn:iow:orgs is nabu's way of serializing the s3 prefix 'orgs/'
assert sum("urn:iow:orgs" in g for g in all_graphs["g"]) == 2
NUM_ORG_GRAPHS = sum("urn:iow:orgs" in g for g in all_graphs["g"])
assert NUM_ORG_GRAPHS == 2
assert not any(
"DUMMY_PREFIX_TO_DROP" in g for g in all_graphs["g"]
), "The dummy graph we inserted crawling was not dropped correctly"

# make sure that prov graphs were generated for the mainstem run
mainstem_prov_graphs = SparqlClient(repository="iowprov").execute_sparql("""
SELECT DISTINCT ?g
WHERE {
GRAPH ?g {
?s ?p ?o .
}
FILTER(CONTAINS(STR(?g), "urn:iow:prov:ref_mainstems_mainstems__0"))
}
""")
assert (
len(mainstem_prov_graphs["g"]) > 0
), "prov graphs were not generated for the mainstem run"


def test_dynamic_partitions():
"""Make sure that a new materialization of the gleaner config will create new partitions"""
Expand Down

0 comments on commit 0f5cba9

Please sign in to comment.