Skip to content

Commit d258699

Browse files
committed
fix test after changes to parquet structure
1 parent 2d62a83 commit d258699

File tree

2 files changed

+38
-30
lines changed

2 files changed

+38
-30
lines changed

digital_land/commands.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -403,8 +403,8 @@ def dataset_create(
403403
# get the transformed files from the cache directory this is assumed right now but we may want to be stricter in the future
404404
transformed_parquet_dir = cache_dir / "transformed_parquet" / dataset
405405

406-
# creat directory for dataset_parquet_package
407-
dataset_parquet_path = cache_dir / dataset
406+
# create directory for dataset_parquet_package, will create a general provenance one for now
407+
dataset_parquet_path = cache_dir / "provenance"
408408

409409
if not output_path:
410410
print("missing output path", file=sys.stderr)

tests/acceptance/test_dataset_create.py

+36-28
Original file line numberDiff line numberDiff line change
@@ -284,28 +284,41 @@ def test_acceptance_dataset_create(
284284
print("Command error output:")
285285
print(result.exception)
286286

287+
# get filepath for each parquet file
288+
289+
entity_parquet_path = (
290+
cache_path
291+
/ "provenance"
292+
/ "entity"
293+
/ "dataset=conservation-area"
294+
/ "entity.parquet"
295+
)
296+
fact_parquet_path = (
297+
cache_path
298+
/ "provenance"
299+
/ "fact"
300+
/ "dataset=conservation-area"
301+
/ "fact.parquet"
302+
)
303+
fact_resource_parquet_path = (
304+
cache_path
305+
/ "provenance"
306+
/ "fact-resource"
307+
/ "dataset=conservation-area"
308+
/ "fact-resource.parquet"
309+
)
287310
files = [
288-
str(f.name)
289-
for f in (
290-
cache_path / "conservation-area" / "dataset=conservation-area"
291-
).iterdir()
311+
entity_parquet_path,
312+
fact_parquet_path,
313+
fact_resource_parquet_path,
292314
]
293-
for file in ["entity.parquet", "fact.parquet", "fact_resource.parquet"]:
294-
assert file in files, f"file {file} not created. files found {', '.join(files)}"
315+
for file in files:
316+
assert file.exists(), f"file {file.name} not created."
295317
assert result.exit_code == 0, "error returned when building dataset"
296318

297319
# check that parquet files have been created correctly in the cache directory
298320
# may want to adjust this for how we structure a parquet package in the future
299321
# also we are using the cache to store this for now but in the future we may want to store it in a specific directory
300-
files = [
301-
str(f.name)
302-
for f in (
303-
cache_path / "conservation-area" / "dataset=conservation-area"
304-
).iterdir()
305-
]
306-
307-
for file in ["entity.parquet", "fact.parquet", "fact_resource.parquet"]:
308-
assert file in files, f"file {file} not created. files found {', '.join(files)}"
309322

310323
# Check the sqlite file was created
311324
assert os.path.exists(output_path), f"sqlite file {output_path} does not exists"
@@ -322,23 +335,18 @@ def test_acceptance_dataset_create(
322335
len(missing_tables) == 0
323336
), f"Missing following tables in sqlite database: {missing_tables}"
324337

325-
for table in list(expected_tables):
338+
for file in files:
326339

327-
pq_rows = len(
328-
pd.read_parquet(
329-
cache_path
330-
/ "conservation-area"
331-
/ "dataset=conservation-area"
332-
/ f"{table}.parquet"
333-
)
334-
)
340+
pq_rows = len(pd.read_parquet(file))
335341

336-
assert pq_rows > 0, f"parquet file {table} is empty"
337-
sql_rows = cursor.execute(f"SELECT COUNT(*) FROM {table};").fetchone()[0]
338-
assert sql_rows > 0, f"database table {table} is empty"
342+
assert pq_rows > 0, f"parquet file {file.stem} is empty"
343+
sql_rows = cursor.execute(
344+
f"SELECT COUNT(*) FROM {file.stem.replace('-','_')};"
345+
).fetchone()[0]
346+
assert sql_rows > 0, f"database table {file.stem} is empty"
339347
assert (
340348
pq_rows == sql_rows
341-
), f"Different rows between the parquet files and database table for {table}"
349+
), f"Different rows between the parquet files and database table for {file.stem}"
342350

343351
# entity table specific tests to check how we expect the data to be used
344352

0 commit comments

Comments
 (0)