@@ -284,28 +284,41 @@ def test_acceptance_dataset_create(
284
284
print ("Command error output:" )
285
285
print (result .exception )
286
286
287
+ # get filepath for each parquet file
288
+
289
+ entity_parquet_path = (
290
+ cache_path
291
+ / "provenance"
292
+ / "entity"
293
+ / "dataset=conservation-area"
294
+ / "entity.parquet"
295
+ )
296
+ fact_parquet_path = (
297
+ cache_path
298
+ / "provenance"
299
+ / "fact"
300
+ / "dataset=conservation-area"
301
+ / "fact.parquet"
302
+ )
303
+ fact_resource_parquet_path = (
304
+ cache_path
305
+ / "provenance"
306
+ / "fact-resource"
307
+ / "dataset=conservation-area"
308
+ / "fact-resource.parquet"
309
+ )
287
310
files = [
288
- str (f .name )
289
- for f in (
290
- cache_path / "conservation-area" / "dataset=conservation-area"
291
- ).iterdir ()
311
+ entity_parquet_path ,
312
+ fact_parquet_path ,
313
+ fact_resource_parquet_path ,
292
314
]
293
- for file in [ "entity.parquet" , "fact.parquet" , "fact_resource.parquet" ] :
294
- assert file in files , f"file { file } not created. files found { ', ' . join ( files ) } "
315
+ for file in files :
316
+ assert file . exists () , f"file { file . name } not created."
295
317
assert result .exit_code == 0 , "error returned when building dataset"
296
318
297
319
# check that parquet files have been created correctly in the cache directory
298
320
# may want to adjust this for how we structure a parquet package in the future
299
321
# also we are using the cache to store this for now but in the future we may want to store it in a specific directory
300
- files = [
301
- str (f .name )
302
- for f in (
303
- cache_path / "conservation-area" / "dataset=conservation-area"
304
- ).iterdir ()
305
- ]
306
-
307
- for file in ["entity.parquet" , "fact.parquet" , "fact_resource.parquet" ]:
308
- assert file in files , f"file { file } not created. files found { ', ' .join (files )} "
309
322
310
323
# Check the sqlite file was created
311
324
assert os .path .exists (output_path ), f"sqlite file { output_path } does not exists"
@@ -322,23 +335,18 @@ def test_acceptance_dataset_create(
322
335
len (missing_tables ) == 0
323
336
), f"Missing following tables in sqlite database: { missing_tables } "
324
337
325
- for table in list ( expected_tables ) :
338
+ for file in files :
326
339
327
- pq_rows = len (
328
- pd .read_parquet (
329
- cache_path
330
- / "conservation-area"
331
- / "dataset=conservation-area"
332
- / f"{ table } .parquet"
333
- )
334
- )
340
+ pq_rows = len (pd .read_parquet (file ))
335
341
336
- assert pq_rows > 0 , f"parquet file { table } is empty"
337
- sql_rows = cursor .execute (f"SELECT COUNT(*) FROM { table } ;" ).fetchone ()[0 ]
338
- assert sql_rows > 0 , f"database table { table } is empty"
342
+ assert pq_rows > 0 , f"parquet file { file .stem } is empty"
343
+ sql_rows = cursor .execute (
344
+ f"SELECT COUNT(*) FROM { file .stem .replace ('-' ,'_' )} ;"
345
+ ).fetchone ()[0 ]
346
+ assert sql_rows > 0 , f"database table { file .stem } is empty"
339
347
assert (
340
348
pq_rows == sql_rows
341
- ), f"Different rows between the parquet files and database table for { table } "
349
+ ), f"Different rows between the parquet files and database table for { file . stem } "
342
350
343
351
# entity table specific tests to check how we expect the data to be used
344
352
0 commit comments