Skip to content

Commit 253061d

Browse files
committed
#20 - Fix failing test
1 parent 533c6d0 commit 253061d

File tree

2 files changed

+20
-18
lines changed

2 files changed

+20
-18
lines changed

src/dwcahandler/dwca/dwca_factory.py

+4-2
Original file line numberDiff line numberDiff line change
@@ -175,10 +175,12 @@ def merge_dwca(dwca_file: Union[str, BytesIO], delta_dwca_file: Union[str, Bytes
175175

176176
@staticmethod
177177
def validate_dwca(dwca_file: Union[str, BytesIO], content_keys: dict = None, error_file: str = None):
178-
"""Test a dwca for consistency
178+
"""Validate dwca for unique key and column for core content by default.
179+
If content_keys is supplied, the content is also validated.
179180
180181
:param dwca_file: The path to the DwCA
181-
:param content_keys: a dictionary of class type and the key
182+
:param content_keys: a dictionary mapping each class type to its key.
183+
When content_keys are provided, validation will be performed on the content as well.
182184
e.g. {MetaElementTypes.OCCURRENCE: "occurrenceID"}
183185
:param error_file: The file to write errors to. If None, errors are logged
184186
"""

tests/test_validate_dwca.py

+16-16
Original file line numberDiff line numberDiff line change
@@ -25,17 +25,17 @@ def test_validate_dwca(self):
2525
Test for read and extract dwca. Validate core content
2626
"""
2727
simple_dwca = make_zip_from_folder_contents(f"{input_folder}/dwca-sample1")
28-
keys_lookup = {MetaElementTypes.OCCURRENCE: 'occurrenceID'}
29-
dwca_result = DwcaHandler.validate_dwca(dwca_file=simple_dwca, keys_lookup=keys_lookup)
28+
content_keys = {MetaElementTypes.OCCURRENCE: 'occurrenceID'}
29+
dwca_result = DwcaHandler.validate_dwca(dwca_file=simple_dwca, content_keys=content_keys)
3030
assert dwca_result
3131

3232
def test_validate_dwca2(self):
3333
"""
3434
Test for read and extract dwca. Validate core content
3535
"""
3636
simple_dwca = make_zip_from_folder_contents(f"{input_folder}/dwca-sample2")
37-
keys_lookup = {MetaElementTypes.OCCURRENCE: 'occurrenceID'}
38-
dwca_result = DwcaHandler.validate_dwca(dwca_file=simple_dwca, keys_lookup=keys_lookup)
37+
content_keys = {MetaElementTypes.OCCURRENCE: 'occurrenceID'}
38+
dwca_result = DwcaHandler.validate_dwca(dwca_file=simple_dwca, content_keys=content_keys)
3939
assert dwca_result
4040

4141
def test_empty_keys(self, caplog):
@@ -44,8 +44,8 @@ def test_empty_keys(self, caplog):
4444
"""
4545
caplog.set_level(logging.INFO)
4646
simple_dwca = make_zip_from_folder_contents(f"{input_folder}/dwca-sample3")
47-
keys_lookup = {MetaElementTypes.OCCURRENCE: 'occurrenceID'}
48-
dwca_result = DwcaHandler.validate_dwca(dwca_file=simple_dwca, keys_lookup=keys_lookup)
47+
content_keys = {MetaElementTypes.OCCURRENCE: 'occurrenceID'}
48+
dwca_result = DwcaHandler.validate_dwca(dwca_file=simple_dwca, content_keys=content_keys)
4949
assert not dwca_result
5050
assert "Empty values found in ['occurrenceID']. Total rows affected: 1" in caplog.messages
5151
assert "Empty values found in dataframe row: [0]" in caplog.messages
@@ -56,8 +56,8 @@ def test_duplicate_key(self, caplog):
5656
"""
5757
caplog.set_level(logging.INFO)
5858
simple_dwca = make_zip_from_folder_contents(f"{input_folder}/dwca-sample4")
59-
keys_lookup = {MetaElementTypes.OCCURRENCE: 'catalogNumber'}
60-
dwca_result = DwcaHandler.validate_dwca(dwca_file=simple_dwca, keys_lookup=keys_lookup)
59+
content_keys = {MetaElementTypes.OCCURRENCE: 'catalogNumber'}
60+
dwca_result = DwcaHandler.validate_dwca(dwca_file=simple_dwca, content_keys=content_keys)
6161
assert not dwca_result
6262
assert "Duplicate ['catalogNumber'] found. Total rows affected: 3" in caplog.messages
6363
assert "Duplicate values: ['014800' '014823']" in caplog.messages
@@ -67,10 +67,10 @@ def test_duplicate_columns_in_dwca(self):
6767
Test for read and extract dwca. Validate duplicate columns specified in metadata of dwca
6868
"""
6969
simple_dwca = make_zip_from_folder_contents(f"{input_folder}/dwca-sample5")
70-
keys_lookup = {MetaElementTypes.OCCURRENCE: 'catalogNumber'}
70+
content_keys = {MetaElementTypes.OCCURRENCE: 'catalogNumber'}
7171

7272
with pytest.raises(ValueError) as exc_info:
73-
DwcaHandler.validate_dwca(dwca_file=simple_dwca, keys_lookup=keys_lookup)
73+
DwcaHandler.validate_dwca(dwca_file=simple_dwca, content_keys=content_keys)
7474

7575
assert ("Duplicate columns ['catalogNumber'] specified in the metadata for occurrence.csv"
7676
in str(exc_info.value))
@@ -81,9 +81,9 @@ def test_dwca_with_occ_core_ext(self, caplog):
8181
"""
8282
caplog.set_level(logging.INFO)
8383
simple_dwca = make_zip_from_folder_contents(f"{input_folder}/dwca-sample6")
84-
keys_lookup = {MetaElementTypes.OCCURRENCE: 'gbifID'}
84+
content_keys = {MetaElementTypes.OCCURRENCE: 'gbifID'}
8585

86-
dwca_result = DwcaHandler.validate_dwca(dwca_file=simple_dwca, keys_lookup=keys_lookup)
86+
dwca_result = DwcaHandler.validate_dwca(dwca_file=simple_dwca, content_keys=content_keys)
8787
assert dwca_result
8888
assert "Validation successful for core MetaElementTypes.OCCURRENCE content for unique keys ['gbifID']" in caplog.messages
8989
assert "Validation successful for extension MetaElementTypes.OCCURRENCE content for unique keys ['gbifID']" in caplog.messages
@@ -95,9 +95,9 @@ def test_dwca_with_occ_core_ext_with_url_as_key(self, caplog):
9595
"""
9696
caplog.set_level(logging.INFO)
9797
simple_dwca = make_zip_from_folder_contents(f"{input_folder}/dwca-sample6")
98-
keys_lookup = {MetaElementTypes.OCCURRENCE: 'http://rs.gbif.org/terms/1.0/gbifID'}
98+
content_keys = {MetaElementTypes.OCCURRENCE: 'http://rs.gbif.org/terms/1.0/gbifID'}
9999

100-
dwca_result = DwcaHandler.validate_dwca(dwca_file=simple_dwca, keys_lookup=keys_lookup)
100+
dwca_result = DwcaHandler.validate_dwca(dwca_file=simple_dwca, content_keys=content_keys)
101101
assert dwca_result
102102
assert "Validation successful for core MetaElementTypes.OCCURRENCE content for unique keys ['gbifID']" in caplog.messages
103103
assert "Validation successful for extension MetaElementTypes.OCCURRENCE content for unique keys ['gbifID']" in caplog.messages
@@ -108,9 +108,9 @@ def test_dwca_with_occ_core_ext_with_duplicates(self, caplog):
108108
"""
109109
caplog.set_level(logging.INFO)
110110
simple_dwca = make_zip_from_folder_contents(f"{input_folder}/dwca-sample7")
111-
keys_lookup = {MetaElementTypes.OCCURRENCE: 'http://rs.gbif.org/terms/1.0/gbifID'}
111+
content_keys = {MetaElementTypes.OCCURRENCE: 'http://rs.gbif.org/terms/1.0/gbifID'}
112112

113-
dwca_result = DwcaHandler.validate_dwca(dwca_file=simple_dwca, keys_lookup=keys_lookup)
113+
dwca_result = DwcaHandler.validate_dwca(dwca_file=simple_dwca, content_keys=content_keys)
114114
assert not dwca_result
115115
assert "Duplicate ['gbifID'] found. Total rows affected: 2" in caplog.messages
116116
assert "Duplicate values: ['sample']" in caplog.messages

0 commit comments

Comments
 (0)