Skip to content

Commit 618789a

Browse files
committed
#20 - Fix listterms test
1 parent d9f01e0 commit 618789a

File tree

2 files changed

+23
-25
lines changed

2 files changed

+23
-25
lines changed

src/dwcahandler/dwca/terms.py

+13-18
Original file line numberDiff line numberDiff line change
@@ -139,6 +139,8 @@ def update_dwc_terms(self):
139139
if len(dwc_class_df) > 0:
140140
self.class_df = self._update_class_df(NsPrefix.DWC, dwc_class_df)
141141

142+
return self.terms_df, self.class_df
143+
142144
@staticmethod
143145
def extract_term(term_string, add_underscore: bool = False):
144146
"""
@@ -173,30 +175,21 @@ def update_gbif_ext(self):
173175
"""
174176
Update the class row type and terms specified by GBIF_REGISTERED_EXTENSION and update by prefix
175177
"""
176-
def _get_latest(identifier: str):
177-
d = requests.get(Terms.GBIF_EXT).json()
178-
gbif_ext_df = pd.DataFrame.from_dict(d["extensions"])
179-
ext_df = gbif_ext_df[(gbif_ext_df["identifier"] == identifier) & (gbif_ext_df["isLatest"])]
180-
url: str = ""
181-
if len(ext_df) > 0 and "url" in ext_df.columns.tolist():
182-
url = ext_df["url"].values[0]
183-
return url
184-
185-
def _extract_term_info(every_term: tuple) -> list:
178+
def _extract_term_info(current_term: tuple) -> list:
186179
def _extract_value(text: str):
187180
return text.replace("\\n", "").replace('\\', ""). \
188181
replace('"', "").replace("'", "").split("=")[1]
189182

190-
term_name = _extract_value(every_term[0])
191-
namespace = _extract_value(every_term[1])
192-
uri = _extract_value(every_term[2])
183+
term_name = _extract_value(current_term[0])
184+
namespace = _extract_value(current_term[1])
185+
uri = _extract_value(current_term[2])
193186

194187
return [term_name, namespace, uri]
195188

196-
def _get_NsPrefix(val: str):
197-
prefix = [p for p in NsPrefix if p.value == val]
198-
if len(prefix) > 0:
199-
return prefix[0]
189+
def _get_ns_prefix(val: str):
190+
ns_prefix = [p for p in NsPrefix if p.value == val]
191+
if len(ns_prefix) > 0:
192+
return ns_prefix[0]
200193
else:
201194
return None
202195

@@ -207,7 +200,7 @@ def _get_NsPrefix(val: str):
207200

208201
for index, supported_ext in gbif_registered_ext.iterrows():
209202
url = supported_ext["url"]
210-
prefix = _get_NsPrefix(supported_ext["prefix"])
203+
prefix = _get_ns_prefix(supported_ext["prefix"])
211204
if url:
212205
update_class = pd.DataFrame([supported_ext["identifier"]], columns=["class_uri"])
213206
self.class_df = self._update_class_df(prefix, update_class)
@@ -234,6 +227,8 @@ def _get_NsPrefix(val: str):
234227
new_terms.loc[:, "prefix"] = prefix.value
235228
self.terms_df = self._update_df(prefix, new_terms, self.terms_df)
236229

230+
return self.terms_df, self.class_df
231+
237232
@staticmethod
238233
def update_terms():
239234
"""

tests/test_listterms.py

+10-7
Original file line numberDiff line numberDiff line change
@@ -21,10 +21,14 @@ def test_list_dwc_terms(self):
2121
assert df.query('term == "eventDate"').shape[0] == 1
2222
assert len(class_df[class_df["class"] == "OCCURRENCE"]) == 1
2323

24-
def test_update_list_terms(self, mocker):
24+
def test_update_dwc_list_terms(self, mocker):
2525
"""
2626
Test that the terms are stored in expected format and deprecated terms are not brought over
2727
"""
28+
expected_uri_list = ["http://rs.tdwg.org/dwc/terms/basisOfRecord",
29+
"http://rs.tdwg.org/dwc/terms/occurrenceID",
30+
"http://rs.tdwg.org/dwc/terms/scientificName"]
31+
expected_class_list = ["http://rs.tdwg.org/dwc/terms/Occurrence"]
2832
mocker.patch.object(Terms,
2933
attribute="get_dwc_source_data",
3034
return_value=pd.DataFrame
@@ -39,21 +43,20 @@ def test_update_list_terms(self, mocker):
3943
"http://rs.tdwg.org/dwc/terms/Occurrence",
4044
"http://rs.tdwg.org/dwc/terms/Occurrence"]}))
4145
mocker.patch('pandas.DataFrame.to_csv')
46+
mocker.patch('dwcahandler.dwca.terms.Terms.update_gbif_ext')
4247
return_terms_df, return_class_df = Terms.update_terms()
43-
return_dwc_terms_df = return_terms_df[return_terms_df.prefix.isin(['dwc'])].copy().reset_index(drop=True)
44-
return_dwc_class_df = return_class_df[return_class_df.prefix.isin(['dwc'])].copy().reset_index(drop=True)
48+
return_dwc_terms_df = return_terms_df[return_terms_df.prefix.isin(['dwc']) & return_terms_df["uri"].isin(expected_uri_list)].copy().reset_index(drop=True)
49+
return_dwc_class_df = return_class_df[return_class_df.prefix.isin(['dwc']) & return_class_df["class_uri"].isin(expected_class_list)].copy().reset_index(drop=True)
4550
pd.testing.assert_frame_equal(left=return_dwc_terms_df,
4651
right=pd.DataFrame(
4752
{"prefix": [NsPrefix.DWC.value, NsPrefix.DWC.value, NsPrefix.DWC.value],
4853
"term": ["basisOfRecord", "occurrenceID", "scientificName"],
49-
"uri": ["http://rs.tdwg.org/dwc/terms/basisOfRecord",
50-
"http://rs.tdwg.org/dwc/terms/occurrenceID",
51-
"http://rs.tdwg.org/dwc/terms/scientificName"]}),
54+
"uri": expected_uri_list}),
5255
check_index_type=False,
5356
check_dtype=False)
5457
pd.testing.assert_frame_equal(left=return_dwc_class_df,
5558
right=pd.DataFrame({"prefix": [NsPrefix.DWC.value],
5659
"class": ["OCCURRENCE"],
57-
"class_uri": ["http://rs.tdwg.org/dwc/terms/Occurrence"]}),
60+
"class_uri": expected_class_list}),
5861
check_index_type=False,
5962
check_dtype=False)

0 commit comments

Comments
 (0)