Skip to content

Commit 19319bb

Browse files
committed
#20 - Resolve flake8 linting and fix readme
1 parent aac762b commit 19319bb

File tree

7 files changed

+34
-34
lines changed

7 files changed

+34
-34
lines changed

README.md

+19-17
Original file line numberDiff line numberDiff line change
@@ -58,19 +58,29 @@ To install published package from testpypi
5858
pip install -i https://test.pypi.org/simple/ dwcahandler
5959
```
6060
 
61-
### Extensions that are currently supported and have been tested in ALA ingestion:
62-
Standard Darwin Core Terms and Class
63-
Simple Multimedia https://rs.gbif.org/extension/gbif/1.0/multimedia.xml
64-
Extended Measurement Or Fact http://rs.iobis.org/obis/terms/ExtendedMeasurementOrFact
61+
### Supported extensions that have been tested in ALA:
62+
* Standard Darwin Core Terms and Class
63+
* Simple Multimedia https://rs.gbif.org/extension/gbif/1.0/multimedia.xml
64+
* Extended Measurement Or Fact http://rs.iobis.org/obis/terms/ExtendedMeasurementOrFact
6565

66+
#### Terms
67+
* Terms are listed in [terms.csv](src/dwcahandler/dwca/terms/terms.csv)
68+
```python
69+
from dwcahandler import DwcaHandler
6670

67-
* List terms that is supported in dwcahandler package in [terms.csv](src/dwcahandler/dwca/terms/terms.csv)
71+
df_terms, df_class = DwcaHandler.list_terms()
72+
print(df_terms, df_class)
73+
```
6874

69-
* Class RowTypes are defined in MetaElementTypes enum class MetaElementTypes.
70-
The supported types are defined by the class column in [class-rowtype.csv](src/dwcahandler/dwca/terms/class-rowtype.csv)
71-
For eg: MetaElementTypes.OCCURRENCE
75+
#### Class
76+
* Listed in [class-rowtype.csv](src/dwcahandler/dwca/terms/class-rowtype.csv)
77+
* Each class name is available as a member of the `MetaElementTypes` enum, for example:
78+
```python
79+
MetaElementTypes.OCCURRENCE
80+
MetaElementTypes.MULTIMEDIA
81+
```
7282

73-
To list all the class rowtypes supported
83+
To list all the class row types:
7484
```python
7585
from dwcahandler import DwcaHandler
7686

@@ -151,11 +161,3 @@ DwcaHandler.delete_records(dwca_file='/tmp/dwca.zip',
151161
output_dwca='/tmp/new-dwca.zip')
152162
```
153163
 
154-
155-
```python
156-
from dwcahandler import DwcaHandler
157-
158-
df_terms, df_class = DwcaHandler.list_terms()
159-
print(df_terms, df_class)
160-
```
161-
 

src/dwcahandler/dwca/core_dwca.py

+4-4
Original file line numberDiff line numberDiff line change
@@ -351,9 +351,9 @@ def set_keys(self, keys: dict = None):
351351
col_term = []
352352
for a_key in key_list:
353353
if a_key not in dwca_content.df_content.columns.tolist():
354-
col_term.append(Terms.extract_term(a_key))
354+
col_term.append(Terms.extract_term(a_key))
355355
else:
356-
col_term.append(a_key)
356+
col_term.append(a_key)
357357
dwca_content.keys = col_term
358358
set_keys[k] = col_term
359359

@@ -881,7 +881,7 @@ def validate_content(self, content_to_validate: dict = None, error_file: str = N
881881

882882
if not self.check_duplicates(keys_df, content.keys, error_file):
883883
log.error("Validation failed for %s %s content for duplicates keys %s",
884-
content.meta_info.core_or_ext_type, content.meta_info.type, content.keys)
884+
content.meta_info.core_or_ext_type, content.meta_info.type, content.keys)
885885
validation_content_success = False
886886

887887
if not self._validate_columns(content):
@@ -891,7 +891,7 @@ def validate_content(self, content_to_validate: dict = None, error_file: str = N
891891

892892
if validation_content_success:
893893
log.info("Validation successful for %s %s content for unique keys %s",
894-
content.meta_info.core_or_ext_type, content.meta_info.type, content.keys)
894+
content.meta_info.core_or_ext_type, content.meta_info.type, content.keys)
895895
else:
896896
validation_success = False
897897

src/dwcahandler/dwca/dwca_meta.py

+1
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919

2020
MetaElementTypes = Enum("MetaElementTypes", dict(DwcClassRowTypes))
2121

22+
2223
def get_meta_class_row_type(row_type_uri: str):
2324
"""
2425
Find a row type by URI

src/dwcahandler/dwca/terms.py

+8-11
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,4 @@
11
import os
2-
from pathlib import Path
32
from dataclasses import dataclass, field
43
import re
54
import pandas as pd
@@ -12,10 +11,10 @@
1211

1312
this_dir, this_filename = os.path.split(__file__)
1413

15-
log.basicConfig(format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
16-
level=log.DEBUG)
14+
log.basicConfig(format='%(asctime)s - %(name)s - %(levelname)s - %(message)s', level=log.DEBUG)
1715
log = log.getLogger("DwcaTerms")
1816

17+
1918
def absolute_file_paths(directory):
2019
"""Convert files in a directory into absolute paths and return
2120
as a generator
@@ -59,10 +58,11 @@ class GbifRegisteredExt(ExtInfo, Enum):
5958
EXTENDED_MEASUREMENT_OR_FACT = ExtInfo(uri="http://rs.iobis.org/obis/terms/ExtendedMeasurementOrFact",
6059
prefix=NsPrefix.OBIS,
6160
namespace="http://rs.iobis.org/obis/terms/")
62-
#AC_MULTIMEDIA = ExtInfo(uri="http://rs.tdwg.org/ac/terms/Multimedia",
61+
# AC_MULTIMEDIA = ExtInfo(uri="http://rs.tdwg.org/ac/terms/Multimedia",
6362
# prefix=NsPrefix.AC,
6463
# namespace="http://rs.tdwg.org/ac/terms/")
6564

65+
6666
@dataclass
6767
class Terms:
6868
"""
@@ -119,7 +119,7 @@ def _update_df(self, ns: NsPrefix, updates: pd.DataFrame, df: pd.DataFrame):
119119
:param updates: dataframe containing the class rows or terms to update
120120
:param df: dataframe to update
121121
"""
122-
def __get_update_info (update_df: pd.DataFrame):
122+
def __get_update_info(update_df: pd.DataFrame):
123123
update_type: str = "term"
124124
count = len(update_df)
125125
if 'class' in update_df.columns.tolist():
@@ -137,7 +137,6 @@ def __get_update_info (update_df: pd.DataFrame):
137137
def get_dwc_source_data() -> pd.DataFrame:
138138
return pd.read_csv(Terms.DWC_SOURCE_URL, delimiter=",", encoding='utf-8', dtype='str')
139139

140-
#@staticmethod
141140
def update_dwc_terms(self):
142141
"""
143142
Pull the latest terms from gbif dwc csv url and update the darwin core vocab terms in the package
@@ -195,7 +194,6 @@ def get_class_row_types():
195194
class_list = list(tuple(zip(class_df["class"], class_df["class_uri"])))
196195
return class_list
197196

198-
#@staticmethod
199197
def update_gbif_ext(self):
200198
"""
201199
Update the class row type and terms specified by GBIF_REGISTERED_EXTENSION and update by prefix
@@ -239,7 +237,7 @@ def _extract_value(text: str):
239237

240238
df = pd.DataFrame(term_info, columns=["term", "namespace", 'uri'])
241239
std_ns = ["http://rs.tdwg.org/dwc/terms/", "http://purl.org/dc/terms/"]
242-
existing_terms = self.terms_df #Terms().terms_df
240+
existing_terms = self.terms_df
243241
extra_terms_df = df[(df["namespace"].isin(std_ns)) & (~df["uri"].isin(existing_terms["uri"]))]
244242
if len(extra_terms_df) > 0:
245243
log.info("Additional standard terms found:\n%s", extra_terms_df)
@@ -266,7 +264,6 @@ def __sort_values(df_to_sort: pd.DataFrame, sorting_column: str) -> pd.DataFrame
266264
ext_df = df_to_sort[~std_filter_df].copy()
267265
return pd.concat([std_df, ext_df], ignore_index=True)
268266

269-
270267
log.info("Current class and terms")
271268

272269
exclude_update_prefixes = [NsPrefix.DC.value]
@@ -281,8 +278,8 @@ def __sort_values(df_to_sort: pd.DataFrame, sorting_column: str) -> pd.DataFrame
281278
terms.terms_df = terms.terms_df[terms.terms_df.prefix.isin(exclude_update_prefixes)]
282279
terms.update_dwc_terms()
283280
terms.update_gbif_ext()
284-
terms.class_df = __sort_values(terms.class_df, "class")
285-
terms.terms_df = __sort_values(terms.terms_df, "term")
281+
terms.class_df = __sort_values(terms.class_df, "class")
282+
terms.terms_df = __sort_values(terms.terms_df, "term")
286283
terms.class_df.to_csv(Terms.CLASS_ROW_TYPE_PATH, index=False)
287284
terms.terms_df.to_csv(Terms.TERMS_FILE_PATH, index=False)
288285

src/dwcahandler/scripts/update_terms.py

+1
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010

1111
from dwcahandler.dwca.terms import Terms
1212

13+
1314
def update_terms():
1415
"""
1516
Call the update_dwc_terms to get the latest version of tdwg dwc terms

tests/test_create_dwca.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -155,4 +155,4 @@ def test_create_occurrence_dwca_occurrence_without_ext(self):
155155

156156
assert output_obj
157157

158-
check_output(output_obj, test_files_folder)
158+
check_output(output_obj, test_files_folder)

tests/test_validate_dwca.py

-1
Original file line numberDiff line numberDiff line change
@@ -119,4 +119,3 @@ def test_dwca_with_occ_core_ext_with_duplicates(self, caplog):
119119
assert "Duplicate ['gbifID'] found. Total rows affected: 3" in caplog.messages
120120
assert "Duplicate values: ['sample']" in caplog.messages
121121
assert "Validation failed for extension MetaElementTypes.OCCURRENCE content for duplicates keys ['gbifID']" in caplog.messages
122-

0 commit comments

Comments
 (0)