Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Changes to support events #21

Open
wants to merge 24 commits into
base: develop
Choose a base branch
from
Open
Changes from 1 commit
Commits
Show all changes
24 commits
Select commit Hold shift + click to select a range
cbe3c1f
https://github.com/AtlasOfLivingAustralia/dwcahandler/issues/20 - Cha…
patkyn Feb 19, 2025
ba22774
https://github.com/AtlasOfLivingAustralia/dwcahandler/issues/20 - Rem…
patkyn Feb 19, 2025
a2748c5
https://github.com/AtlasOfLivingAustralia/dwcahandler/issues/20 - fix…
patkyn Feb 20, 2025
a93056c
https://github.com/AtlasOfLivingAustralia/dwcahandler/issues/20 - cha…
patkyn Mar 4, 2025
eed134f
https://github.com/AtlasOfLivingAustralia/dwcahandler/issues/20 - mak…
patkyn Mar 12, 2025
c3aa2ed
https://github.com/AtlasOfLivingAustralia/dwcahandler/issues/20 - mak…
patkyn Mar 12, 2025
388a526
https://github.com/AtlasOfLivingAustralia/dwcahandler/issues/20 - fix…
patkyn Mar 12, 2025
64c897c
https://github.com/AtlasOfLivingAustralia/dwcahandler/issues/20 - upd…
patkyn Mar 13, 2025
aac762b
https://github.com/AtlasOfLivingAustralia/dwcahandler/issues/20 - mor…
patkyn Mar 13, 2025
19319bb
https://github.com/AtlasOfLivingAustralia/dwcahandler/issues/20 - Res…
patkyn Mar 14, 2025
e05d81c
https://github.com/AtlasOfLivingAustralia/dwcahandler/issues/20 - Fix…
patkyn Mar 14, 2025
0d7b66e
https://github.com/AtlasOfLivingAustralia/dwcahandler/issues/20 - Fix…
patkyn Mar 14, 2025
533c6d0
https://github.com/AtlasOfLivingAustralia/dwcahandler/issues/20 - Pro…
patkyn Mar 18, 2025
253061d
https://github.com/AtlasOfLivingAustralia/dwcahandler/issues/20 - Fix…
patkyn Mar 18, 2025
954c797
https://github.com/AtlasOfLivingAustralia/dwcahandler/issues/20 - Pro…
patkyn Mar 19, 2025
1c10c40
https://github.com/AtlasOfLivingAustralia/dwcahandler/issues/20 - fix…
patkyn Mar 19, 2025
a09f2c7
changed an image extension merge test
sadeghim Mar 19, 2025
a641fd1
https://github.com/AtlasOfLivingAustralia/dwcahandler/issues/20 - rev…
patkyn Mar 19, 2025
a5e1069
Merge remote-tracking branch 'origin/feature/events' into feature/events
patkyn Mar 19, 2025
a3c9b7d
https://github.com/AtlasOfLivingAustralia/dwcahandler/issues/20 - ref…
patkyn Mar 20, 2025
e0d8eed
refactor types from namedTuple to class
sadeghim Mar 20, 2025
2abb233
fix an import package
sadeghim Mar 20, 2025
9d9daca
fixed recursive import
sadeghim Mar 20, 2025
8f8172b
https://github.com/AtlasOfLivingAustralia/dwcahandler/issues/20 - cha…
patkyn Mar 21, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
#20 - refactor code
  • Loading branch information
patkyn committed Mar 20, 2025
commit a3c9b7d469f5eec256438b9fac5c9396333566f2
68 changes: 30 additions & 38 deletions src/dwcahandler/dwca/dwca_factory.py
Original file line number Diff line number Diff line change
@@ -27,11 +27,17 @@ def list_class_rowtypes() :
print(f"{name}: {member.value}")

@staticmethod
def get_contents_from_file_names(files: list) -> (dict[MetaElementTypes, str], dict[MetaElementTypes, str]):
def get_contents_from_file_names(files: list, csv_encoding: CSVEncoding,
content_keys: dict[MetaElementTypes, list] = None, zf: ZipFile = None) \
-> (ContentData, list[ContentData]):
"""Find the core content and extension contents from a list of file paths.
Core content will always be event if present, otherwise, occurrence content

:param files: list of files
:param csv_encoding: delimiter for the txt files. Defaults to comma-delimited txt files if not supplied
:param content_keys: optional dictionary of MetaElementTypes and key list
e.g. {MetaElementTypes.OCCURRENCE: ["occurrenceID"]}
:param zf: optional open ZipFile; when supplied, the files are opened from this zip archive
:return: the core content as ContentData and a list of ContentData for the extension contents
"""
def derive_type(file_list: list) -> dict[str, MetaElementTypes]:
@@ -51,9 +57,23 @@ def derive_type(file_list: list) -> dict[str, MetaElementTypes]:
core_filename = next(iter(core_file))
core_type = core_file[core_filename]
ext_files = {k: v for k, v in contents.items() if v != core_type}
return core_file, ext_files

return None
core_data = [core_filename] if not zf else io.TextIOWrapper(zf.open(core_filename), encoding="utf-8")
core_content = ContentData(data=core_data,
type=core_type, csv_encoding=csv_encoding,
keys=get_keys(class_type=core_type,
override_content_keys=content_keys))
ext_content = []
for ext_file, ext_type in ext_files.items():
ext_data = [ext_file] if not zf else io.TextIOWrapper(zf.open(ext_file), encoding="utf-8")
ext_content.append(ContentData(data=ext_data,
type=ext_type, csv_encoding=csv_encoding,
keys=get_keys(class_type=ext_type,
override_content_keys=content_keys)))
return core_content, ext_content
else:
raise ValueError("The core content cannot be determined. Please check filenames against the class type. "
"Use list_class_rowtypes to print the class types. ")

"""Perform various DwCA operations"""
@staticmethod
@@ -71,23 +91,9 @@ def create_dwca_from_file_list(files: list, output_dwca: Union[str, BytesIO],
:param content_keys: optional dictionary of MetaElementTypes and key list
e.g. {MetaElementTypes.OCCURRENCE: ["occurrenceID"]}
"""
core_content, ext_content_list = DwcaHandler.get_contents_from_file_names(files)
if core_content:
core_filename = next(iter(core_content))
core_type = core_content[core_filename]

core_content = ContentData(data=[core_filename], type=core_type, csv_encoding=csv_encoding,
keys=get_keys(class_type=core_type, override_content_keys=content_keys))
ext_content = []
for ext_file, ext_type in ext_content_list.items():
ext_content.append(ContentData(data=[ext_file],
type=ext_type, csv_encoding=csv_encoding,
keys=get_keys(class_type=ext_type,
override_content_keys=content_keys)))
DwcaHandler.create_dwca(core_csv=core_content, ext_csv_list=ext_content, output_dwca=output_dwca,
eml_content=eml_content)
else:
raise ValueError("The core content cannot be determined. Please check filename in zip file")
core_content, ext_content_list = DwcaHandler.get_contents_from_file_names(files=files, csv_encoding=csv_encoding)
DwcaHandler.create_dwca(core_csv=core_content, ext_csv_list=ext_content_list, output_dwca=output_dwca,
eml_content=eml_content)

@staticmethod
def create_dwca_from_zip_content(zip_file: str, output_dwca: Union[str, BytesIO],
@@ -107,24 +113,10 @@ def create_dwca_from_zip_content(zip_file: str, output_dwca: Union[str, BytesIO]
"""
with ZipFile(zip_file, 'r') as zf:
files = zf.namelist()
core_content, ext_content_list = DwcaHandler.get_contents_from_file_names(files)
if core_content:
core_filename = next(iter(core_content))
core_type = core_content[core_filename]
core_content = ContentData(data=io.TextIOWrapper(zf.open(core_filename), encoding="utf-8"),
type=core_type, csv_encoding=csv_encoding,
keys=get_keys(class_type=core_type,
override_content_keys=content_keys))
ext_content = []
for ext_file, ext_type in ext_content_list.items():
ext_content.append(ContentData(data=io.TextIOWrapper(zf.open(ext_file), encoding="utf-8"),
type=ext_type, csv_encoding=csv_encoding,
keys=get_keys(class_type=ext_type,
override_content_keys=content_keys)))
DwcaHandler.create_dwca(core_csv=core_content, ext_csv_list=ext_content, output_dwca=output_dwca,
eml_content=eml_content)
else:
raise ValueError("The core content cannot be determined. Please check filename in zip file")
core_content, ext_content_list = DwcaHandler.get_contents_from_file_names(files=files, csv_encoding=csv_encoding, zf=zf)
DwcaHandler.create_dwca(core_csv=core_content, ext_csv_list=ext_content_list, output_dwca=output_dwca,
eml_content=eml_content)
zf.close()

@staticmethod
def create_dwca(core_csv: ContentData,