|
21 | 21 | from numpy import nan
|
22 | 22 | from pandas.errors import EmptyDataError
|
23 | 23 | from pandas.io import parsers
|
24 |
| -from dwcahandler.dwca import (BaseDwca, CoreOrExtType, MetaDefaultFields, CSVEncoding, |
| 24 | +from dwcahandler.dwca import (BaseDwca, CoreOrExtType, CSVEncoding, |
25 | 25 | ContentData, Defaults, Eml, Terms, get_keys,
|
26 | 26 | MetaDwCA, MetaElementInfo, MetaElementTypes,
|
27 | 27 | MetaElementAttributes, Stat, record_diff_stat)
|
@@ -93,9 +93,9 @@ def _update_core_ids(self, core_df) -> str:
|
93 | 93 | :param core_df: The data frame to generate identifiers for
|
94 | 94 | return id field
|
95 | 95 | """
|
96 |
| - if MetaDefaultFields.ID not in core_df.columns.to_list(): |
97 |
| - core_df.insert(0, MetaDefaultFields.ID, core_df.apply(lambda _: uuid.uuid4(), axis=1), False) |
98 |
| - return MetaDefaultFields.ID |
| 96 | + if self.defaults_prop.MetaDefaultFields.ID not in core_df.columns.to_list(): |
| 97 | + core_df.insert(0, self.defaults_prop.MetaDefaultFields.ID, core_df.apply(lambda _: uuid.uuid4(), axis=1), False) |
| 98 | + return self.defaults_prop.MetaDefaultFields.ID |
99 | 99 | else:
|
100 | 100 | raise ValueError("core df should not contain id column")
|
101 | 101 |
|
@@ -140,23 +140,23 @@ def _update_extension_ids(self, csv_content: pd.DataFrame, core_df_content: pd.D
|
140 | 140 | set(link_col).issubset(set(csv_content.index.names))):
|
141 | 141 | csv_content.reset_index(inplace=True, drop=True)
|
142 | 142 |
|
143 |
| - csv_content = csv_content.merge(core_df_content.loc[:, MetaDefaultFields.ID], |
| 143 | + csv_content = csv_content.merge(core_df_content.loc[:, self.defaults_prop.MetaDefaultFields.ID], |
144 | 144 | left_on=link_col,
|
145 | 145 | right_on=link_col, how='outer')
|
146 | 146 |
|
147 |
| - if MetaDefaultFields.ID in csv_content.columns.to_list(): |
148 |
| - unmatched_content = csv_content[csv_content[MetaDefaultFields.ID].isnull()] |
149 |
| - unmatched_content = unmatched_content.drop(columns=[MetaDefaultFields.ID]) |
| 147 | + if self.defaults_prop.MetaDefaultFields.ID in csv_content.columns.to_list(): |
| 148 | + unmatched_content = csv_content[csv_content[self.defaults_prop.MetaDefaultFields.ID].isnull()] |
| 149 | + unmatched_content = unmatched_content.drop(columns=[self.defaults_prop.MetaDefaultFields.ID]) |
150 | 150 | if len(unmatched_content) > 0:
|
151 | 151 | log.info("There are orphaned keys in extension file")
|
152 | 152 | pd.set_option("display.max_columns", 7)
|
153 | 153 | pd.set_option('display.max_colwidth', 15)
|
154 | 154 | pd.set_option('display.max_rows', 10)
|
155 | 155 | log.info("\n%s", unmatched_content)
|
156 |
| - csv_content = csv_content[~csv_content[MetaDefaultFields.ID].isnull()] |
157 |
| - col = csv_content.pop(MetaDefaultFields.ID) |
| 156 | + csv_content = csv_content[~csv_content[self.defaults_prop.MetaDefaultFields.ID].isnull()] |
| 157 | + col = csv_content.pop(self.defaults_prop.MetaDefaultFields.ID) |
158 | 158 | csv_content.insert(0, col.name, col)
|
159 |
| - csv_content.rename(columns={MetaDefaultFields.ID: ext_core_id_field}, inplace=True) |
| 159 | + csv_content.rename(columns={self.defaults_prop.MetaDefaultFields.ID: ext_core_id_field}, inplace=True) |
160 | 160 | return csv_content, ext_core_id_field
|
161 | 161 | else:
|
162 | 162 | raise ValueError("Something is not right. The core id failed to be created")
|
@@ -207,8 +207,9 @@ def _find_fields_with_zero_idx(meta_element_fields: list):
|
207 | 207 | def _add_first_id_field_if_exists(meta_element: MetaElementAttributes):
|
208 | 208 | zero_index_exist = _find_fields_with_zero_idx(meta_element.fields)
|
209 | 209 | if meta_element.core_id and meta_element.core_id.index and not zero_index_exist:
|
210 |
| - return [MetaDefaultFields.ID] if meta_element.meta_element_type.core_or_ext_type == CoreOrExtType.CORE \ |
211 |
| - else [MetaDefaultFields.CORE_ID] |
| 210 | + return [self.defaults_prop.MetaDefaultFields.ID] if ( |
| 211 | + meta_element.meta_element_type.core_or_ext_type == CoreOrExtType.CORE) \ |
| 212 | + else [self.defaults_prop.MetaDefaultFields.CORE_ID] |
212 | 213 | else:
|
213 | 214 | return []
|
214 | 215 |
|
@@ -287,7 +288,7 @@ def _update_values(self, df_content, delta_df_content, keys, stat):
|
287 | 288 | :return: The updated content
|
288 | 289 | """
|
289 | 290 | # Extract columns that need updating, excluding self.keys and id
|
290 |
| - non_update_column = list(MetaDefaultFields) |
| 291 | + non_update_column = list(self.defaults_prop.MetaDefaultFields) |
291 | 292 | non_update_column.extend(keys)
|
292 | 293 | update_columns = [i for i in delta_df_content.columns.to_list()
|
293 | 294 | if i not in non_update_column]
|
@@ -429,12 +430,13 @@ def _extract_core_keys(self, core_content, keys):
|
429 | 430 | :return: A data frame indexed by the `id` column that contains the
|
430 | 431 | key elements for each record
|
431 | 432 | """
|
432 |
| - columns = [MetaDefaultFields.ID] if MetaDefaultFields.ID in core_content.columns.tolist() else [] |
| 433 | + columns = [self.defaults_prop.MetaDefaultFields.ID] \ |
| 434 | + if self.defaults_prop.MetaDefaultFields.ID in core_content.columns.tolist() else [] |
433 | 435 | if all(key in core_content.columns for key in keys):
|
434 | 436 | columns.extend(keys)
|
435 | 437 | df = core_content[columns]
|
436 |
| - if MetaDefaultFields.ID in core_content.columns.tolist(): |
437 |
| - df.set_index(MetaDefaultFields.ID, drop=True, inplace=True) |
| 438 | + if self.defaults_prop.MetaDefaultFields.ID in core_content.columns.tolist(): |
| 439 | + df.set_index(self.defaults_prop.MetaDefaultFields.ID, drop=True, inplace=True) |
438 | 440 | else:
|
439 | 441 | raise ValueError(f"Keys does not exist in core content {''.join(keys)}")
|
440 | 442 | return df
|
@@ -869,17 +871,17 @@ def validate_content(self, content_to_validate: dict = None, error_file: str = N
|
869 | 871 |
|
870 | 872 | if not self.check_duplicates(keys_df, content.keys, error_file):
|
871 | 873 | log.error("Validation failed for %s %s content for duplicates keys %s",
|
872 |
| - content.meta_info.core_or_ext_type, content.meta_info.type, content.keys) |
| 874 | + content.meta_info.core_or_ext_type.value, content.meta_info.type, content.keys) |
873 | 875 | validation_content_success = False
|
874 | 876 |
|
875 | 877 | if not self._validate_columns(content):
|
876 | 878 | log.error("Validation failed for %s %s content for duplicate columns",
|
877 |
| - content.meta_info.core_or_ext_type, content.meta_info.type) |
| 879 | + content.meta_info.core_or_ext_type.value, content.meta_info.type) |
878 | 880 | validation_content_success = False
|
879 | 881 |
|
880 | 882 | if validation_content_success:
|
881 | 883 | log.info("Validation successful for %s %s content for unique keys %s",
|
882 |
| - content.meta_info.core_or_ext_type, content.meta_info.type, content.keys) |
| 884 | + content.meta_info.core_or_ext_type.value, content.meta_info.type, content.keys) |
883 | 885 | else:
|
884 | 886 | validation_success = False
|
885 | 887 |
|
|
0 commit comments