def slugify(s: str) -> str:
    """
    Convert free text into a slug.

    Only alphanumeric characters are kept, the result is lowercased, and the
    remaining words are joined with single underscores - never a double __.

    >>> slugify("Gifts & benefits")
    'gifts_benefits'
    >>> slugify("a - - b")
    'a_b'
    """
    # Drop everything that is not alphanumeric or a space. Punctuation is
    # removed rather than replaced, so "co-op" becomes "coop".
    kept = "".join(c for c in s if c.isalnum() or c == " ")
    # split() with no argument collapses space runs of any length and discards
    # leading/trailing ones. A single replace("__", "_") pass only halves a
    # run, so three or more spaces would still leave "__" in the slug.
    return "_".join(kept.split()).lower()
def details_dict(self, reduce: Optional[dict[str, list[str]]] = None):
    """
    Condense the details into a dictionary of keys and values.

    The result always has "id" and "content". "date_registered" and
    "date_published" are added as ISO strings when set, followed by the
    entries of self.details.detail_dict().

    reduce maps the key of a nested detail (a list of detail groups) to the
    slugs to pull out of it. Each slug becomes a top-level key holding the
    values of the matching items across all groups, in order, and the nested
    key itself is removed. A reduce key that is absent from the data is
    ignored; one whose value is not a list is removed without adding anything.
    """
    data: dict[str, Any] = {"id": self.comparable_id, "content": self.content}
    if self.date_registered:
        data["date_registered"] = self.date_registered.isoformat()
    if self.date_published:
        data["date_published"] = self.date_published.isoformat()
    data |= self.details.detail_dict()

    for key, slugs in (reduce or {}).items():
        if key not in data:
            continue
        # Take the nested value out once, before any slug is written. If a
        # slug shares its name with the key, the extracted list must survive,
        # and later slugs must still read the original groups rather than an
        # already-reduced list.
        nested = data.pop(key)
        if not isinstance(nested, list):
            continue
        for slug in slugs:
            data[slug] = [
                item.value
                for group in nested
                for item in group
                if item.slug == slug
            ]
    return data