Skip to content

Commit 9382eee

Browse files
committed
Working transcript and interest models with basic tests
1 parent 675ce0b commit 9382eee

9 files changed

+25266
-25
lines changed

data/debates2023-03-28d.xml

+5,502
Large diffs are not rendered by default.

data/regmem2024-05-28.xml

+19,619
Large diffs are not rendered by default.

src/mysoc_validator/__init__.py

+2-1
Original file line numberDiff line numberDiff line change
@@ -2,9 +2,10 @@
22
mysoc democracy validation models
33
"""
44

5+
from .models.interests import Register
56
from .models.popolo import Popolo
67
from .models.transcripts import Transcript
78

8-
__all__ = ["Popolo", "Transcript"]
9+
__all__ = ["Popolo", "Transcript", "Register"]
910

1011
__version__ = "0.1.0"

src/mysoc_validator/__main__.py

+23-2
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
import rich
66
import typer
77

8+
from .models.interests import Register
89
from .models.popolo import Popolo
910
from .models.transcripts import Transcript
1011

@@ -14,6 +15,7 @@
1415
class ValidateOptions(str, Enum):
    """Kinds of file the ``validate`` command can be asked to check."""

    POPOLO = "popolo"
    TRANSCRIPT = "transcript"
    INTERESTS = "interests"
1719

1820

1921
@app.command()
@@ -44,9 +46,14 @@ def validate(
4446
validate_popolo_url_file(url)
4547
elif type == ValidateOptions.TRANSCRIPT:
4648
if not file:
47-
typer.echo("Must provide a file for a transcript.")
49+
typer.echo("Must provide a local file for a transcript.")
4850
raise typer.Exit(code=1)
4951
validate_transcript(file)
52+
elif type == ValidateOptions.INTERESTS:
53+
if not file:
54+
typer.echo("Must provide a local file for interests.")
55+
raise typer.Exit(code=1)
56+
validate_interests(file)
5057

5158

5259
def validate_popolo_file(file: Path):
@@ -89,13 +96,27 @@ def validate_transcript(file: Path):
8996
Returns Exit 1 if a validation error.
9097
"""
9198
try:
92-
Transcript.from_path(file)
99+
Transcript.from_xml_path(file)
93100
except Exception as e:
94101
typer.echo(f"Error: {e}")
95102
rich.print("[red]Invalid Transcript file[/red]")
96103
raise typer.Exit(code=1)
97104
rich.print("[green]Valid Transcript file[/green]")
98105

99106

107+
def validate_interests(file: Path) -> None:
    """
    Validate a mysoc style register of interests XML file.

    Prints the error and an "Invalid" banner, then raises ``typer.Exit``
    with code 1, if the file cannot be loaded into a ``Register``.
    Prints a "Valid" banner otherwise.
    """
    try:
        Register.from_xml_path(file)
    except Exception as e:
        # Broad catch is deliberate: this is the CLI boundary, and any
        # failure to load the file is reported as "invalid".
        typer.echo(f"Error: {e}")
        rich.print("[red]Invalid Interests file[/red]")
        raise typer.Exit(code=1)
    rich.print("[green]Valid Interests file[/green]")
119+
120+
100121
if __name__ == "__main__":
101122
app()
+50
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,50 @@
1+
"""
2+
Structure for handling a register of interests
3+
"""
4+
5+
from __future__ import annotations
6+
7+
from datetime import date
8+
from typing import Literal as Literal
9+
from typing import Optional
10+
11+
from pydantic import AliasChoices, Field
12+
13+
from .xml_base import AsAttrSingle, Items, MixedContent, StrictBaseXMLModel
14+
15+
16+
class Item(StrictBaseXMLModel, tags=["item"]):
    """A single ``item`` element, as held in a record's ``items``."""

    # "class" is a reserved word in Python, so the value lives on
    # ``item_class`` and is aliased to "class" in both directions.
    item_class: str = Field(
        serialization_alias="class",
        validation_alias="class",
    )
    # NOTE(review): presumably the element's text and inline markup -
    # confirm against MixedContent in xml_base.
    contents: MixedContent
19+
20+
21+
class Record(StrictBaseXMLModel, tags=["record"]):
    """A ``record`` element: an optional class label plus its items."""

    # Aliased to "class" in both directions; defaults to None when absent.
    item_class: Optional[str] = Field(
        default=None,
        serialization_alias="class",
        validation_alias="class",
    )
    items: Items[Item]
26+
27+
28+
class Category(StrictBaseXMLModel, tags=["category"]):
    """A ``category`` element: a typed, named collection of records."""

    type: str
    name: str
    records: Items[Record]
32+
33+
34+
class PersonEntry(StrictBaseXMLModel, tags=["regmem"]):
    """A ``regmem`` element: one person's entry in the register."""

    # Accepted on input as either "person_id" or "personid";
    # always serialized as "personid".
    person_id: str = Field(
        serialization_alias="personid",
        validation_alias=AliasChoices("person_id", "personid"),
    )
    membername: str
    date: date
    # NOTE(review): Optional inner type but no default is declared here -
    # confirm in xml_base how AsAttrSingle treats a missing record.
    record: AsAttrSingle[Optional[Record]]
    # Accepted on input as either "categories" or "@children";
    # always serialized as "@children". Defaults to an empty list.
    categories: Items[Category] = Field(
        default_factory=list,
        serialization_alias="@children",
        validation_alias=AliasChoices("categories", "@children"),
    )
47+
48+
49+
class Register(StrictBaseXMLModel, tags=["publicwhip"]):
    """Root ``publicwhip`` element holding the per-person entries."""

    # NOTE(review): "person_entires" looks like a typo for "person_entries";
    # kept as-is because renaming would change the public attribute name.
    person_entires: Items[PersonEntry]

src/mysoc_validator/models/popolo.py

+19-17
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,18 @@
1+
"""
2+
Structure for handling the main Parlparse people.json file
3+
Following the general shape of the popolo standard.
4+
5+
"""
6+
17
from __future__ import annotations
28

9+
import json
310
import re
411
from bisect import bisect_left
512
from datetime import date
613
from enum import Enum
714
from itertools import groupby
815
from pathlib import Path
9-
from types import TracebackType
1016
from typing import (
1117
Annotated,
1218
Any,
@@ -1204,12 +1210,20 @@ def model_post_init(self, __context: Any) -> None:
12041210
self.posts.set_parent(self)
12051211

12061212
@classmethod
1207-
def from_path(cls, json_path: Path) -> Popolo:
1208-
return cls.model_validate_json(json_path.read_text())
1213+
def from_json_str(cls, json_str: str, *, validate: bool = True) -> Popolo:
    """
    Build an instance from a JSON string.

    With ``validate=True`` (the default) the string is parsed and
    validated via ``model_validate_json``. With ``validate=False`` the
    JSON is decoded and handed to ``model_construct``, which skips
    validation.
    """
    if validate:
        return cls.model_validate_json(json_str)

    data = json.loads(json_str)
    # model_construct takes field values as keyword arguments; its only
    # positional parameter is ``_fields_set``. Passing the dict
    # positionally would therefore set no field values at all.
    # NOTE(review): model_construct does not build nested models, so
    # nested values stay as the raw decoded JSON - confirm callers of
    # validate=False can work with that.
    return cls.model_construct(**data)
1219+
1220+
@classmethod
1221+
def from_path(cls, json_path: Path, validate: bool = True) -> Popolo:
    """
    Load an instance from a JSON file on disk.

    ``validate`` is passed through to ``from_json_str``.
    """
    # JSON interchange is UTF-8; read it as such rather than relying on
    # the platform's locale encoding, which differs on some systems.
    json_str = json_path.read_text(encoding="utf-8")
    return cls.from_json_str(json_str, validate=validate)
12091223

12101224
@classmethod
1211-
def from_url(cls, url: str) -> Popolo:
1212-
return cls.model_validate_json(requests.get(url).text)
1225+
def from_url(
    cls, url: str, validate: bool = True, *, timeout: float = 60.0
) -> Popolo:
    """
    Download a JSON document from ``url`` and load an instance from it.

    ``validate`` is passed through to ``from_json_str``. ``timeout`` is
    the number of seconds handed to ``requests.get``.

    Raises ``requests.HTTPError`` when the server answers with an error
    status, instead of trying to parse the error page as JSON.
    """
    # Without a timeout requests will wait on a stalled server forever.
    response = requests.get(url, timeout=timeout)
    response.raise_for_status()
    return cls.from_json_str(response.text, validate=validate)
12131227

12141228
@classmethod
12151229
def from_parlparse(cls, branch: str = "master") -> Popolo:
@@ -1230,15 +1244,3 @@ def to_json_str(self) -> str:
12301244
def to_path(self, json_path: Path) -> None:
    """Write this object's JSON representation to ``json_path`` as UTF-8."""
    data = self.to_json_str()
    # Explicit encoding so the output does not depend on the platform's
    # locale encoding when the JSON holds non-ASCII characters.
    json_path.write_text(data, encoding="utf-8")
1233-
1234-
def __enter__(self, json_path: Path) -> Popolo:
1235-
self._json_path = json_path
1236-
return self.from_path(json_path)
1237-
1238-
def __exit__(
1239-
self,
1240-
exc_type: type[BaseException] | None,
1241-
exc_val: BaseException | None,
1242-
exc_tb: TracebackType | None,
1243-
) -> None:
1244-
self.to_path(self._json_path)

src/mysoc_validator/models/transcripts.py

+13-5
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,7 @@
1+
"""
2+
Structure for handling a transcript file.
3+
"""
4+
15
from __future__ import annotations
26

37
from typing import (
@@ -16,7 +20,7 @@
1620
from pydantic import AliasChoices, Discriminator, Field, Tag
1721

1822
from .xml_base import (
19-
AsAttr,
23+
AsAttrSingle,
2024
BaseXMLModel,
2125
Items,
2226
MixedContent,
@@ -142,6 +146,8 @@ class RepList(
142146
StrictBaseXMLModel,
143147
tags=["replist", "mplist", "msplist", "mslist", "mlalist", "lordlist"],
144148
):
149+
# this duplication is in the sources - twfy internally converts to
150+
# aye, no, both, absent
145151
vote: Literal[
146152
"aye",
147153
"no",
@@ -177,9 +183,9 @@ class Division(StrictBaseXMLModel, tags=["division"]):
177183
divnumber: int
178184
colnum: Optional[int] = None
179185
time: Optional[str] = None
180-
count: AsAttr[Optional[DivisionCount]]
186+
count: AsAttrSingle[Optional[DivisionCount]]
181187
motion: Optional[Motion] = None
182-
items: Items[RepList]
188+
representatives: Items[RepList]
183189

184190

185191
def extract_tag(v: Any) -> str:
@@ -188,9 +194,11 @@ def extract_tag(v: Any) -> str:
188194

189195
class Transcript(StrictBaseXMLModel, tags=["publicwhip"]):
190196
scraper_version: Optional[str] = Field(
191-
default=None, validation_alias="scraperversion"
197+
default=None,
198+
validation_alias=AliasChoices("scraper_version", "scraperversion"),
199+
serialization_alias="scraperversion",
192200
)
193-
latest: Optional[str] = Field(default=None, validation_alias="latest")
201+
latest: Optional[str] = Field(default=None)
194202
items: Items[
195203
Annotated[
196204
Union[

tests/test_interests.py

+19
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
from pathlib import Path
2+
3+
from mysoc_validator.models.interests import Register
4+
5+
6+
def test_interests_load():
    """The sample register of interests file loads without raising."""
    # Anchor the fixture to this test file so the test does not depend on
    # the directory pytest is started from.
    data_dir = Path(__file__).resolve().parent.parent / "data"
    Register.from_xml_path(data_dir / "regmem2024-05-28.xml")
8+
9+
10+
def test_interests_round_trip():
    """Dumping, re-parsing and dumping again yields identical XML."""
    # Anchor the fixture to this test file so the test does not depend on
    # the directory pytest is started from.
    data_dir = Path(__file__).resolve().parent.parent / "data"
    register = Register.from_xml_path(data_dir / "regmem2024-05-28.xml")

    dumped_xml = register.model_dump_xml()
    reloaded = Register.model_validate_xml(dumped_xml)
    dumped_xml_2 = reloaded.model_dump_xml()

    assert dumped_xml == dumped_xml_2

tests/test_transcript.py

+19
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
from pathlib import Path
2+
3+
from mysoc_validator.models.transcripts import Transcript
4+
5+
6+
def test_transcript_load():
    """The sample debates transcript loads without raising."""
    # Anchor the fixture to this test file so the test does not depend on
    # the directory pytest is started from.
    data_dir = Path(__file__).resolve().parent.parent / "data"
    Transcript.from_xml_path(data_dir / "debates2023-03-28d.xml")
8+
9+
10+
def test_transcript_round_trip():
    """Dumping, re-parsing and dumping again yields identical XML."""
    # Anchor the fixture to this test file so the test does not depend on
    # the directory pytest is started from.
    data_dir = Path(__file__).resolve().parent.parent / "data"
    transcript = Transcript.from_xml_path(data_dir / "debates2023-03-28d.xml")

    dumped_xml = transcript.model_dump_xml()
    reloaded = Transcript.model_validate_xml(dumped_xml)
    dumped_xml_2 = reloaded.model_dump_xml()

    assert dumped_xml == dumped_xml_2

0 commit comments

Comments
 (0)