Skip to content

Commit

Permalink
Add support for date-typed attributes. (#134)
Browse files Browse the repository at this point in the history
Adds support for date-typed attributes.
Fix notebooks for CentroidType/CentroidDType usage.
AttributeDef now checks its type definition for acceptability.
Regenerate geos so they load with proper types in AttributeDef.
Clean up old commented code.
  • Loading branch information
JavadocMD authored Jul 15, 2024
1 parent 340ba58 commit 75de9e2
Show file tree
Hide file tree
Showing 15 changed files with 147 additions and 77 deletions.
4 changes: 2 additions & 2 deletions doc/devlog/2023-07-06.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@
"import numpy as np\n",
"\n",
"from epymorph.data_shape import Shapes\n",
"from epymorph.data_type import CentroidDType\n",
"from epymorph.data_type import CentroidDType, CentroidType\n",
"from epymorph.geo.spec import StaticGeoSpec, Year\n",
"from epymorph.geography.us_census import StateScope\n",
"from epymorph.simulation import AttributeDef\n",
Expand All @@ -34,7 +34,7 @@
" attributes=[\n",
" AttributeDef('label', str, Shapes.N),\n",
" AttributeDef('geoid', str, Shapes.N),\n",
" AttributeDef('centroid', CentroidDType, Shapes.N),\n",
" AttributeDef('centroid', CentroidType, Shapes.N),\n",
" AttributeDef('population', int, Shapes.N),\n",
" AttributeDef('commuters', int, Shapes.NxN),\n",
" AttributeDef('humidity', float, Shapes.TxN),\n",
Expand Down
8 changes: 4 additions & 4 deletions doc/devlog/2023-07-07.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@
"from census import Census\n",
"\n",
"from epymorph.data_shape import Shapes\n",
"from epymorph.data_type import CentroidDType\n",
"from epymorph.data_type import CentroidDType, CentroidType\n",
"from epymorph.error import GeoValidationException\n",
"from epymorph.geo.spec import LABEL, StaticGeoSpec, Year\n",
"from epymorph.geo.static import StaticGeo\n",
Expand All @@ -47,7 +47,7 @@
" attributes=[\n",
" LABEL,\n",
" AttributeDef('geoid', str, Shapes.N),\n",
" AttributeDef('centroid', CentroidDType, Shapes.N),\n",
" AttributeDef('centroid', CentroidType, Shapes.N),\n",
" AttributeDef('population', int, Shapes.N),\n",
" AttributeDef('commuters', int, Shapes.NxN),\n",
" ],\n",
Expand Down Expand Up @@ -211,7 +211,7 @@
{
"data": {
"text/plain": [
"<epymorph.geo.static.StaticGeo at 0x7f0c8a6dc390>"
"<epymorph.geo.static.StaticGeo at 0x745351b80690>"
]
},
"execution_count": 6,
Expand Down Expand Up @@ -311,7 +311,7 @@
{
"data": {
"text/plain": [
"<epymorph.geo.static.StaticGeo at 0x7f0c8f1db850>"
"<epymorph.geo.static.StaticGeo at 0x74534ac419d0>"
]
},
"execution_count": 10,
Expand Down
4 changes: 2 additions & 2 deletions doc/devlog/2023-07-12.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@
"from census import Census\n",
"\n",
"from epymorph.data_shape import Shapes\n",
"from epymorph.data_type import CentroidDType\n",
"from epymorph.data_type import CentroidDType, CentroidType\n",
"from epymorph.error import GeoValidationException\n",
"from epymorph.geo.spec import LABEL, StaticGeoSpec, Year\n",
"from epymorph.geo.static import StaticGeo\n",
Expand All @@ -39,7 +39,7 @@
"attributes: list[AttributeDef] = [\n",
" LABEL,\n",
" AttributeDef('geoid', str, Shapes.N),\n",
" AttributeDef('centroid', CentroidDType, Shapes.N),\n",
" AttributeDef('centroid', CentroidType, Shapes.N),\n",
" AttributeDef('population', int, Shapes.N),\n",
" # AttributeDef('population_by_age', int, Shapes.NxA(3)),\n",
" # AttributeDef('population_by_age_x6', int, Shapes.NxA(6)),\n",
Expand Down
5 changes: 3 additions & 2 deletions doc/devlog/2024-03-19.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
"outputs": [],
"source": [
"from epymorph.data_shape import Shapes\n",
"from epymorph.data_type import CentroidDType\n",
"from epymorph.data_type import CentroidType\n",
"from epymorph.geo.spec import DynamicGeoSpec, Year\n",
"from epymorph.geography.us_census import CountyScope\n",
"from epymorph.simulation import AttributeDef\n",
Expand All @@ -26,7 +26,7 @@
" AttributeDef('label', type=str, shape=Shapes.N),\n",
" AttributeDef('population', type=int, shape=Shapes.N),\n",
" # AttributeDef('population_by_age', dtype=int, shape=Shapes.NxA(3)),\n",
" AttributeDef('centroid', type=CentroidDType, shape=Shapes.N),\n",
" AttributeDef('centroid', type=CentroidType, shape=Shapes.N),\n",
" AttributeDef('geoid', type=str, shape=Shapes.N),\n",
" AttributeDef('dissimilarity_index', type=float, shape=Shapes.N),\n",
" AttributeDef('median_income', type=int, shape=Shapes.N),\n",
Expand Down Expand Up @@ -62,6 +62,7 @@
"# Test that we can load this geo back successfully...\n",
"\n",
"from typing import cast\n",
"\n",
"from epymorph import geo_library\n",
"from epymorph.error import GeoValidationException\n",
"from epymorph.geo.dynamic import DynamicGeo\n",
Expand Down
8 changes: 4 additions & 4 deletions doc/devlog/2024-06-03.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@
"outputs": [],
"source": [
"from epymorph.data_shape import Shapes\n",
"from epymorph.data_type import CentroidDType\n",
"from epymorph.data_type import CentroidType\n",
"from epymorph.geo.adrio import adrio_maker_library\n",
"from epymorph.geo.dynamic import DynamicGeo\n",
"from epymorph.geo.spec import DynamicGeoSpec, Year\n",
Expand All @@ -33,7 +33,7 @@
" AttributeDef('population', int, Shapes.N),\n",
" # AttributeDef('population_by_age', int, Shapes.NxA(3)),\n",
" # AttributeDef('population_by_age_x6', int, Shapes.NxA(6)),\n",
" AttributeDef('centroid', CentroidDType, Shapes.N),\n",
" AttributeDef('centroid', CentroidType, Shapes.N),\n",
" AttributeDef('geoid', str, Shapes.N),\n",
" AttributeDef('average_household_size', int, Shapes.N),\n",
" AttributeDef('dissimilarity_index', float, Shapes.N),\n",
Expand Down Expand Up @@ -130,7 +130,7 @@
" AttributeDef('population', int, Shapes.N),\n",
" # AttributeDef('population_by_age', int, Shapes.NxA(3)),\n",
" # AttributeDef('population_by_age_x6', int, Shapes.NxA(6)),\n",
" AttributeDef('centroid', CentroidDType, Shapes.N),\n",
" AttributeDef('centroid', CentroidType, Shapes.N),\n",
" AttributeDef('geoid', str, Shapes.N),\n",
" AttributeDef('average_household_size', int, Shapes.N),\n",
" AttributeDef('dissimilarity_index', float, Shapes.N),\n",
Expand Down Expand Up @@ -168,7 +168,7 @@
" AttributeDef('population', int, Shapes.N),\n",
" # AttributeDef('population_by_age', int, Shapes.NxA(3)),\n",
" # AttributeDef('population_by_age_x6', int, Shapes.NxA(6)),\n",
" AttributeDef('centroid', CentroidDType, Shapes.N),\n",
" AttributeDef('centroid', CentroidType, Shapes.N),\n",
" AttributeDef('geoid', str, Shapes.N),\n",
" AttributeDef('average_household_size', int, Shapes.N),\n",
" AttributeDef('gini_index', float, Shapes.N),\n",
Expand Down
48 changes: 32 additions & 16 deletions doc/devlog/2024-07-10.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -17,24 +17,27 @@
},
{
"cell_type": "code",
"execution_count": 18,
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"from epymorph.data_shape import Shapes\n",
"from epymorph.data_type import CentroidDType\n",
"from epymorph.data_type import CentroidDType, CentroidType\n",
"from epymorph.geo.adrio.census.adrio_census import ADRIOMakerCensus\n",
"from epymorph.geo.spec import Year\n",
"from epymorph.geography.us_census import CountyScope\n",
"from epymorph.simulation import geo_attrib\n",
"from epymorph.simulation import AttributeDef\n",
"\n",
"# make adrios for one attribute from each fetch method\n",
"maker = ADRIOMakerCensus()\n",
"geoids = ['04001', '04003', '04005', '04013', '04017']\n",
"scope = CountyScope.in_counties(geoids)\n",
"time_period = Year(2020)\n",
"attribs = [geo_attrib('population', int, Shapes.N), geo_attrib(\n",
" 'centroid', CentroidDType, Shapes.N), geo_attrib('commuters', int, Shapes.NxN)]\n",
"attribs = [\n",
" AttributeDef('population', int, Shapes.N),\n",
" AttributeDef('centroid', CentroidType, Shapes.N),\n",
" AttributeDef('commuters', int, Shapes.NxN),\n",
"]\n",
"\n",
"population = maker.make_adrio(attribs[0], scope, time_period)\n",
"centroid = maker.make_adrio(attribs[1], scope, time_period)\n",
Expand All @@ -43,7 +46,7 @@
},
{
"cell_type": "code",
"execution_count": 17,
"execution_count": 2,
"metadata": {},
"outputs": [
{
Expand All @@ -59,15 +62,27 @@
"source": [
"import numpy as np\n",
"\n",
"from epymorph.util import check_ndarray\n",
"from epymorph.util import check_ndarray, match\n",
"\n",
"T = time_period.days\n",
"N = len(population.get_value())\n",
"\n",
"# validate datatype and shape\n",
"check_ndarray(population.get_value(), dtype=[int], shape=attribs[0].shape.as_tuple(\n",
" len(population.get_value()), time_period.days))\n",
"check_ndarray(centroid.get_value(), dtype=[CentroidDType], shape=attribs[1].shape.as_tuple(\n",
" len(population.get_value()), time_period.days))\n",
"check_ndarray(commuters.get_value(), dtype=[int], shape=attribs[2].shape.as_tuple(\n",
" len(population.get_value()), time_period.days))\n",
"check_ndarray(\n",
" population.get_value(),\n",
" dtype=match.dtype(int),\n",
" shape=match.shape_literal((N,))\n",
")\n",
"check_ndarray(\n",
" centroid.get_value(),\n",
" dtype=match.dtype(CentroidDType),\n",
" shape=match.shape_literal((N,))\n",
")\n",
"check_ndarray(\n",
" commuters.get_value(),\n",
" dtype=match.dtype(int),\n",
" shape=match.shape_literal((N, N))\n",
")\n",
"\n",
"# values retrieved manually from Census table B01001\n",
"population_array = [71714, 126442, 142254, 4412779, 110271]\n",
Expand Down Expand Up @@ -104,12 +119,13 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"from io import BytesIO\n",
"from urllib.request import urlopen\n",
"\n",
"from geopandas import read_file\n",
"\n",
"# load in shapefile data for use in centroid calculations\n",
Expand All @@ -126,7 +142,7 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 4,
"metadata": {},
"outputs": [
{
Expand All @@ -145,7 +161,7 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 5,
"metadata": {},
"outputs": [
{
Expand Down
Binary file modified epymorph/data/geo/maricopa_cbg_2019.geo.tgz
Binary file not shown.
Binary file modified epymorph/data/geo/pei.geo.tgz
Binary file not shown.
Binary file modified epymorph/data/geo/us_counties_2015.geo.tgz
Binary file not shown.
Binary file modified epymorph/data/geo/us_states_2015.geo.tgz
Binary file not shown.
2 changes: 1 addition & 1 deletion epymorph/data/geo/us_sw_counties_2015.geo
Original file line number Diff line number Diff line change
@@ -1 +1 @@
{"py/object": "epymorph.geo.spec.DynamicGeoSpec", "py/state": {"attributes": [{"py/object": "epymorph.simulation.AttributeDef", "name": "label", "type": {"py/type": "builtins.str"}, "shape": {"py/object": "epymorph.data_shape.Node"}, "default_value": null, "comment": null}, {"py/object": "epymorph.simulation.AttributeDef", "name": "population", "type": {"py/type": "builtins.int"}, "shape": {"py/id": 4}, "default_value": null, "comment": null}, {"py/object": "epymorph.simulation.AttributeDef", "name": "centroid", "type": [{"py/tuple": ["longitude", {"py/type": "builtins.float"}]}, {"py/tuple": ["latitude", {"py/type": "builtins.float"}]}], "shape": {"py/id": 4}, "default_value": null, "comment": null}, {"py/object": "epymorph.simulation.AttributeDef", "name": "geoid", "type": {"py/type": "builtins.str"}, "shape": {"py/id": 4}, "default_value": null, "comment": null}, {"py/object": "epymorph.simulation.AttributeDef", "name": "dissimilarity_index", "type": {"py/type": "builtins.float"}, "shape": {"py/id": 4}, "default_value": null, "comment": null}, {"py/object": "epymorph.simulation.AttributeDef", "name": "median_income", "type": {"py/type": "builtins.int"}, "shape": {"py/id": 4}, "default_value": null, "comment": null}, {"py/object": "epymorph.simulation.AttributeDef", "name": "pop_density_km2", "type": {"py/type": "builtins.float"}, "shape": {"py/id": 4}, "default_value": null, "comment": null}, {"py/object": "epymorph.simulation.AttributeDef", "name": "commuters", "type": {"py/type": "builtins.int"}, "shape": {"py/object": "epymorph.data_shape.NodeAndNode"}, "default_value": null, "comment": null}], "time_period": {"py/object": "epymorph.geo.spec.Year", "year": 2015, "days": 365, "start_date": {"py/object": "datetime.date", "__reduce__": [{"py/type": "datetime.date"}, ["B98BAQ=="]]}, "end_date": {"py/object": "datetime.date", "__reduce__": [{"py/type": "datetime.date"}, ["B+ABAQ=="]]}}, "scope": {"py/object": "epymorph.geography.us_census.CountyScope", "year": 
2010, "includes_granularity": "state", "includes": ["04", "08", "49", "35", "32"]}, "source": {"label": "Census:name", "population": "Census", "centroid": "Census", "geoid": "Census", "dissimilarity_index": "Census", "median_income": "Census", "pop_density_km2": "Census", "commuters": "Census"}}}
{"py/object": "epymorph.geo.spec.DynamicGeoSpec", "py/state": {"attributes": [{"py/object": "epymorph.simulation.AttributeDef", "name": "label", "type": {"py/type": "builtins.str"}, "shape": {"py/object": "epymorph.data_shape.Node"}, "default_value": null, "comment": null}, {"py/object": "epymorph.simulation.AttributeDef", "name": "population", "type": {"py/type": "builtins.int"}, "shape": {"py/id": 4}, "default_value": null, "comment": null}, {"py/object": "epymorph.simulation.AttributeDef", "name": "centroid", "type": [{"py/tuple": ["longitude", {"py/type": "builtins.float"}]}, {"py/tuple": ["latitude", {"py/type": "builtins.float"}]}], "shape": {"py/id": 4}, "default_value": null, "comment": null}, {"py/object": "epymorph.simulation.AttributeDef", "name": "geoid", "type": {"py/type": "builtins.str"}, "shape": {"py/id": 4}, "default_value": null, "comment": null}, {"py/object": "epymorph.simulation.AttributeDef", "name": "dissimilarity_index", "type": {"py/type": "builtins.float"}, "shape": {"py/id": 4}, "default_value": null, "comment": null}, {"py/object": "epymorph.simulation.AttributeDef", "name": "median_income", "type": {"py/type": "builtins.int"}, "shape": {"py/id": 4}, "default_value": null, "comment": null}, {"py/object": "epymorph.simulation.AttributeDef", "name": "pop_density_km2", "type": {"py/type": "builtins.float"}, "shape": {"py/id": 4}, "default_value": null, "comment": null}, {"py/object": "epymorph.simulation.AttributeDef", "name": "commuters", "type": {"py/type": "builtins.int"}, "shape": {"py/object": "epymorph.data_shape.NodeAndNode"}, "default_value": null, "comment": null}], "scope": {"py/object": "epymorph.geography.us_census.CountyScope", "year": 2010, "includes_granularity": "state", "includes": ["04", "08", "49", "35", "32"]}, "time_period": {"py/object": "epymorph.geo.spec.Year", "year": 2015, "days": 365, "start_date": {"py/object": "datetime.date", "__reduce__": [{"py/type": "datetime.date"}, ["B98BAQ=="]]}, "end_date": 
{"py/object": "datetime.date", "__reduce__": [{"py/type": "datetime.date"}, ["B+ABAQ=="]]}}, "source": {"label": "Census:name", "population": "Census", "centroid": "Census", "geoid": "Census", "dissimilarity_index": "Census", "median_income": "Census", "pop_density_km2": "Census", "commuters": "Census"}}}
77 changes: 35 additions & 42 deletions epymorph/data_type.py
Original file line number Diff line number Diff line change
@@ -1,35 +1,26 @@
"""
Types for source data and attributes in epymorph.
"""
from datetime import date
from typing import Any, Sequence

import numpy as np
from numpy.typing import DTypeLike, NDArray

# _DataPyBasic = int | float | str
# _DataPyTuple = tuple[_DataPyBasic, ...]
# support recursively-nested lists
# _DataPyList = Sequence[Union[_DataPyBasic, _DataPyTuple, '_DataPyList']]
# _DataPy = _DataPyBasic | _DataPyTuple | _DataPyList

# DataPyScalar = _DataPyBasic | _DataPyTuple
# DataScalar = _DataPyBasic | _DataPyTuple | _DataNpScalar
# """The allowed scalar types (either python or numpy equivalents)."""

# Types for attribute declarations:
# these are expressed as Python types for simplicity.

ScalarType = type[int | float | str]
ScalarType = type[int | float | str | date]
StructType = Sequence[tuple[str, ScalarType]]
AttributeType = ScalarType | StructType
"""The allowed type declarations for epymorph attributes."""

ScalarValue = int | float | str
ScalarValue = int | float | str | date
StructValue = tuple[ScalarValue, ...]
AttributeValue = ScalarValue | StructValue
"""The allowed types for epymorph attribute values (specifically: default values)."""

ScalarDType = np.int64 | np.float64 | np.str_
ScalarDType = np.int64 | np.float64 | np.str_ | np.datetime64
StructDType = np.void
AttributeDType = ScalarDType | StructDType
"""The subset of numpy dtypes for use in epymorph: these map 1:1 with AttributeType."""
Expand All @@ -45,10 +36,19 @@ def dtype_as_np(dtype: AttributeType) -> np.dtype:
return np.dtype(np.float64)
if dtype == str:
return np.dtype(np.str_)
if isinstance(dtype, list):
return np.dtype(dtype)
if dtype == date:
return np.dtype(np.datetime64)
if isinstance(dtype, Sequence):
return np.dtype(list(dtype))
dtype = list(dtype)
if len(dtype) == 0:
raise ValueError(f"Unsupported dtype: {dtype}")
try:
return np.dtype([
(field_name, dtype_as_np(field_dtype))
for field_name, field_dtype in dtype
])
except TypeError:
raise ValueError(f"Unsupported dtype: {dtype}") from None
raise ValueError(f"Unsupported dtype: {dtype}")


Expand All @@ -60,51 +60,44 @@ def dtype_str(dtype: AttributeType) -> str:
return "float"
if dtype == str:
return "str"
if dtype == date:
return "date"
if isinstance(dtype, Sequence):
values = (f"({x[0]}, {dtype_str(x[1])})" for x in dtype)
return f"[{', '.join(values)}]"
dtype = list(dtype)
if len(dtype) == 0:
raise ValueError(f"Unsupported dtype: {dtype}")
try:
values = [
f"({field_name}, {dtype_str(field_dtype)})"
for field_name, field_dtype in dtype
]
return f"[{', '.join(values)}]"
except TypeError:
raise ValueError(f"Unsupported dtype: {dtype}") from None
raise ValueError(f"Unsupported dtype: {dtype}")


def dtype_check(dtype: AttributeType, value: Any) -> bool:
"""Checks that a value conforms to a given dtype. (Python types only.)"""
if dtype in (int, float, str):
if dtype in (int, float, str, date):
return isinstance(value, dtype)
if isinstance(dtype, Sequence):
dtype = list(dtype)
if not isinstance(value, tuple):
return False
if len(value) != len(dtype):
return False
return all((
dtype_check(vtype, v)
for ((_, vtype), v) in zip(dtype, value)
dtype_check(field_dtype, field_value)
for ((_, field_dtype), field_value) in zip(dtype, value)
))
raise ValueError(f"Unsupported dtype: {dtype}")


# ParamFunction = Callable[[int, int], DataScalar]
# """
# Params may be defined as functions of time (day) and geo node (index),
# returning a python or numpy scalar value.
# """

# RawParam = _DataPy | _DataNp | ParamFunction
# """
# Types for raw parameter values. Users can supply any of these forms when constructing
# simulation parameters.
# """

# AttributeScalar = _DataNpScalar
# AttributeArray = _DataNpArray
# """
# The type of all data attributes, whether in geo or params (after normalization).
# """


CentroidType: AttributeType = [('longitude', float), ('latitude', float)]
"""Structured epymorph type declaration for long/lat coordinates."""
CentroidDType: DTypeLike = [('longitude', float), ('latitude', float)]
"""Structured numpy dtype for long/lat coordinates."""
CentroidDType: DTypeLike = [('longitude', np.float64), ('latitude', np.float64)]
"""The numpy equivalent of `CentroidType` (structured dtype for long/lat coordinates)."""

# SimDType being centrally-located means we can change it reliably.
SimDType = np.int64
Expand Down
Loading

0 comments on commit 75de9e2

Please sign in to comment.