Skip to content

Commit

Permalink
Census maker geoscope (#115)
Browse files Browse the repository at this point in the history
Refactor of Census ADRIO maker to use newly implemented GeoScope specification and us_tiger shapefile retrieval.
  • Loading branch information
TJohnsonAZ authored Jun 5, 2024
1 parent 27a2cdd commit 5d12231
Show file tree
Hide file tree
Showing 15 changed files with 719 additions and 422 deletions.
21 changes: 7 additions & 14 deletions doc/demo/02-states-GEO.ipynb

Large diffs are not rendered by default.

23 changes: 8 additions & 15 deletions doc/demo/03-counties-GEO.ipynb

Large diffs are not rendered by default.

9 changes: 2 additions & 7 deletions doc/devlog/2024-03-19.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,8 @@
"source": [
"from epymorph.data_shape import Shapes\n",
"from epymorph.data_type import CentroidDType\n",
"from epymorph.geo.adrio.census.adrio_census import CensusGeography, Granularity\n",
"from epymorph.geo.spec import DynamicGeoSpec, Year, attrib\n",
"from epymorph.geography.us_census import CountyScope\n",
"\n",
"spec = DynamicGeoSpec(\n",
" attributes=[\n",
Expand All @@ -33,12 +33,7 @@
" attrib('commuters', dtype=int, shape=Shapes.NxN),\n",
" ],\n",
" time_period=Year(2015),\n",
" geography=CensusGeography(granularity=Granularity.COUNTY, filter={\n",
" 'state': ['04', '08', '49', '35', '32'],\n",
" 'county': ['*'],\n",
" 'tract': ['*'],\n",
" 'block group': ['*']\n",
" }),\n",
" scope=CountyScope.in_states(['04', '08', '49', '35', '32'], 2010),\n",
" source={\n",
" 'label': 'Census:name',\n",
" 'population': 'Census',\n",
Expand Down
14 changes: 7 additions & 7 deletions doc/devlog/2024-05-03.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
},
{
"cell_type": "code",
"execution_count": 5,
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
Expand All @@ -29,11 +29,11 @@
"\n",
"\n",
"def test_year(year: c.CensusYear) -> None:\n",
" # 1. test that we have 56 states\n",
" # 1. test that we have 52 states\n",
" states = c.get_us_states(year).geoid\n",
"\n",
" if len(states) != 56:\n",
" raise Fail(\"There weren't 56 states!\")\n",
" if len(states) != 52:\n",
" raise Fail(\"There weren't 52 states!\")\n",
"\n",
" # 2. test that each state contains at least one county\n",
" counties = c.get_us_counties(year).geoid\n",
Expand Down Expand Up @@ -73,7 +73,7 @@
},
{
"cell_type": "code",
"execution_count": 6,
"execution_count": 2,
"metadata": {},
"outputs": [
{
Expand All @@ -90,7 +90,7 @@
},
{
"cell_type": "code",
"execution_count": 7,
"execution_count": 3,
"metadata": {},
"outputs": [
{
Expand All @@ -107,7 +107,7 @@
},
{
"cell_type": "code",
"execution_count": 8,
"execution_count": 4,
"metadata": {},
"outputs": [
{
Expand Down
217 changes: 217 additions & 0 deletions doc/devlog/2024-06-03.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,217 @@
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# devlog 2024-06-03\n",
"\n",
"_author: Trevor Johnson_\n",
"\n",
"ADRIOMakerCensus has been refactored to utilize the recently added GeoScope class hierarchy. This notebook tests the correct functionality of Census-based ADRIOs post-refactor by creating a DynamicGeo for each granularity and populating each one with every attribute that is valid for that granularity.\n",
"\n",
"Additional cases can be tested by changing the type of the scope objects, changing the year in time period or scope, or changing the includes attribute of the scope object."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from epymorph.data_shape import Shapes\n",
"from epymorph.data_type import CentroidDType\n",
"from epymorph.geo.adrio import adrio_maker_library\n",
"from epymorph.geo.dynamic import DynamicGeo\n",
"from epymorph.geo.spec import DynamicGeoSpec, Year\n",
"from epymorph.geography.us_census import DEFAULT_YEAR, StateScopeAll\n",
"from epymorph.simulation import geo_attrib\n",
"\n",
"spec = DynamicGeoSpec(\n",
" attributes=[\n",
" geo_attrib('label', str, Shapes.N),\n",
" geo_attrib('population', int, Shapes.N),\n",
" geo_attrib('population_by_age', int, Shapes.NxA(3)),\n",
" geo_attrib('population_by_age_x6', int, Shapes.NxA(6)),\n",
" geo_attrib('centroid', CentroidDType, Shapes.N),\n",
" geo_attrib('geoid', str, Shapes.N),\n",
" geo_attrib('average_household_size', int, Shapes.N),\n",
" geo_attrib('dissimilarity_index', float, Shapes.N),\n",
" geo_attrib('commuters', int, Shapes.NxN),\n",
" geo_attrib('gini_index', float, Shapes.N),\n",
" geo_attrib('median_age', int, Shapes.N),\n",
" geo_attrib('median_income', int, Shapes.N),\n",
" geo_attrib('pop_density_km2', float, Shapes.N)\n",
" ],\n",
" time_period=Year(2020),\n",
" scope=StateScopeAll(DEFAULT_YEAR),\n",
" source={\n",
" 'label': 'Census:name',\n",
" 'population': 'Census',\n",
" 'population_by_age': 'Census',\n",
" 'population_by_age_x6': 'Census',\n",
" 'centroid': 'Census',\n",
" 'geoid': 'Census',\n",
" 'average_household_size': 'Census',\n",
" 'dissimilarity_index': 'Census',\n",
" 'commuters': 'Census',\n",
" 'gini_index': 'Census',\n",
" 'median_age': 'Census',\n",
" 'median_income': 'Census',\n",
" 'pop_density_km2': 'Census',\n",
" 'tract_median_income': 'Census'\n",
" }\n",
")\n",
"\n",
"geo = DynamicGeo.from_library(spec, adrio_maker_library)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"geo.validate()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from dataclasses import replace\n",
"\n",
"from epymorph.geography.us_census import (BlockGroupScope, CountyScope,\n",
" StateScope, TractScope)\n",
"\n",
"spec = replace(spec, scope=StateScope.in_states(['04', '08']))\n",
"geo = DynamicGeo.from_library(spec, adrio_maker_library)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"geo.validate()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"spec = replace(spec, scope=CountyScope.in_counties(['35001', '04013', '04017']))\n",
"geo = DynamicGeo.from_library(spec, adrio_maker_library)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"geo.validate()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"spec = replace(spec, scope=TractScope.in_tracts(['35001000720', '35001000904', '35001000906',\n",
" '04027011405', '04027011407']), attributes=[\n",
" geo_attrib('label', str, Shapes.N),\n",
" geo_attrib('population', int, Shapes.N),\n",
" geo_attrib('population_by_age', int, Shapes.NxA(3)),\n",
" geo_attrib('population_by_age_x6', int, Shapes.NxA(6)),\n",
" geo_attrib('centroid', CentroidDType, Shapes.N),\n",
" geo_attrib('geoid', str, Shapes.N),\n",
" geo_attrib('average_household_size', int, Shapes.N),\n",
" geo_attrib('dissimilarity_index', float, Shapes.N),\n",
" geo_attrib('gini_index', float, Shapes.N),\n",
" geo_attrib('median_age', int, Shapes.N),\n",
" geo_attrib('median_income', int, Shapes.N),\n",
" geo_attrib('pop_density_km2', float, Shapes.N)\n",
"])\n",
"\n",
"geo = DynamicGeo.from_library(spec, adrio_maker_library)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# for tract and block group geos, fetch the shapefile attributes prior to validating so that the kernel\n",
"# is not overwhelmed by several large shapefile requests in parallel\n",
"geo['centroid']\n",
"geo['pop_density_km2']\n",
"geo.validate()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"spec = replace(spec, scope=BlockGroupScope.in_block_groups(['350010007201', '350010009041', '350010009061',\n",
" '040270114053', '040270114072']), attributes=[\n",
" geo_attrib('label', str, Shapes.N),\n",
" geo_attrib('population', int, Shapes.N),\n",
" geo_attrib('population_by_age', int, Shapes.NxA(3)),\n",
" geo_attrib('population_by_age_x6', int, Shapes.NxA(6)),\n",
" geo_attrib('centroid', CentroidDType, Shapes.N),\n",
" geo_attrib('geoid', str, Shapes.N),\n",
" geo_attrib('average_household_size', int, Shapes.N),\n",
" geo_attrib('gini_index', float, Shapes.N),\n",
" geo_attrib('median_age', int, Shapes.N),\n",
" geo_attrib('median_income', int, Shapes.N),\n",
" geo_attrib('pop_density_km2', float, Shapes.N),\n",
" geo_attrib('tract_median_income', int, Shapes.N)\n",
"])\n",
"\n",
"geo = DynamicGeo.from_library(spec, adrio_maker_library)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"geo['centroid']\n",
"geo['pop_density_km2']\n",
"geo.validate()"
]
}
],
"metadata": {
"kernelspec": {
"display_name": ".venv",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.9"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
1 change: 1 addition & 0 deletions doc/devlog/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,7 @@ This folder is a handy place to put Jupyter notebooks or other documents which h
| 2024-04-25.ipynb | Tyler || Integration test: epymorph cache utilities |
| 2024-05-03.ipynb | Tyler || Integration test: loading US Census geography from TIGER |
| 2024-05-22.ipynb | Sachin | | Integrating particle filter with epymorph. Propagating the particles using epymorph simulation and plot the infection rates |
| 2024-06-03.ipynb | Trevor | | Integration test: using dynamic geos to fetch Census data |

## Contributing

Expand Down
2 changes: 1 addition & 1 deletion epymorph/data/geo/us_sw_counties_2015.geo
Original file line number Diff line number Diff line change
@@ -1 +1 @@
{"py/object": "epymorph.geo.spec.DynamicGeoSpec", "py/state": {"attributes": [{"py/object": "epymorph.simulation.AttributeDef", "py/state": ["label", "geo", {"py/type": "builtins.str"}, {"py/object": "epymorph.data_shape.Node"}, {"py/reduce": ["py/newobj", {"py/tuple": [{"py/type": "sympy.core.symbol.Symbol"}, "label"]}]}, null, null]}, {"py/object": "epymorph.simulation.AttributeDef", "py/state": ["population", "geo", {"py/type": "builtins.int"}, {"py/id": 5}, {"py/reduce": ["py/newobj", {"py/tuple": [{"py/type": "sympy.core.symbol.Symbol"}, "population"]}]}, null, null]}, {"py/object": "epymorph.simulation.AttributeDef", "py/state": ["population_by_age", "geo", {"py/type": "builtins.int"}, {"py/object": "epymorph.data_shape.NodeAndArbitrary", "index": 3}, {"py/reduce": ["py/newobj", {"py/tuple": [{"py/type": "sympy.core.symbol.Symbol"}, "population_by_age"]}]}, null, null]}, {"py/object": "epymorph.simulation.AttributeDef", "py/state": ["centroid", "geo", [{"py/tuple": ["longitude", {"py/type": "builtins.float"}]}, {"py/tuple": ["latitude", {"py/type": "builtins.float"}]}], {"py/id": 5}, {"py/reduce": ["py/newobj", {"py/tuple": [{"py/type": "sympy.core.symbol.Symbol"}, "centroid"]}]}, null, null]}, {"py/object": "epymorph.simulation.AttributeDef", "py/state": ["geoid", "geo", {"py/type": "builtins.str"}, {"py/id": 5}, {"py/reduce": ["py/newobj", {"py/tuple": [{"py/type": "sympy.core.symbol.Symbol"}, "geoid"]}]}, null, null]}, {"py/object": "epymorph.simulation.AttributeDef", "py/state": ["dissimilarity_index", "geo", {"py/type": "builtins.float"}, {"py/id": 5}, {"py/reduce": ["py/newobj", {"py/tuple": [{"py/type": "sympy.core.symbol.Symbol"}, "dissimilarity_index"]}]}, null, null]}, {"py/object": "epymorph.simulation.AttributeDef", "py/state": ["median_income", "geo", {"py/type": "builtins.int"}, {"py/id": 5}, {"py/reduce": ["py/newobj", {"py/tuple": [{"py/type": "sympy.core.symbol.Symbol"}, "median_income"]}]}, null, null]}, {"py/object": 
"epymorph.simulation.AttributeDef", "py/state": ["pop_density_km2", "geo", {"py/type": "builtins.float"}, {"py/id": 5}, {"py/reduce": ["py/newobj", {"py/tuple": [{"py/type": "sympy.core.symbol.Symbol"}, "pop_density_km2"]}]}, null, null]}, {"py/object": "epymorph.simulation.AttributeDef", "py/state": ["commuters", "geo", {"py/type": "builtins.int"}, {"py/object": "epymorph.data_shape.NodeAndNode"}, {"py/reduce": ["py/newobj", {"py/tuple": [{"py/type": "sympy.core.symbol.Symbol"}, "commuters"]}]}, null, null]}], "time_period": {"py/object": "epymorph.geo.spec.Year", "year": 2015}, "geography": {"py/object": "epymorph.geo.adrio.census.adrio_census.CensusGeography", "granularity": {"py/reduce": [{"py/type": "epymorph.geo.adrio.census.adrio_census.Granularity"}, {"py/tuple": [1]}]}, "filter": {"state": ["04", "08", "49", "35", "32"], "county": ["*"], "tract": ["*"], "block group": ["*"]}}, "source": {"label": "Census:name", "population": "Census", "population_by_age": "Census", "centroid": "Census", "geoid": "Census", "dissimilarity_index": "Census", "median_income": "Census", "pop_density_km2": "Census", "commuters": "Census"}}}
{"py/object": "epymorph.geo.spec.DynamicGeoSpec", "py/state": {"attributes": [{"py/object": "epymorph.simulation.AttributeDef", "py/state": ["label", "geo", {"py/type": "builtins.str"}, {"py/object": "epymorph.data_shape.Node"}, {"py/reduce": ["py/newobj", {"py/tuple": [{"py/type": "sympy.core.symbol.Symbol"}, "label"]}]}, null, null]}, {"py/object": "epymorph.simulation.AttributeDef", "py/state": ["population", "geo", {"py/type": "builtins.int"}, {"py/id": 5}, {"py/reduce": ["py/newobj", {"py/tuple": [{"py/type": "sympy.core.symbol.Symbol"}, "population"]}]}, null, null]}, {"py/object": "epymorph.simulation.AttributeDef", "py/state": ["population_by_age", "geo", {"py/type": "builtins.int"}, {"py/object": "epymorph.data_shape.NodeAndArbitrary", "index": 3}, {"py/reduce": ["py/newobj", {"py/tuple": [{"py/type": "sympy.core.symbol.Symbol"}, "population_by_age"]}]}, null, null]}, {"py/object": "epymorph.simulation.AttributeDef", "py/state": ["centroid", "geo", [{"py/tuple": ["longitude", {"py/type": "builtins.float"}]}, {"py/tuple": ["latitude", {"py/type": "builtins.float"}]}], {"py/id": 5}, {"py/reduce": ["py/newobj", {"py/tuple": [{"py/type": "sympy.core.symbol.Symbol"}, "centroid"]}]}, null, null]}, {"py/object": "epymorph.simulation.AttributeDef", "py/state": ["geoid", "geo", {"py/type": "builtins.str"}, {"py/id": 5}, {"py/reduce": ["py/newobj", {"py/tuple": [{"py/type": "sympy.core.symbol.Symbol"}, "geoid"]}]}, null, null]}, {"py/object": "epymorph.simulation.AttributeDef", "py/state": ["dissimilarity_index", "geo", {"py/type": "builtins.float"}, {"py/id": 5}, {"py/reduce": ["py/newobj", {"py/tuple": [{"py/type": "sympy.core.symbol.Symbol"}, "dissimilarity_index"]}]}, null, null]}, {"py/object": "epymorph.simulation.AttributeDef", "py/state": ["median_income", "geo", {"py/type": "builtins.int"}, {"py/id": 5}, {"py/reduce": ["py/newobj", {"py/tuple": [{"py/type": "sympy.core.symbol.Symbol"}, "median_income"]}]}, null, null]}, {"py/object": 
"epymorph.simulation.AttributeDef", "py/state": ["pop_density_km2", "geo", {"py/type": "builtins.float"}, {"py/id": 5}, {"py/reduce": ["py/newobj", {"py/tuple": [{"py/type": "sympy.core.symbol.Symbol"}, "pop_density_km2"]}]}, null, null]}, {"py/object": "epymorph.simulation.AttributeDef", "py/state": ["commuters", "geo", {"py/type": "builtins.int"}, {"py/object": "epymorph.data_shape.NodeAndNode"}, {"py/reduce": ["py/newobj", {"py/tuple": [{"py/type": "sympy.core.symbol.Symbol"}, "commuters"]}]}, null, null]}], "time_period": {"py/object": "epymorph.geo.spec.Year", "year": 2015}, "scope": {"py/object": "epymorph.geography.us_census.CountyScope", "includes_granularity": "state", "includes": ["04", "08", "49", "35", "32"], "year": 2010}, "source": {"label": "Census:name", "population": "Census", "population_by_age": "Census", "centroid": "Census", "geoid": "Census", "dissimilarity_index": "Census", "median_income": "Census", "pop_density_km2": "Census", "commuters": "Census"}}}
4 changes: 4 additions & 0 deletions epymorph/error.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,10 @@ def pretty(self) -> str:
return str(self) + "".join((f"\n- {e}" for e in self.attribute_errors))


class DataResourceException(Exception):
"""Exception during resource loading from ADRIOs."""


class IpmValidationException(ValidationException):
"""Exception for invalid IPM."""

Expand Down
5 changes: 3 additions & 2 deletions epymorph/geo/adrio/adrio.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,8 @@

from numpy.typing import NDArray

from epymorph.geo.spec import Geography, TimePeriod
from epymorph.geo.spec import TimePeriod
from epymorph.geography.scope import GeoScope
from epymorph.simulation import AttributeDef


Expand Down Expand Up @@ -40,7 +41,7 @@ class ADRIOMaker(ABC):
attributes: list[AttributeDef]

@abstractmethod
def make_adrio(self, attrib: AttributeDef, geography: Geography, time_period: TimePeriod) -> ADRIO:
def make_adrio(self, attrib: AttributeDef, scope: GeoScope, time_period: TimePeriod) -> ADRIO:
"""Creates an ADRIO to fetch the specified attribute for the specified time and place."""


Expand Down
Loading

0 comments on commit 5d12231

Please sign in to comment.