Skip to content

Commit

Permalink
Multi-strata simulation refactor.
Browse files Browse the repository at this point in the history
Includes class-based refactor for initializer and parameter functions.
Add spec to StaticGeo. Regenerate geos.
Update vignettes for API changes.
New initializers vignette.
  • Loading branch information
Tyler Coles committed Jul 10, 2024
1 parent 77cc8a1 commit 335f07f
Show file tree
Hide file tree
Showing 103 changed files with 6,493 additions and 3,833 deletions.
2 changes: 1 addition & 1 deletion .vscode/settings.json
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@

"python.formatting.provider": "none",
"python.analysis.autoImportCompletions": true,
"python.analysis.typeCheckingMode": "basic",
"python.analysis.typeCheckingMode": "standard",
"python.analysis.diagnosticMode": "workspace",

"python.testing.pytestEnabled": false,
Expand Down
30 changes: 16 additions & 14 deletions doc/demo/01-SIRH-IPM.ipynb

Large diffs are not rendered by default.

71 changes: 35 additions & 36 deletions doc/demo/02-states-GEO.ipynb

Large diffs are not rendered by default.

73 changes: 37 additions & 36 deletions doc/demo/03-counties-GEO.ipynb

Large diffs are not rendered by default.

53 changes: 29 additions & 24 deletions doc/demo/04-time-varying-beta.ipynb

Large diffs are not rendered by default.

8 changes: 4 additions & 4 deletions doc/demo/05-visualizing-mm.ipynb

Large diffs are not rendered by default.

85 changes: 46 additions & 39 deletions doc/devlog/2023-06-30.ipynb

Large diffs are not rendered by default.

19 changes: 12 additions & 7 deletions doc/devlog/2023-07-06.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -26,17 +26,22 @@
"\n",
"from epymorph.data_shape import Shapes\n",
"from epymorph.data_type import CentroidDType\n",
"from epymorph.geo.spec import StaticGeoSpec, Year, attrib\n",
"from epymorph.geo.spec import StaticGeoSpec, Year\n",
"from epymorph.geography.us_census import StateScope\n",
"from epymorph.simulation import AttributeDef\n",
"\n",
"spec = StaticGeoSpec(\n",
" attributes=[\n",
" attrib('label', str, Shapes.N),\n",
" attrib('geoid', str, Shapes.N),\n",
" attrib('centroid', CentroidDType, Shapes.N),\n",
" attrib('population', int, Shapes.N),\n",
" attrib('commuters', int, Shapes.NxN),\n",
" attrib('humidity', float, Shapes.TxN),\n",
" AttributeDef('label', str, Shapes.N),\n",
" AttributeDef('geoid', str, Shapes.N),\n",
" AttributeDef('centroid', CentroidDType, Shapes.N),\n",
" AttributeDef('population', int, Shapes.N),\n",
" AttributeDef('commuters', int, Shapes.NxN),\n",
" AttributeDef('humidity', float, Shapes.TxN),\n",
" ],\n",
" # critically: these states are listed here in GEOID order,\n",
" # and we maintain that order when entering data below\n",
" scope=StateScope.in_states_by_code(['FL', 'GA', 'MD', 'NC', 'SC', 'VA'], year=2015),\n",
" time_period=Year(2015),\n",
")"
]
Expand Down
38 changes: 24 additions & 14 deletions doc/devlog/2023-07-07.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -29,23 +29,33 @@
"from epymorph.data_shape import Shapes\n",
"from epymorph.data_type import CentroidDType\n",
"from epymorph.error import GeoValidationException\n",
"from epymorph.geo.spec import LABEL, StaticGeoSpec, Year, attrib\n",
"from epymorph.geo.spec import LABEL, StaticGeoSpec, Year\n",
"from epymorph.geo.static import StaticGeo\n",
"from epymorph.geo.static import StaticGeoFileOps as F\n",
"from epymorph.geography.us_census import StateScope\n",
"from epymorph.simulation import AttributeDef\n",
"\n",
"YEAR = 2015\n",
"NUM_STATES = 52\n",
"NUM_COUNTIES = 3220\n",
"\n",
"spec = StaticGeoSpec(\n",
"state_scope = StateScope.all(year=YEAR)\n",
"county_scope = state_scope.lower_granularity()\n",
"\n",
"# Both the state and county geos will have the same attributes, just different scopes.\n",
"state_spec = StaticGeoSpec(\n",
" attributes=[\n",
" LABEL,\n",
" attrib('geoid', str, Shapes.N),\n",
" attrib('centroid', CentroidDType, Shapes.N),\n",
" attrib('population', int, Shapes.N),\n",
" attrib('commuters', int, Shapes.NxN)\n",
" AttributeDef('geoid', str, Shapes.N),\n",
" AttributeDef('centroid', CentroidDType, Shapes.N),\n",
" AttributeDef('population', int, Shapes.N),\n",
" AttributeDef('commuters', int, Shapes.NxN),\n",
" ],\n",
" time_period=Year(YEAR))\n",
" scope=state_scope,\n",
" time_period=Year(YEAR),\n",
")\n",
"\n",
"county_spec = dataclasses.replace(state_spec, scope=county_scope)\n",
"\n",
"# Initialize Census API\n",
"census = Census(os.environ['CENSUS_API_KEY'])"
Expand Down Expand Up @@ -91,7 +101,7 @@
" \"wrk_state\",\n",
" \"wrk_county\",\n",
" \"workers\",\n",
" \"moe\"\n",
" \"moe\",\n",
" ],\n",
" dtype=str\n",
")\n",
Expand Down Expand Up @@ -142,7 +152,7 @@
"d = pd.DataFrame.from_records(state_data).astype({\n",
" 'NAME': np.str_,\n",
" 'B01003_001E': np.int64,\n",
" 'state': np.str_\n",
" 'state': np.str_,\n",
"})\n",
"d.rename(columns={\n",
" 'NAME': 'label',\n",
Expand Down Expand Up @@ -170,7 +180,7 @@
" 'geoid': d['geoid'].to_numpy(dtype=np.str_),\n",
" 'centroid': d['centroid'].to_numpy(dtype=CentroidDType),\n",
" 'population': d['population'].to_numpy(dtype=np.int64),\n",
" 'commuters': c\n",
" 'commuters': c,\n",
"}\n",
"\n",
"num_states = len(states_values['label'])\n",
Expand All @@ -186,7 +196,7 @@
"source": [
"geofile = Path('epymorph/data/geo') / F.to_archive_filename('us_states_2015')\n",
"try:\n",
" states_geo = StaticGeo(dataclasses.replace(spec), states_values)\n",
" states_geo = StaticGeo(state_spec, states_values)\n",
" states_geo.validate()\n",
" states_geo.save(geofile)\n",
"except GeoValidationException as e:\n",
Expand All @@ -201,7 +211,7 @@
{
"data": {
"text/plain": [
"<epymorph.geo.static.StaticGeo at 0x7ff85c5ebc50>"
"<epymorph.geo.static.StaticGeo at 0x7f0c8a6dc390>"
]
},
"execution_count": 6,
Expand Down Expand Up @@ -286,7 +296,7 @@
"source": [
"geofile = Path('epymorph/data/geo') / F.to_archive_filename('us_counties_2015')\n",
"try:\n",
" counties_geo = StaticGeo(dataclasses.replace(spec), counties_values)\n",
" counties_geo = StaticGeo(county_spec, counties_values)\n",
" counties_geo.validate()\n",
" counties_geo.save(geofile)\n",
"except GeoValidationException as e:\n",
Expand All @@ -301,7 +311,7 @@
{
"data": {
"text/plain": [
"<epymorph.geo.static.StaticGeo at 0x7ff833500290>"
"<epymorph.geo.static.StaticGeo at 0x7f0c8f1db850>"
]
},
"execution_count": 10,
Expand Down
84 changes: 60 additions & 24 deletions doc/devlog/2023-07-12.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -26,29 +26,31 @@
"from epymorph.data_shape import Shapes\n",
"from epymorph.data_type import CentroidDType\n",
"from epymorph.error import GeoValidationException\n",
"from epymorph.geo.spec import LABEL, StaticGeoSpec, Year, attrib\n",
"from epymorph.geo.spec import LABEL, StaticGeoSpec, Year\n",
"from epymorph.geo.static import StaticGeo\n",
"from epymorph.geo.static import StaticGeoFileOps as F\n",
"from epymorph.geography.scope import ScopeFilter\n",
"from epymorph.geography.us_census import BlockGroupScope\n",
"from epymorph.simulation import AttributeDef\n",
"\n",
"YEAR = 2019\n",
"NUM_COUNTIES = 2494\n",
"\n",
"spec = StaticGeoSpec(\n",
" attributes=[\n",
" LABEL,\n",
" attrib('geoid', str, Shapes.N),\n",
" attrib('centroid', CentroidDType, Shapes.N),\n",
" attrib('population', int, Shapes.N),\n",
" attrib('population_by_age', int, Shapes.NxA(3)),\n",
" attrib('population_by_age_x6', int, Shapes.NxA(6)),\n",
" attrib('median_age', float, Shapes.N),\n",
" attrib('median_income', int, Shapes.N),\n",
" attrib('average_household_size', float, Shapes.N),\n",
" attrib('pop_density_km2', float, Shapes.N),\n",
" attrib('tract_gini_index', float, Shapes.N),\n",
" attrib('tract_median_income', int, Shapes.N),\n",
" ],\n",
" time_period=Year(YEAR))\n",
"attributes: list[AttributeDef] = [\n",
" LABEL,\n",
" AttributeDef('geoid', str, Shapes.N),\n",
" AttributeDef('centroid', CentroidDType, Shapes.N),\n",
" AttributeDef('population', int, Shapes.N),\n",
" # AttributeDef('population_by_age', int, Shapes.NxA(3)),\n",
" # AttributeDef('population_by_age_x6', int, Shapes.NxA(6)),\n",
" AttributeDef('median_age', float, Shapes.N),\n",
" AttributeDef('median_income', int, Shapes.N),\n",
" AttributeDef('average_household_size', float, Shapes.N),\n",
" AttributeDef('pop_density_km2', float, Shapes.N),\n",
" AttributeDef('tract_gini_index', float, Shapes.N),\n",
" AttributeDef('tract_median_income', int, Shapes.N),\n",
"]\n",
"\n",
"\n",
"AGE_VARS = [\n",
" \"B01001_003E\", # Population (Male) 0-4 years\n",
Expand Down Expand Up @@ -234,6 +236,26 @@
"execution_count": 4,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"68 040139411001\n",
"870 040139805001\n",
"1223 040139804001\n",
"1268 040131167331\n",
"1444 040131138021\n",
"1523 040131134001\n",
"1661 040139807001\n",
"1808 040137233061\n",
"2054 040137233031\n",
"2056 040139801001\n",
"2273 040130610171\n",
"Name: geoid, dtype: object"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
Expand Down Expand Up @@ -383,10 +405,14 @@
"cbgs.drop(columns=['tract_geoid'], inplace=True)\n",
"\n",
"# Filter CBGs\n",
"cbgs.drop(cbgs[\n",
"dropped_cbgs = cbgs[\n",
" (cbgs['median_income'] == 0) &\n",
" (cbgs['tract_median_income'] == 0)\n",
"].index, inplace=True)\n",
"]\n",
"\n",
"display(dropped_cbgs['geoid'])\n",
"\n",
"cbgs.drop(dropped_cbgs.index, inplace=True)\n",
"\n",
"cbgs.sort_values(by='geoid', inplace=True)\n",
"cbgs.reset_index(drop=True, inplace=True)\n",
Expand Down Expand Up @@ -547,8 +573,8 @@
" 'geoid': cbgs['geoid'].to_numpy(dtype=np.str_),\n",
" 'centroid': cbgs['centroid'].to_numpy(dtype=CentroidDType),\n",
" 'population': cbgs['population'].to_numpy(dtype=np.int64),\n",
" 'population_by_age': cbgs_age_1.to_numpy(dtype=np.int64),\n",
" 'population_by_age_x6': cbgs_age_2.to_numpy(dtype=np.int64),\n",
" # 'population_by_age': cbgs_age_1.to_numpy(dtype=np.int64),\n",
" # 'population_by_age_x6': cbgs_age_2.to_numpy(dtype=np.int64),\n",
" 'median_age': cbgs['median_age'].to_numpy(dtype=np.float64),\n",
" 'median_income': cbgs['median_income'].to_numpy(dtype=np.int64),\n",
" 'average_household_size': cbgs['average_household_size'].to_numpy(dtype=np.float64),\n",
Expand All @@ -568,6 +594,16 @@
"metadata": {},
"outputs": [],
"source": [
"spec = StaticGeoSpec(\n",
" attributes=attributes,\n",
" # Maricopa County, AZ is GEOID 04013\n",
" scope=ScopeFilter(\n",
" parent=BlockGroupScope.in_counties(['04013'], year=YEAR),\n",
" remove=dropped_cbgs['geoid'].to_numpy(np.str_),\n",
" ),\n",
" time_period=Year(YEAR),\n",
")\n",
"\n",
"try:\n",
" geo = StaticGeo(spec, values)\n",
" geo.validate()\n",
Expand Down Expand Up @@ -601,7 +637,6 @@
"label: No diffs!\n",
"population: No diffs!\n",
"median_age: No diffs!\n",
"population_by_age_x6: No diffs!\n",
"median_income: No diffs!\n",
"average_household_size: No diffs!\n",
"tract_gini_index: No diffs!\n",
Expand Down Expand Up @@ -650,8 +685,9 @@
"diff('population', d1['population'], d2['population'], np.int_.__eq__)\n",
"diff('median_age', d1['median_age'], d2['median_age'], np.isclose)\n",
"# there's no equivalent in the old geo to our new 'population_by_age'\n",
"diff('population_by_age_x6', d1['population_by_age_x6'],\n",
" d2['pop_by_age'], np.array_equal)\n",
"# NOTE: we can't check population_by_age_x6 anymore, because it is no longer included in this geo's attributes.\n",
"# diff('population_by_age_x6', d1['population_by_age_x6'],\n",
"# d2['pop_by_age'], np.array_equal)\n",
"diff('median_income', d1['median_income'], d2['median_income'], np.int_.__eq__)\n",
"diff('average_household_size', d1['average_household_size'],\n",
" d2['average_household_size'], np.isclose)\n",
Expand Down
Loading

0 comments on commit 335f07f

Please sign in to comment.