Skip to content

Commit

Permalink
Devlog fixes.
Browse files Browse the repository at this point in the history
  • Loading branch information
TJohnsonAZ committed Jun 17, 2024
1 parent a8683c7 commit 545a001
Show file tree
Hide file tree
Showing 3 changed files with 26 additions and 15 deletions.
35 changes: 22 additions & 13 deletions doc/devlog/2024-06-12.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,8 @@
"from datetime import date\n",
"from pathlib import Path\n",
"\n",
"from numpy import array_equal\n",
"\n",
"from epymorph.data_shape import Shapes\n",
"from epymorph.geo.adrio import adrio_maker_library\n",
"from epymorph.geo.adrio.census.adrio_census import ADRIOMakerCensus\n",
Expand All @@ -34,7 +36,6 @@
"from epymorph.simulation import geo_attrib\n",
"from pandas import DataFrame, concat\n",
"\n",
"\n",
"# create and store 'pei_population.csv'\n",
"census_maker = ADRIOMakerCensus()\n",
"states_list = ['AZ', 'FL', 'GA', 'MD', 'NY', 'NC', 'SC', 'VA']\n",
Expand Down Expand Up @@ -78,7 +79,7 @@
"\n",
"# validate geo and ensure both ADRIOs fetched identical data\n",
"geo.validate()\n",
"if not geo['population'].all() == geo['population_census'].all():\n",
"if not array_equal(geo['population'], geo['population_census']):\n",
" raise Exception(\"Data not equal.\")"
]
},
Expand All @@ -89,27 +90,35 @@
"outputs": [],
"source": [
"# create and store 'us_sw_counties_population.csv'\n",
"\n",
"# get population-by-age data from acs5\n",
"states_list = ['04', '08', '49', '35', '32']\n",
"population_2015 = census_maker.make_adrio(geo_attrib(\n",
" 'population_by_age', int, Shapes.NxA(3)), CountyScope.in_states(states_list), Year(2015)).get_value()\n",
"population_2016 = census_maker.make_adrio(geo_attrib(\n",
" 'population_by_age', int, Shapes.NxA(3)), CountyScope.in_states(states_list), Year(2016)).get_value()\n",
"\n",
"# get county and state info from shapefiles and convert to dataframes\n",
"counties_info = get_us_counties(2010)\n",
"states_info = get_us_states(2010)\n",
"counties_info_df = DataFrame({'state_geoid': [STATE.extract(\n",
" county_id) for county_id in counties_info.geoid], 'geoid': counties_info.geoid, 'name': counties_info.name})\n",
"states_info = get_us_states(2010)\n",
"states_info_df = DataFrame(\n",
" {'state_geoid': states_info.geoid, 'state_name': states_info.name})\n",
"\n",
"# merge dataframes and create \"County, State\" name column\n",
"merged_df = counties_info_df.merge(states_info_df, on='state_geoid')\n",
"merged_df['county_name'] = merged_df['name'] + \", \" + merged_df['state_name']\n",
"merged_df = merged_df.loc[merged_df['state_geoid'].isin(states_list)]\n",
"\n",
"# create and merge dataframes to be converted to csvs\n",
"df_2015 = DataFrame({'Date': [date(2015, 1, 1) for i in merged_df.index], 'County': merged_df['county_name'], 'Young': [\n",
" pop[0] for pop in population_2015], 'Adult': [pop[1] for pop in population_2015], 'Elderly': [pop[2] for pop in population_2015]})\n",
"df_2016 = DataFrame({'Date': [date(2016, 1, 1) for i in merged_df.index], 'County': merged_df['county_name'], 'Young': [\n",
" pop[0] for pop in population_2016], 'Adult': [pop[1] for pop in population_2016], 'Elderly': [pop[2] for pop in population_2016]})\n",
"df = concat([df_2015, df_2016])\n",
"\n",
"# sort incorrectly and store as csv\n",
"df.sort_values('Young', inplace=True)\n",
"df.to_csv(\"./scratch/us_sw_counties_population.csv\", index=False)"
]
Expand Down Expand Up @@ -157,25 +166,25 @@
"\n",
"census_df = DataFrame({'Young': [pop[0] for pop in geo['population_by_age']], 'Adult': [\n",
" pop[1] for pop in geo['population_by_age']], 'Elderly': [pop[2] for pop in geo['population_by_age']]})\n",
"if not geo['population_0-19'].all() == census_df['Young'].all():\n",
"if not array_equal(geo['population_0-19'], census_df['Young']):\n",
" raise Exception(\"Young data not equal.\")\n",
"if not geo['population_20-64'].all() == census_df['Adult'].all():\n",
"if not array_equal(geo['population_20-64'], census_df['Adult']):\n",
" raise Exception(\"Adult data not equal.\")\n",
"if not geo['population_65+'].all() == census_df['Elderly'].all():\n",
"if not array_equal(geo['population_65+'], census_df['Elderly']):\n",
" raise Exception(\"Elderly data not equal.\")"
]
},
{
"cell_type": "code",
"execution_count": 7,
"execution_count": 18,
"metadata": {},
"outputs": [],
"source": [
"# create and store 'counties_commuters_2020.csv'\n",
"counties_list = ['08001', '35001', '04013', '04017']\n",
"df = census_maker.fetch_commuters(CountyScope.in_counties(counties_list), 2020)\n",
"df['res_geoid'] = '0' + df['res_state_code'] + df['res_county_code']\n",
"df['wrk_geoid'] = df['wrk_state_code'] + df['wrk_county_code']\n",
"df['res_geoid'] = df['res_state_code'] + df['res_county_code']\n",
"df['wrk_geoid'] = df['wrk_state_code'].apply(lambda x: x[1:]) + df['wrk_county_code']\n",
"\n",
"df.to_csv('./scratch/counties_commuters_2020.csv',\n",
" columns=['res_geoid', 'wrk_geoid', 'workers'], index=False)\n",
Expand All @@ -185,7 +194,7 @@
},
{
"cell_type": "code",
"execution_count": 8,
"execution_count": 19,
"metadata": {},
"outputs": [],
"source": [
Expand All @@ -201,7 +210,7 @@
" source={\n",
" 'label': 'Census:name',\n",
" 'population': 'Census',\n",
" 'commuters': CSVSpecMatrix(file_path=Path(\"./epymorph/data/geo/csv/counties_commuters.csv\"),\n",
" 'commuters': CSVSpecMatrix(file_path=Path(\"./scratch/counties_commuters_2020.csv\"),\n",
" from_key_col=0, to_key_col=1, data_col=2, key_type=\"geoid\", skiprows=1),\n",
" 'commuters_census': 'Census:commuters'\n",
" }\n",
Expand All @@ -210,14 +219,14 @@
},
{
"cell_type": "code",
"execution_count": 9,
"execution_count": 20,
"metadata": {},
"outputs": [],
"source": [
"geo = DynamicGeo.from_library(spec, adrio_maker_library)\n",
"\n",
"geo.validate()\n",
"if not geo['commuters'].all() == geo['commuters_census'].all():\n",
"if not array_equal(geo['commuters'], geo['commuters_census']):\n",
" raise Exception(\"Data not equal.\")"
]
}
Expand Down
5 changes: 4 additions & 1 deletion epymorph/geo/adrio/census/adrio_census.py
Original file line number Diff line number Diff line change
Expand Up @@ -191,6 +191,7 @@ def fetch_acs5(self, variables: list[str], scope: CensusScope, year: int) -> Dat

def fetch_sf(self, scope: CensusScope) -> GeoDataFrame:
"""Utility function to fetch shape files from Census for specified regions."""

# call appropriate pygris function based on granularity and sort result
match scope:
case StateScopeAll() | StateScope():
Expand All @@ -215,7 +216,9 @@ def fetch_sf(self, scope: CensusScope) -> GeoDataFrame:
return GeoDataFrame(df)

def fetch_commuters(self, scope: CensusScope, year: int) -> DataFrame:
"""Utility function to fetch commuting data from .xslx format filtered down to requested regions."""
"""
Utility function to fetch commuting data from .xlsx format filtered down to requested regions.
"""
# check for invalid granularity
if isinstance(scope, TractScope) or isinstance(scope, BlockGroupScope):
msg = "Commuting data cannot be retrieved for tract or block group granularities"
Expand Down
1 change: 0 additions & 1 deletion epymorph/geo/adrio/file/adrio_csv.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,6 @@ class CSVSpecMatrix():
@dataclass
class CSVSpecMatrixTime(CSVSpecMatrix):
"""Dataclass to store parameters for time-series CSV ADRIO with data shape TxNxN."""

time_col: int


Expand Down

0 comments on commit 545a001

Please sign in to comment.