diff --git a/doc/devlog/2024-06-12.ipynb b/doc/devlog/2024-06-12.ipynb index 00f2b1ef..fb0e5b57 100644 --- a/doc/devlog/2024-06-12.ipynb +++ b/doc/devlog/2024-06-12.ipynb @@ -22,6 +22,8 @@ "from datetime import date\n", "from pathlib import Path\n", "\n", + "from numpy import array_equal\n", + "\n", "from epymorph.data_shape import Shapes\n", "from epymorph.geo.adrio import adrio_maker_library\n", "from epymorph.geo.adrio.census.adrio_census import ADRIOMakerCensus\n", @@ -34,7 +36,6 @@ "from epymorph.simulation import geo_attrib\n", "from pandas import DataFrame, concat\n", "\n", - "\n", "# create and store 'pei_population.csv'\n", "census_maker = ADRIOMakerCensus()\n", "states_list = ['AZ', 'FL', 'GA', 'MD', 'NY', 'NC', 'SC', 'VA']\n", @@ -78,7 +79,7 @@ "\n", "# validate geo and ensure both ADRIOs fetched identical data\n", "geo.validate()\n", - "if not geo['population'].all() == geo['population_census'].all():\n", + "if not array_equal(geo['population'], geo['population_census']):\n", " raise Exception(\"Data not equal.\")" ] }, @@ -89,27 +90,35 @@ "outputs": [], "source": [ "# create and store 'us_sw_counties_population.csv'\n", + "\n", + "# get population-by-age data from acs5\n", "states_list = ['04', '08', '49', '35', '32']\n", "population_2015 = census_maker.make_adrio(geo_attrib(\n", " 'population_by_age', int, Shapes.NxA(3)), CountyScope.in_states(states_list), Year(2015)).get_value()\n", "population_2016 = census_maker.make_adrio(geo_attrib(\n", " 'population_by_age', int, Shapes.NxA(3)), CountyScope.in_states(states_list), Year(2016)).get_value()\n", "\n", + "# get county and state info from shapefiles and convert to dataframes\n", "counties_info = get_us_counties(2010)\n", + "states_info = get_us_states(2010)\n", "counties_info_df = DataFrame({'state_geoid': [STATE.extract(\n", " county_id) for county_id in counties_info.geoid], 'geoid': counties_info.geoid, 'name': counties_info.name})\n", - "states_info = get_us_states(2010)\n", "states_info_df = 
DataFrame(\n", " {'state_geoid': states_info.geoid, 'state_name': states_info.name})\n", + "\n", + "# merge dataframes and create \"County, State\" name column\n", "merged_df = counties_info_df.merge(states_info_df, on='state_geoid')\n", "merged_df['county_name'] = merged_df['name'] + \", \" + merged_df['state_name']\n", "merged_df = merged_df.loc[merged_df['state_geoid'].isin(states_list)]\n", "\n", + "# create and merge dataframes to be converted to csvs\n", "df_2015 = DataFrame({'Date': [date(2015, 1, 1) for i in merged_df.index], 'County': merged_df['county_name'], 'Young': [\n", " pop[0] for pop in population_2015], 'Adult': [pop[1] for pop in population_2015], 'Elderly': [pop[2] for pop in population_2015]})\n", "df_2016 = DataFrame({'Date': [date(2016, 1, 1) for i in merged_df.index], 'County': merged_df['county_name'], 'Young': [\n", " pop[0] for pop in population_2016], 'Adult': [pop[1] for pop in population_2016], 'Elderly': [pop[2] for pop in population_2016]})\n", "df = concat([df_2015, df_2016])\n", + "\n", + "# sort incorrectly and store as csv\n", "df.sort_values('Young', inplace=True)\n", "df.to_csv(\"./scratch/us_sw_counties_population.csv\", index=False)" ] @@ -157,25 +166,25 @@ "\n", "census_df = DataFrame({'Young': [pop[0] for pop in geo['population_by_age']], 'Adult': [\n", " pop[1] for pop in geo['population_by_age']], 'Elderly': [pop[2] for pop in geo['population_by_age']]})\n", - "if not geo['population_0-19'].all() == census_df['Young'].all():\n", + "if not array_equal(geo['population_0-19'], census_df['Young']):\n", " raise Exception(\"Young data not equal.\")\n", - "if not geo['population_20-64'].all() == census_df['Adult'].all():\n", + "if not array_equal(geo['population_20-64'], census_df['Adult']):\n", " raise Exception(\"Adult data not equal.\")\n", - "if not geo['population_65+'].all() == census_df['Elderly'].all():\n", + "if not array_equal(geo['population_65+'], census_df['Elderly']):\n", " raise Exception(\"Elderly data not 
equal.\")" ] }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 18, "metadata": {}, "outputs": [], "source": [ "# create and store 'counties_commuters_2020.csv'\n", "counties_list = ['08001', '35001', '04013', '04017']\n", "df = census_maker.fetch_commuters(CountyScope.in_counties(counties_list), 2020)\n", - "df['res_geoid'] = '0' + df['res_state_code'] + df['res_county_code']\n", - "df['wrk_geoid'] = df['wrk_state_code'] + df['wrk_county_code']\n", + "df['res_geoid'] = df['res_state_code'] + df['res_county_code']\n", + "df['wrk_geoid'] = df['wrk_state_code'].apply(lambda x: x[1:]) + df['wrk_county_code']\n", "\n", "df.to_csv('./scratch/counties_commuters_2020.csv',\n", " columns=['res_geoid', 'wrk_geoid', 'workers'], index=False)\n", @@ -185,7 +194,7 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 19, "metadata": {}, "outputs": [], "source": [ @@ -201,7 +210,7 @@ " source={\n", " 'label': 'Census:name',\n", " 'population': 'Census',\n", - " 'commuters': CSVSpecMatrix(file_path=Path(\"./epymorph/data/geo/csv/counties_commuters.csv\"),\n", + " 'commuters': CSVSpecMatrix(file_path=Path(\"./scratch/counties_commuters_2020.csv\"),\n", " from_key_col=0, to_key_col=1, data_col=2, key_type=\"geoid\", skiprows=1),\n", " 'commuters_census': 'Census:commuters'\n", " }\n", @@ -210,14 +219,14 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 20, "metadata": {}, "outputs": [], "source": [ "geo = DynamicGeo.from_library(spec, adrio_maker_library)\n", "\n", "geo.validate()\n", - "if not geo['commuters'].all() == geo['commuters_census'].all():\n", + "if not array_equal(geo['commuters'], geo['commuters_census']):\n", " raise Exception(\"Data not equal.\")" ] } diff --git a/epymorph/geo/adrio/census/adrio_census.py b/epymorph/geo/adrio/census/adrio_census.py index 4ceb5122..35454b4c 100644 --- a/epymorph/geo/adrio/census/adrio_census.py +++ b/epymorph/geo/adrio/census/adrio_census.py @@ -191,6 +191,7 @@ def 
fetch_acs5(self, variables: list[str], scope: CensusScope, year: int) -> Dat def fetch_sf(self, scope: CensusScope) -> GeoDataFrame: """Utility function to fetch shape files from Census for specified regions.""" + # call appropriate pygris function based on granularity and sort result match scope: case StateScopeAll() | StateScope(): @@ -215,7 +216,9 @@ def fetch_sf(self, scope: CensusScope) -> GeoDataFrame: return GeoDataFrame(df) def fetch_commuters(self, scope: CensusScope, year: int) -> DataFrame: - """Utility function to fetch commuting data from .xslx format filtered down to requested regions.""" + """ + Utility function to fetch commuting data from .xlsx format filtered down to requested regions. + """ # check for invalid granularity if isinstance(scope, TractScope) or isinstance(scope, BlockGroupScope): msg = "Commuting data cannot be retrieved for tract or block group granularities" diff --git a/epymorph/geo/adrio/file/adrio_csv.py b/epymorph/geo/adrio/file/adrio_csv.py index b12afb58..f4d0b213 100644 --- a/epymorph/geo/adrio/file/adrio_csv.py +++ b/epymorph/geo/adrio/file/adrio_csv.py @@ -49,7 +49,6 @@ class CSVSpecMatrix(): @dataclass class CSVSpecMatrixTime(CSVSpecMatrix): """Dataclass to store parameters for time-series CSV ADRIO with data shape TxNxN.""" - time_col: int