Devlog fixes.

NAU-CCL · Jun 17, 2024 · 545a001 · 545a001
1 parent a8683c7
commit 545a001
Show file tree

Hide file tree

Showing 3 changed files with 26 additions and 15 deletions.
diff --git a/doc/devlog/2024-06-12.ipynb b/doc/devlog/2024-06-12.ipynb
@@ -22,6 +22,8 @@
     "from datetime import date\n",
     "from pathlib import Path\n",
     "\n",
+    "from numpy import array_equal\n",
+    "\n",
     "from epymorph.data_shape import Shapes\n",
     "from epymorph.geo.adrio import adrio_maker_library\n",
     "from epymorph.geo.adrio.census.adrio_census import ADRIOMakerCensus\n",
@@ -34,7 +36,6 @@
     "from epymorph.simulation import geo_attrib\n",
     "from pandas import DataFrame, concat\n",
     "\n",
-    "\n",
     "# create and store 'pei_population.csv'\n",
     "census_maker = ADRIOMakerCensus()\n",
     "states_list = ['AZ', 'FL', 'GA', 'MD', 'NY', 'NC', 'SC', 'VA']\n",
@@ -78,7 +79,7 @@
     "\n",
     "# validate geo and ensure both ADRIOs fetched identical data\n",
     "geo.validate()\n",
-    "if not geo['population'].all() == geo['population_census'].all():\n",
+    "if not array_equal(geo['population'], geo['population_census']):\n",
     "    raise Exception(\"Data not equal.\")"
    ]
   },
@@ -89,27 +90,35 @@
    "outputs": [],
    "source": [
     "# create and store 'us_sw_counties_population.csv'\n",
+    "\n",
+    "# get population-by-age data from acs5\n",
     "states_list = ['04', '08', '49', '35', '32']\n",
     "population_2015 = census_maker.make_adrio(geo_attrib(\n",
     "    'population_by_age', int, Shapes.NxA(3)), CountyScope.in_states(states_list), Year(2015)).get_value()\n",
     "population_2016 = census_maker.make_adrio(geo_attrib(\n",
     "    'population_by_age', int, Shapes.NxA(3)), CountyScope.in_states(states_list), Year(2016)).get_value()\n",
     "\n",
+    "# get county and state info from shapefiles and convert to dataframes\n",
     "counties_info = get_us_counties(2010)\n",
+    "states_info = get_us_states(2010)\n",
     "counties_info_df = DataFrame({'state_geoid': [STATE.extract(\n",
     "    county_id) for county_id in counties_info.geoid], 'geoid': counties_info.geoid, 'name': counties_info.name})\n",
-    "states_info = get_us_states(2010)\n",
     "states_info_df = DataFrame(\n",
     "    {'state_geoid': states_info.geoid, 'state_name': states_info.name})\n",
+    "\n",
+    "# merge dataframes and create \"County, State\" name column\n",
     "merged_df = counties_info_df.merge(states_info_df, on='state_geoid')\n",
     "merged_df['county_name'] = merged_df['name'] + \", \" + merged_df['state_name']\n",
     "merged_df = merged_df.loc[merged_df['state_geoid'].isin(states_list)]\n",
     "\n",
+    "# create and merge dataframes to be converted to csvs\n",
     "df_2015 = DataFrame({'Date': [date(2015, 1, 1) for i in merged_df.index], 'County': merged_df['county_name'], 'Young': [\n",
     "                    pop[0] for pop in population_2015], 'Adult': [pop[1] for pop in population_2015], 'Elderly': [pop[2] for pop in population_2015]})\n",
     "df_2016 = DataFrame({'Date': [date(2016, 1, 1) for i in merged_df.index], 'County': merged_df['county_name'], 'Young': [\n",
     "                    pop[0] for pop in population_2016], 'Adult': [pop[1] for pop in population_2016], 'Elderly': [pop[2] for pop in population_2016]})\n",
     "df = concat([df_2015, df_2016])\n",
+    "\n",
+    "# sort by 'Young' (intentionally not the expected row order) and store as csv\n",
     "df.sort_values('Young', inplace=True)\n",
     "df.to_csv(\"./scratch/us_sw_counties_population.csv\", index=False)"
    ]
@@ -157,25 +166,25 @@
     "\n",
     "census_df = DataFrame({'Young': [pop[0] for pop in geo['population_by_age']], 'Adult': [\n",
     "                      pop[1] for pop in geo['population_by_age']], 'Elderly': [pop[2] for pop in geo['population_by_age']]})\n",
-    "if not geo['population_0-19'].all() == census_df['Young'].all():\n",
+    "if not array_equal(geo['population_0-19'], census_df['Young']):\n",
     "    raise Exception(\"Young data not equal.\")\n",
-    "if not geo['population_20-64'].all() == census_df['Adult'].all():\n",
+    "if not array_equal(geo['population_20-64'], census_df['Adult']):\n",
     "    raise Exception(\"Adult data not equal.\")\n",
-    "if not geo['population_65+'].all() == census_df['Elderly'].all():\n",
+    "if not array_equal(geo['population_65+'], census_df['Elderly']):\n",
     "    raise Exception(\"Elderly data not equal.\")"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 7,
+   "execution_count": 18,
    "metadata": {},
    "outputs": [],
    "source": [
     "# create and store 'counties_commuters_2020.csv'\n",
     "counties_list = ['08001', '35001', '04013', '04017']\n",
     "df = census_maker.fetch_commuters(CountyScope.in_counties(counties_list), 2020)\n",
-    "df['res_geoid'] = '0' + df['res_state_code'] + df['res_county_code']\n",
-    "df['wrk_geoid'] = df['wrk_state_code'] + df['wrk_county_code']\n",
+    "df['res_geoid'] = df['res_state_code'] + df['res_county_code']\n",
+    "df['wrk_geoid'] = df['wrk_state_code'].apply(lambda x: x[1:]) + df['wrk_county_code']\n",
     "\n",
     "df.to_csv('./scratch/counties_commuters_2020.csv',\n",
     "          columns=['res_geoid', 'wrk_geoid', 'workers'], index=False)\n",
@@ -185,7 +194,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 8,
+   "execution_count": 19,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -201,7 +210,7 @@
     "    source={\n",
     "        'label': 'Census:name',\n",
     "        'population': 'Census',\n",
-    "        'commuters': CSVSpecMatrix(file_path=Path(\"./epymorph/data/geo/csv/counties_commuters.csv\"),\n",
+    "        'commuters': CSVSpecMatrix(file_path=Path(\"./scratch/counties_commuters_2020.csv\"),\n",
     "                                   from_key_col=0, to_key_col=1, data_col=2, key_type=\"geoid\", skiprows=1),\n",
     "        'commuters_census': 'Census:commuters'\n",
     "    }\n",
@@ -210,14 +219,14 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 9,
+   "execution_count": 20,
    "metadata": {},
    "outputs": [],
    "source": [
     "geo = DynamicGeo.from_library(spec, adrio_maker_library)\n",
     "\n",
     "geo.validate()\n",
-    "if not geo['commuters'].all() == geo['commuters_census'].all():\n",
+    "if not array_equal(geo['commuters'], geo['commuters_census']):\n",
     "    raise Exception(\"Data not equal.\")"
    ]
   }

diff --git a/epymorph/geo/adrio/census/adrio_census.py b/epymorph/geo/adrio/census/adrio_census.py
@@ -191,6 +191,7 @@ def fetch_acs5(self, variables: list[str], scope: CensusScope, year: int) -> Dat
 
     def fetch_sf(self, scope: CensusScope) -> GeoDataFrame:
         """Utility function to fetch shape files from Census for specified regions."""
+
         # call appropriate pygris function based on granularity and sort result
         match scope:
             case StateScopeAll() | StateScope():
@@ -215,7 +216,9 @@ def fetch_sf(self, scope: CensusScope) -> GeoDataFrame:
         return GeoDataFrame(df)
 
     def fetch_commuters(self, scope: CensusScope, year: int) -> DataFrame:
-        """Utility function to fetch commuting data from .xslx format filtered down to requested regions."""
+        """
+        Utility function to fetch commuting data from .xlsx format filtered down to requested regions.
+        """
         # check for invalid granularity
         if isinstance(scope, TractScope) or isinstance(scope, BlockGroupScope):
             msg = "Commuting data cannot be retrieved for tract or block group granularities"

diff --git a/epymorph/geo/adrio/file/adrio_csv.py b/epymorph/geo/adrio/file/adrio_csv.py
@@ -49,7 +49,6 @@ class CSVSpecMatrix():
 @dataclass
 class CSVSpecMatrixTime(CSVSpecMatrix):
     """Dataclass to store parameters for time-series CSV ADRIO with data shape TxNxN."""
-
     time_col: int