Skip to content

Commit

Permalink
Fix scrambled attributes (#132)
Browse files Browse the repository at this point in the history
Adds integration test notebook confirming a subset of the Census ADRIO sources are reporting the expected data.
  • Loading branch information
TJohnsonAZ authored Jul 12, 2024
1 parent 77cc8a1 commit 89fd46c
Show file tree
Hide file tree
Showing 4 changed files with 216 additions and 5 deletions.
10 changes: 5 additions & 5 deletions doc/demo/03-counties-GEO.ipynb

Large diffs are not rendered by default.

209 changes: 209 additions & 0 deletions doc/devlog/2024-07-10.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,209 @@
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# devlog 2024-07-10\n",
"\n",
"_author: Trevor Johnson_\n",
"\n",
"Integration test for Census ADRIOs. This notebook ensures that Census data attributes are being fetched correctly by evaluating:\n",
"- Attribute shape\n",
"- Attribute data type\n",
"- Attribute values\n",
"- Attribute sort order"
]
},
{
"cell_type": "code",
"execution_count": 18,
"metadata": {},
"outputs": [],
"source": [
"from epymorph.data_shape import Shapes\n",
"from epymorph.data_type import CentroidDType\n",
"from epymorph.geo.adrio.census.adrio_census import ADRIOMakerCensus\n",
"from epymorph.geo.spec import Year\n",
"from epymorph.geography.us_census import CountyScope\n",
"from epymorph.simulation import geo_attrib\n",
"\n",
"# one attribute per Census fetch method, all sharing the same scope and time period\n",
"maker = ADRIOMakerCensus()\n",
"geoids = ['04001', '04003', '04005', '04013', '04017']\n",
"scope = CountyScope.in_counties(geoids)\n",
"time_period = Year(2020)\n",
"\n",
"attribs = [\n",
"    geo_attrib('population', int, Shapes.N),\n",
"    geo_attrib('centroid', CentroidDType, Shapes.N),\n",
"    geo_attrib('commuters', int, Shapes.NxN),\n",
"]\n",
"\n",
"# build the ADRIOs in the same order as `attribs`\n",
"population, centroid, commuters = (\n",
"    maker.make_adrio(attrib, scope, time_period) for attrib in attribs\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 17,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"AC5 attribute validation passed.\n",
"Shapefile attribute validation passed.\n",
"Commuting flows attribute validation passed.\n"
]
}
],
"source": [
"import numpy as np\n",
"\n",
"from epymorph.util import check_ndarray\n",
"\n",
"# validate datatype and shape against the number of requested counties rather than\n",
"# the length of the fetched data (checking an array against its own length proves nothing)\n",
"num_nodes = len(geoids)\n",
"num_days = time_period.days\n",
"\n",
"# fetch each value once and reuse it for every check below\n",
"population_value = population.get_value()\n",
"centroid_value = centroid.get_value()\n",
"commuters_value = commuters.get_value()\n",
"\n",
"check_ndarray(population_value, dtype=[int],\n",
"              shape=attribs[0].shape.as_tuple(num_nodes, num_days))\n",
"check_ndarray(centroid_value, dtype=[CentroidDType],\n",
"              shape=attribs[1].shape.as_tuple(num_nodes, num_days))\n",
"check_ndarray(commuters_value, dtype=[int],\n",
"              shape=attribs[2].shape.as_tuple(num_nodes, num_days))\n",
"\n",
"# values retrieved manually from Census table B01001\n",
"population_array = [71714, 126442, 142254, 4412779, 110271]\n",
"\n",
"# values calculated manually using the polygon centroid formula applied to the\n",
"# TIGER shapefile polygons (see the centroid cells later in this notebook)\n",
"centroid_array = np.array([(-109.48884962248498, 35.39552879677974),\n",
"                           (-109.75126313676874, 31.87963708630415),\n",
"                           (-111.77052095609857, 35.838724829519194),\n",
"                           (-112.49151143850366, 33.349039435609264),\n",
"                           (-110.32141934757458, 35.39955033687498)], dtype=CentroidDType)\n",
"\n",
"# values retrieved manually from ACS commuting flows table1 for 2020\n",
"commuters_matrix = [[14190, 0, 149, 347, 1668],\n",
"                    [0, 43820, 32, 160, 5],\n",
"                    [99, 17, 59440, 1160, 525],\n",
"                    [22, 52, 757, 2059135, 240],\n",
"                    [706, 14, 1347, 592, 30520]]\n",
"\n",
"# validate values and sort order; a mismatch raises AssertionError (with a diff of the\n",
"# offending elements) instead of silently skipping the success message\n",
"np.testing.assert_array_equal(population_value, population_array)\n",
"print('AC5 attribute validation passed.')\n",
"\n",
"# tolerances match np.allclose defaults; structured arrays are compared as lists of tuples\n",
"np.testing.assert_allclose(centroid_value.tolist(), centroid_array.tolist(),\n",
"                           rtol=1e-5, atol=1e-8)\n",
"print('Shapefile attribute validation passed.')\n",
"\n",
"np.testing.assert_array_equal(commuters_value, commuters_matrix)\n",
"print('Commuting flows attribute validation passed.')"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"The following cells calculate geographic centroids from shapefiles and compare the result to values calculated by shapely."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from io import BytesIO\n",
"from urllib.request import urlopen\n",
"from geopandas import read_file\n",
"\n",
"# load in shapefile data for use in centroid calculations\n",
"with urlopen(\"https://www2.census.gov/geo/tiger/TIGER2020/COUNTY/tl_2020_us_county.zip\") as f:\n",
"    # buffer the whole archive in memory so the connection is closed before parsing\n",
"    file_buffer = BytesIO(f.read())\n",
"\n",
"gdf_all = read_file(file_buffer, engine=\"fiona\", ignore_geometry=False,\n",
"                    include_fields=[\"GEOID\", \"STUSPS\"])\n",
"\n",
"# keep only the counties under test, sorted by GEOID; sort_values returns a new frame,\n",
"# which avoids mutating a filtered slice in place\n",
"gdf = gdf_all[gdf_all['GEOID'].isin(geoids)].sort_values(by='GEOID')\n",
"geometry = gdf['geometry'].to_list()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"[(-109.48884962242164, 35.395528796753005), (-109.75126313669315, 31.87963708628258), (-111.77052095590304, 35.83872482945673), (-112.49151143850068, 33.34903943560914), (-110.32141934752828, 35.39955033686066)]\n"
]
}
],
"source": [
"# reference values: centroids as reported by shapely's `centroid` property\n",
"centroids = [\n",
"    county_shape.centroid.coords[0]\n",
"    for county_shape in gdf['geometry']\n",
"]\n",
"print(centroids)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"[(-109.48884962248498, 35.39552879677974), (-109.75126313676874, 31.87963708630415), (-111.77052095609857, 35.838724829519194), (-112.49151143850366, 33.349039435609264), (-110.32141934757458, 35.39955033687498)]\n"
]
}
],
"source": [
"# calculate centroids manually using polygon centroid formula https://en.wikipedia.org/wiki/Centroid#Of_a_polygon\n",
"def polygon_centroid(coords):\n",
"    \"\"\"Return the (cx, cy) centroid of a polygon ring using the shoelace-based formula.\n",
"\n",
"    `coords` is a sequence of points indexable as point[0] (x) and point[1] (y).\n",
"    Consecutive points are paired without wrapping around, so the ring is expected\n",
"    to repeat its first point as its last point.\n",
"    Raises ZeroDivisionError if the ring encloses zero area.\n",
"    \"\"\"\n",
"    cross_sum = 0.0  # named to avoid shadowing the builtin `sum`\n",
"    xsum = 0.0\n",
"    ysum = 0.0\n",
"    for p0, p1 in zip(coords, coords[1:]):\n",
"        # cross product of consecutive vertices, shared by the area and both moments\n",
"        cross = (p0[0] * p1[1]) - (p1[0] * p0[1])\n",
"        cross_sum += cross\n",
"        xsum += (p0[0] + p1[0]) * cross\n",
"        ysum += (p0[1] + p1[1]) * cross\n",
"\n",
"    a = cross_sum * 0.5  # signed area of the ring\n",
"\n",
"    cx = (1 / (6 * a)) * xsum\n",
"    cy = (1 / (6 * a)) * ysum\n",
"    return (cx, cy)\n",
"\n",
"\n",
"# only each county's exterior ring is used; interior rings (holes) are not considered\n",
"centroids = [polygon_centroid(list(county.exterior.coords)) for county in geometry]\n",
"\n",
"print(centroids)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": ".venv",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.9"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
1 change: 1 addition & 0 deletions doc/devlog/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,7 @@ This folder is a handy place to put Jupyter notebooks or other documents which h
| 2024-06-03.ipynb | Trevor | | Integration test: using dynamic geos to fetch Census data |
| 2024-06-05.ipynb | Meaghan | | A user manual and basic demonstrations of calling LODES ADRIOs |
| 2024-06-12.ipynb | Trevor | | Integration test: CSV file ADRIOs |
| 2024-07-10.ipynb | Trevor | | Integration test: Census ADRIOs |

## Contributing

Expand Down
1 change: 1 addition & 0 deletions epymorph/geo/adrio/census/adrio_census.py
Original file line number Diff line number Diff line change
Expand Up @@ -221,6 +221,7 @@ def fetch_sf(self, scope: CensusScope) -> GeoDataFrame:
df = df.rename(columns={'GEOID': 'geoid'})

df = df[df['geoid'].isin(scope.get_node_ids())]
df = df.sort_values(by='geoid')

return GeoDataFrame(df)

Expand Down

0 comments on commit 89fd46c

Please sign in to comment.