-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
GeoScopes and an encoding of the main US Census geographic hierarchy. (…
…#108) Removes pygris dependency from epymorph.plots
- Loading branch information
Showing
13 changed files
with
1,298 additions
and
58 deletions.
There are no files selected for viewing
Large diffs are not rendered by default.
Oops, something went wrong.
Large diffs are not rendered by default.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,147 @@ | ||
{ | ||
"cells": [ | ||
{ | ||
"cell_type": "markdown", | ||
"metadata": {}, | ||
"source": [ | ||
"# devlog 2024-05-03\n", | ||
"\n", | ||
"_Author: Tyler Coles_\n", | ||
"\n", | ||
"Testing our us_census functions for loading canonical sets of IDs for Census granularities from state to block group for the years 2000, 2010, and 2020.\n", | ||
"\n", | ||
"Since this is our source of truth for these delineations, we want to make sure we're getting complete data. One thing we can test is that, at each level of granularity above block group, every node contains at least one child node. That is, every state should contain a county, every county a tract, and every tract a block group. Otherwise, we know something is missing.\n", | ||
"\n", | ||
"(This may seem like a trivial test, but in fact it discovered that my original assumptions about how TIGER provides the data were invalid and has already saved us from bugs!)" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 5, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"import epymorph.geography.us_census as c\n", | ||
"\n", | ||
"\n", | ||
"class Fail(Exception):\n", | ||
" pass\n", | ||
"\n", | ||
"\n", | ||
"def test_year(year: c.CensusYear) -> None:\n", | ||
" # 1. test that we have 56 states\n", | ||
" states = c.get_us_states(year).geoid\n", | ||
"\n", | ||
" if len(states) != 56:\n", | ||
" raise Fail(\"There weren't 56 states!\")\n", | ||
"\n", | ||
" # 2. test that each state contains at least one county\n", | ||
" counties = c.get_us_counties(year).geoid\n", | ||
" counties_by_state = c.STATE.grouped(counties)\n", | ||
"\n", | ||
" exs = []\n", | ||
" for x in states:\n", | ||
" if x not in counties_by_state or len(counties_by_state[x]) == 0:\n", | ||
" exs.append(Fail(f\"State {x} does not have at least one county.\"))\n", | ||
" if len(exs) > 0:\n", | ||
" raise ExceptionGroup(\"Failed checking counties.\", exs)\n", | ||
"\n", | ||
" # 3. test that each county contains at least one tract\n", | ||
" tracts = c.get_us_tracts(year).geoid\n", | ||
" tracts_by_county = c.COUNTY.grouped(tracts)\n", | ||
"\n", | ||
" exs = []\n", | ||
" for x in counties:\n", | ||
" if x not in tracts_by_county or len(tracts_by_county[x]) == 0:\n", | ||
" exs.append(Fail(f\"County {x} does not have at least one tract.\"))\n", | ||
" if len(exs) > 0:\n", | ||
" raise ExceptionGroup(\"Failed checking tracts.\", exs)\n", | ||
"\n", | ||
" # 4. test that each tract contains at least one block group\n", | ||
" cbgs = c.get_us_block_groups(year).geoid\n", | ||
" cbgs_by_tract = c.TRACT.grouped(cbgs)\n", | ||
"\n", | ||
" exs = []\n", | ||
" for x in tracts:\n", | ||
" if x not in cbgs_by_tract or len(cbgs_by_tract[x]) == 0:\n", | ||
" exs.append(Fail(f\"Tract {x} does not have at least one block group.\"))\n", | ||
" if len(exs) > 0:\n", | ||
" raise ExceptionGroup(\"Failed checking block groups.\", exs)\n", | ||
"\n", | ||
" print(f\"Census year {year} passed!\")" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 6, | ||
"metadata": {}, | ||
"outputs": [ | ||
{ | ||
"name": "stdout", | ||
"output_type": "stream", | ||
"text": [ | ||
"Census year 2020 passed!\n" | ||
] | ||
} | ||
], | ||
"source": [ | ||
"test_year(2020)" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 7, | ||
"metadata": {}, | ||
"outputs": [ | ||
{ | ||
"name": "stdout", | ||
"output_type": "stream", | ||
"text": [ | ||
"Census year 2010 passed!\n" | ||
] | ||
} | ||
], | ||
"source": [ | ||
"test_year(2010)" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 8, | ||
"metadata": {}, | ||
"outputs": [ | ||
{ | ||
"name": "stdout", | ||
"output_type": "stream", | ||
"text": [ | ||
"Census year 2000 passed!\n" | ||
] | ||
} | ||
], | ||
"source": [ | ||
"test_year(2000)" | ||
] | ||
} | ||
], | ||
"metadata": { | ||
"kernelspec": { | ||
"display_name": ".venv", | ||
"language": "python", | ||
"name": "python3" | ||
}, | ||
"language_info": { | ||
"codemirror_mode": { | ||
"name": "ipython", | ||
"version": 3 | ||
}, | ||
"file_extension": ".py", | ||
"mimetype": "text/x-python", | ||
"name": "python", | ||
"nbconvert_exporter": "python", | ||
"pygments_lexer": "ipython3", | ||
"version": "3.11.9" | ||
} | ||
}, | ||
"nbformat": 4, | ||
"nbformat_minor": 2 | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Empty file.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,13 @@ | ||
from abc import abstractmethod | ||
from typing import Protocol | ||
|
||
import numpy as np | ||
from numpy.typing import NDArray | ||
|
||
|
||
class GeoScope(Protocol):
    """The common interface expected of all geo scopes.

    This is a structural interface: it declares the single method below and
    carries no state or behavior of its own.
    """

    @abstractmethod
    def get_node_ids(self) -> NDArray[np.str_]:
        """Retrieve the complete list of node IDs included in this scope.

        Returns:
            A numpy array of strings, one entry per node ID in the scope.
        """
Empty file.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,119 @@ | ||
# pylint: disable=missing-docstring | ||
import unittest | ||
|
||
import numpy as np | ||
|
||
from epymorph.error import GeographyError | ||
from epymorph.geography.us_census import (BLOCK, BLOCK_GROUP, | ||
CENSUS_GRANULARITY, COUNTY, STATE, | ||
TRACT) | ||
|
||
|
||
class TestCensusGranularity(unittest.TestCase):
    """Tests for the granularity objects imported from `epymorph.geography.us_census`.

    Every GEOID used here is a fixed literal built from the same prefix
    ("04" + "003" + "999999" + "8" + "777"), so the expected value of each
    operation can be read directly off the input string.
    """

    def test_is_nested(self):
        """Check `is_nested` for every ordered pair of granularities."""
        granularities = list(CENSUS_GRANULARITY)
        # A lower-triangular matrix (diagonal included) is the expected truth
        # table: entry [i, j] is 1 exactly when j <= i. Sizing it from the
        # sequence avoids a hard-coded count of granularities.
        expected = np.tri(len(granularities))
        for i, test in enumerate(granularities):
            for j, outer in enumerate(granularities):
                with self.subTest(test=test.name, outer=outer.name):
                    if expected[i, j] == 1:
                        self.assertTrue(test.is_nested(outer.name))
                    else:
                        self.assertFalse(test.is_nested(outer.name))

    def test_matches(self):
        """Check `matches` accepts GEOIDs of its own granularity and rejects others."""
        matching = [
            (STATE, "04"),
            (COUNTY, "04003"),
            (TRACT, "04003999999"),
            (BLOCK_GROUP, "040039999998"),
            (BLOCK, "040039999998777"),
        ]
        for granularity, geoid in matching:
            with self.subTest(granularity=granularity, geoid=geoid):
                self.assertTrue(granularity.matches(geoid))

        non_matching = [
            (STATE, "0"),
            (STATE, ""),
            (STATE, "04003"),
            (STATE, "AZ"),
            (COUNTY, "04"),
            (COUNTY, "04003999999"),
        ]
        for granularity, geoid in non_matching:
            with self.subTest(granularity=granularity, geoid=geoid):
                self.assertFalse(granularity.matches(geoid))

    def test_extract(self):
        """Check `extract` returns only the requested granularity's own segment."""
        cases = [
            (STATE, "04", [
                "04",
                "04003",
                "04003999999",
                "040039999998",
                "040039999998777",
            ]),
            (COUNTY, "003", [
                "04003",
                "04003999999",
                "040039999998",
                "040039999998777",
            ]),
            (TRACT, "999999", [
                "04003999999",
                "040039999998",
                "040039999998777",
            ]),
            (BLOCK_GROUP, "8", [
                "040039999998",
                "040039999998777",
            ]),
            (BLOCK, "8777", [
                "040039999998777",
            ]),
        ]
        for granularity, segment, geoids in cases:
            for geoid in geoids:
                with self.subTest(granularity=granularity, geoid=geoid):
                    self.assertEqual(segment, granularity.extract(geoid))

        # Asking a state-level GEOID for any finer segment must raise.
        for granularity in (COUNTY, TRACT, BLOCK_GROUP, BLOCK):
            with self.subTest(granularity=granularity, geoid="04"):
                with self.assertRaises(GeographyError):
                    granularity.extract("04")

    def test_truncate(self):
        """Check `truncate` cuts a GEOID down to the requested granularity."""
        cases = [
            (STATE, "04", [
                "04",
                "04003",
                "04003999999",
                "040039999998",
                "040039999998777",
            ]),
            (COUNTY, "04003", [
                "04003",
                "04003999999",
                "040039999998",
                "040039999998777",
            ]),
            (TRACT, "04003999999", [
                "04003999999",
                "040039999998",
                "040039999998777",
            ]),
            (BLOCK_GROUP, "040039999998", [
                "040039999998",
                "040039999998777",
            ]),
            (BLOCK, "040039999998777", [
                "040039999998777",
            ]),
        ]
        for granularity, truncated, geoids in cases:
            for geoid in geoids:
                with self.subTest(granularity=granularity, geoid=geoid):
                    self.assertEqual(truncated, granularity.truncate(geoid))

    def test_truncate_list(self):
        """Check `truncate_list` yields each truncated GEOID once, in first-seen order."""
        exp = ["08", "35", "04"]
        act = STATE.truncate_list(["08001", "35", "04003", "08002", "04005", "35005"])
        self.assertEqual(exp, act)

    def test_decompose(self):
        """Check `decompose` splits a GEOID into its per-granularity segments."""
        cases = [
            (STATE, "04", ("04",)),
            (COUNTY, "04003", ("04", "003")),
            (TRACT, "04003999999", ("04", "003", "999999")),
            (BLOCK_GROUP, "040039999998", ("04", "003", "999999", "8")),
            (BLOCK, "040039999998777", ("04", "003", "999999", "8", "8777")),
        ]
        for granularity, geoid, parts in cases:
            with self.subTest(granularity=granularity, geoid=geoid):
                self.assertEqual(parts, granularity.decompose(geoid))

        # A county-level GEOID handed to a different granularity must raise,
        # whether that granularity is coarser (STATE) or finer (TRACT).
        for granularity in (STATE, TRACT):
            with self.subTest(granularity=granularity, geoid="04013"):
                with self.assertRaises(GeographyError):
                    granularity.decompose("04013")

    def test_grouped(self):
        """Check `grouped` buckets tract GEOIDs under their county GEOID."""
        expected = {
            "04004": np.array(["04004111111", "04004222222", "04004333333"]),
            "04013": np.array(["04013444444", "04013555555", "04013666666"]),
        }
        actual = COUNTY.grouped(np.array([
            "04004111111", "04004222222", "04004333333",
            "04013444444", "04013555555", "04013666666",
        ]))
        # Compare keys first so a missing group fails here, not as a KeyError.
        self.assertSetEqual(set(expected.keys()), set(actual.keys()))
        for county, tracts in expected.items():
            with self.subTest(county=county):
                np.testing.assert_array_equal(tracts, actual[county])
Oops, something went wrong.