Skip to content

Commit

Permalink
GeoScopes and an encoding of the main US Census geographic hierarchy. (
Browse files Browse the repository at this point in the history
…#108)

Removes pygris dependency from epymorph.plots
  • Loading branch information
JavadocMD authored May 6, 2024
1 parent 5544701 commit 008c435
Show file tree
Hide file tree
Showing 13 changed files with 1,298 additions and 58 deletions.
22 changes: 7 additions & 15 deletions doc/demo/02-states-GEO.ipynb

Large diffs are not rendered by default.

22 changes: 7 additions & 15 deletions doc/demo/03-counties-GEO.ipynb

Large diffs are not rendered by default.

147 changes: 147 additions & 0 deletions doc/devlog/2024-05-03.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,147 @@
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# devlog 2024-05-03\n",
"\n",
"_Author: Tyler Coles_\n",
"\n",
"Testing our us_census functions for loading canonical sets of IDs for Census granularities from state to block group for the years 2000, 2010, and 2020.\n",
"\n",
"Since this is our source of truth for these delineations, we want to make sure we're getting complete data. One thing we can test is that at each level of granularity (above block group) each node should contain at least one child node. That is, every state should contain a county, every county a tract, and every tract a block group. Otherwise we know something is missing.\n",
"\n",
"(This may seem like a trivial test, but in fact it discovered that my original assumptions about how TIGER provides the data were invalid and has already saved us from bugs!)"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [],
"source": [
"import epymorph.geography.us_census as c\n",
"\n",
"\n",
"class Fail(Exception):\n",
" pass\n",
"\n",
"\n",
"def test_year(year: c.CensusYear) -> None:\n",
" # 1. test that we have 56 states\n",
" states = c.get_us_states(year).geoid\n",
"\n",
" if len(states) != 56:\n",
" raise Fail(\"There weren't 56 states!\")\n",
"\n",
" # 2. test that each state contains at least one county\n",
" counties = c.get_us_counties(year).geoid\n",
" counties_by_state = c.STATE.grouped(counties)\n",
"\n",
" exs = []\n",
" for x in states:\n",
" if x not in counties_by_state or len(counties_by_state[x]) == 0:\n",
" exs.append(Fail(f\"State {x} does not have at least one county.\"))\n",
" if len(exs) > 0:\n",
" raise ExceptionGroup(\"Failed checking counties.\", exs)\n",
"\n",
" # 3. test that each county contains at least one tract\n",
" tracts = c.get_us_tracts(year).geoid\n",
" tracts_by_county = c.COUNTY.grouped(tracts)\n",
"\n",
" exs = []\n",
" for x in counties:\n",
" if x not in tracts_by_county or len(tracts_by_county[x]) == 0:\n",
" exs.append(Fail(f\"County {x} does not have at least one tract.\"))\n",
" if len(exs) > 0:\n",
" raise ExceptionGroup(\"Failed checking tracts.\", exs)\n",
"\n",
" # 4. test that each tract contains at least one block group\n",
" cbgs = c.get_us_block_groups(year).geoid\n",
" cbgs_by_tract = c.TRACT.grouped(cbgs)\n",
"\n",
" exs = []\n",
" for x in tracts:\n",
" if x not in cbgs_by_tract or len(cbgs_by_tract[x]) == 0:\n",
" exs.append(Fail(f\"Tract {x} does not have at least one block group.\"))\n",
" if len(exs) > 0:\n",
" raise ExceptionGroup(\"Failed checking block groups.\", exs)\n",
"\n",
" print(f\"Census year {year} passed!\")"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Census year 2020 passed!\n"
]
}
],
"source": [
"test_year(2020)"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Census year 2010 passed!\n"
]
}
],
"source": [
"test_year(2010)"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Census year 2000 passed!\n"
]
}
],
"source": [
"test_year(2000)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": ".venv",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.9"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
1 change: 1 addition & 0 deletions doc/devlog/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,7 @@ This folder is a handy place to put Jupyter notebooks or other documents which h
| 2024-04-04-draw-demo.ipynb | Izaac | | Showing the new draw module for visualising IPMs (NEW!) |
| 2024-04-16.ipynb | Izaac | | Showing error handling for common ipm errors (NEW!)|
| 2024-04-25.ipynb | Tyler | | Integration test: epymorph cache utilities |
| 2024-05-03.ipynb | Tyler | | Integration test: loading US Census geography from TIGER |

## Contributing

Expand Down
6 changes: 6 additions & 0 deletions epymorph/error.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,12 @@
from typing import Self


class GeographyError(Exception):
    """Signals a problem while working with a geographic system representation."""

    # NOTE: deliberately narrow in scope. This is not a catch-all for errors
    # from the epymorph GEO module; it covers things like the utility
    # functions that work with US Census delineations.


class UnknownModel(Exception):
"""Exception for the inability to load a model as specified."""
model_type: str
Expand Down
Empty file added epymorph/geography/__init__.py
Empty file.
13 changes: 13 additions & 0 deletions epymorph/geography/scope.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
from abc import abstractmethod
from typing import Protocol

import numpy as np
from numpy.typing import NDArray


class GeoScope(Protocol):
    """Structural interface that every geo scope is expected to satisfy."""

    @abstractmethod
    def get_node_ids(self) -> NDArray[np.str_]:
        """Return the full set of node IDs covered by this scope.

        Returns:
            A NumPy string array holding every node ID in the scope.
        """
Empty file.
119 changes: 119 additions & 0 deletions epymorph/geography/test/us_census_test.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,119 @@
# pylint: disable=missing-docstring
import unittest

import numpy as np

from epymorph.error import GeographyError
from epymorph.geography.us_census import (BLOCK, BLOCK_GROUP,
CENSUS_GRANULARITY, COUNTY, STATE,
TRACT)


class TestCensusGranularity(unittest.TestCase):
    """Checks the geoid helpers offered by the Census granularity objects.

    The sample geoids all share the same prefix and grow by one component at
    each level: "04" (state), "04003" (county), "04003999999" (tract),
    "040039999998" (block group) and "040039999998777" (block).
    """

    def test_is_nested(self):
        """`is_nested` follows a lower-triangular truth table."""
        # np.tri(5) has ones on and below the diagonal, so entry [row, col]
        # is 1 exactly when col <= row.
        # NOTE(review): this presumes CENSUS_GRANULARITY enumerates five
        # granularities in nesting order -- confirm against us_census.
        truth = np.tri(5)
        for row, inner in enumerate(CENSUS_GRANULARITY):
            for col, outer in enumerate(CENSUS_GRANULARITY):
                check = self.assertTrue if truth[row, col] == 1 else self.assertFalse
                check(inner.is_nested(outer.name))

    def test_matches(self):
        """`matches` accepts geoids of its own level and rejects the rest."""
        accepted = [
            (STATE, "04"),
            (COUNTY, "04003"),
            (TRACT, "04003999999"),
            (BLOCK_GROUP, "040039999998"),
            (BLOCK, "040039999998777"),
        ]
        for granularity, geoid in accepted:
            self.assertTrue(granularity.matches(geoid))

        rejected = [
            (STATE, "0"),
            (STATE, ""),
            (STATE, "04003"),
            (STATE, "AZ"),
            (COUNTY, "04"),
            (COUNTY, "04003999999"),
        ]
        for granularity, geoid in rejected:
            self.assertFalse(granularity.matches(geoid))

    def test_extract(self):
        """`extract` returns one level's own segment, or raises if it is absent."""
        # Each row: granularity, expected segment, geoids to extract from, and
        # a geoid for which extraction must raise (None = no failure case).
        cases = [
            (STATE, "04",
             ["04", "04003", "04003999999", "040039999998", "040039999998777"],
             None),
            (COUNTY, "003",
             ["04003", "04003999999", "040039999998", "040039999998777"],
             "04"),
            (TRACT, "999999",
             ["04003999999", "040039999998", "040039999998777"],
             "04"),
            (BLOCK_GROUP, "8",
             ["040039999998", "040039999998777"],
             "04"),
            (BLOCK, "8777",
             ["040039999998777"],
             "04"),
        ]
        for granularity, segment, geoids, bad_geoid in cases:
            for geoid in geoids:
                self.assertEqual(segment, granularity.extract(geoid))
            if bad_geoid is not None:
                with self.assertRaises(GeographyError):
                    granularity.extract(bad_geoid)

    def test_truncate(self):
        """`truncate` cuts a geoid down to the prefix for the given level."""
        cases = [
            (STATE, "04",
             ["04", "04003", "04003999999", "040039999998", "040039999998777"]),
            (COUNTY, "04003",
             ["04003", "04003999999", "040039999998", "040039999998777"]),
            (TRACT, "04003999999",
             ["04003999999", "040039999998", "040039999998777"]),
            (BLOCK_GROUP, "040039999998",
             ["040039999998", "040039999998777"]),
            (BLOCK, "040039999998777",
             ["040039999998777"]),
        ]
        for granularity, prefix, geoids in cases:
            for geoid in geoids:
                self.assertEqual(prefix, granularity.truncate(geoid))

    def test_truncate_list(self):
        """`truncate_list` yields each truncated value once, in first-seen order."""
        geoids = ["08001", "35", "04003", "08002", "04005", "35005"]
        expected_states = ["08", "35", "04"]
        self.assertEqual(expected_states, STATE.truncate_list(geoids))

    def test_decompose(self):
        """`decompose` splits a geoid into its parts and rejects wrong lengths."""
        cases = [
            (STATE, "04", ("04",)),
            (COUNTY, "04003", ("04", "003")),
            (TRACT, "04003999999", ("04", "003", "999999")),
            (BLOCK_GROUP, "040039999998", ("04", "003", "999999", "8")),
            (BLOCK, "040039999998777", ("04", "003", "999999", "8", "8777")),
        ]
        for granularity, geoid, parts in cases:
            self.assertEqual(parts, granularity.decompose(geoid))

        # A county-length geoid must be refused by both a coarser (STATE) and
        # a finer (TRACT) granularity.
        for granularity in (STATE, TRACT):
            with self.assertRaises(GeographyError):
                granularity.decompose("04013")

    def test_grouped(self):
        """`grouped` buckets tract geoids under their county prefix."""
        tracts = np.array([
            "04004111111", "04004222222", "04004333333",
            "04013444444", "04013555555", "04013666666",
        ])
        expected = {
            "04004": np.array(["04004111111", "04004222222", "04004333333"]),
            "04013": np.array(["04013444444", "04013555555", "04013666666"]),
        }
        actual = COUNTY.grouped(tracts)

        self.assertSetEqual(set(expected), set(actual.keys()))
        for county in ("04004", "04013"):
            np.testing.assert_array_equal(expected[county], actual[county])
Loading

0 comments on commit 008c435

Please sign in to comment.