Skip to content

Commit

Permalink
Allow subset queries of tract and bg tiger files.
Browse files Browse the repository at this point in the history
  • Loading branch information
TJohnsonAZ committed Aug 23, 2024
1 parent 5f94c79 commit 7b8c461
Show file tree
Hide file tree
Showing 2 changed files with 25 additions and 15 deletions.
14 changes: 9 additions & 5 deletions epymorph/adrio/us_tiger.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
from epymorph.data_type import CentroidDType, StructDType
from epymorph.error import DataResourceException
from epymorph.geography.scope import GeoScope
from epymorph.geography.us_census import CensusScope
from epymorph.geography.us_census import STATE, CensusScope
from epymorph.geography.us_tiger import (TigerYear, get_block_groups_geo,
get_block_groups_info,
get_counties_geo, get_counties_info,
Expand Down Expand Up @@ -42,9 +42,11 @@ def _get_geo(scope: CensusScope) -> GeoDataFrame:
case 'county':
gdf = get_counties_geo(year)
case 'tract':
gdf = get_tracts_geo(year)
gdf = get_tracts_geo(year, [STATE.extract(x)
for x in scope.get_node_ids()])
case 'block group':
gdf = get_block_groups_geo(year)
gdf = get_block_groups_geo(year, [STATE.extract(x)
for x in scope.get_node_ids()])
case x:
raise DataResourceException(
f"{x} is not a supported granularity for us_tiger attributes."
Expand All @@ -61,9 +63,11 @@ def _get_info(scope: CensusScope) -> DataFrame:
case 'county':
gdf = get_counties_info(year)
case 'tract':
gdf = get_tracts_info(year)
gdf = get_tracts_info(year, [STATE.extract(x)
for x in scope.get_node_ids()])
case 'block group':
gdf = get_block_groups_info(year)
gdf = get_block_groups_info(year, [STATE.extract(x)
for x in scope.get_node_ids()])
case x:
raise DataResourceException(
f"{x} is not a supported granularity for us_tiger attributes."
Expand Down
26 changes: 16 additions & 10 deletions epymorph/geography/us_tiger.py
Original file line number Diff line number Diff line change
Expand Up @@ -214,9 +214,12 @@ def get_counties_info(year: TigerYear) -> DataFrame:
##########


def _get_tracts_config(year: TigerYear) -> tuple[list[str], list[str], list[str]]:
def _get_tracts_config(year: TigerYear, state_id: Sequence[str] | None = None) -> tuple[list[str], list[str], list[str]]:
"""Produce the args for _get_info or _get_geo (tracts); if `state_id` is given, only states whose GEOID is in it are considered."""
states = get_states_info(year)
if state_id is not None:
states = states[states["GEOID"].isin(state_id)]

match year:
case year if year in range(2011, 2024):
cols = ["GEOID", "ALAND", "INTPTLAT", "INTPTLON"]
Expand Down Expand Up @@ -250,24 +253,27 @@ def state_folder(fips, name):
return cols, urls, ["GEOID", "ALAND", "INTPTLAT", "INTPTLON"]


def get_tracts_geo(year: TigerYear) -> GeoDataFrame:
def get_tracts_geo(year: TigerYear, state_id: Sequence[str] | None = None) -> GeoDataFrame:
"""Get US census tracts for the given census year, with geography; all states by default, or only the states whose GEOID is in `state_id` when it is given."""
return _get_geo(*_get_tracts_config(year))
return _get_geo(*_get_tracts_config(year, state_id))


def get_tracts_info(year: TigerYear) -> DataFrame:
def get_tracts_info(year: TigerYear, state_id: Sequence[str] | None = None) -> DataFrame:
"""Get US census tracts for the given census year, without geography; all states by default, or only the states whose GEOID is in `state_id` when it is given."""
return _get_info(*_get_tracts_config(year))
return _get_info(*_get_tracts_config(year, state_id))


################
# BLOCK GROUPS #
################


def _get_block_groups_config(year: TigerYear) -> tuple[list[str], list[str], list[str]]:
def _get_block_groups_config(year: TigerYear, state_id: Sequence[str] | None = None) -> tuple[list[str], list[str], list[str]]:
"""Produce the args for _get_info or _get_geo (block groups); if `state_id` is given, only states whose GEOID is in it are considered."""
states = get_states_info(year)
if state_id is not None:
states = states[states["GEOID"].isin(state_id)]

match year:
case year if year in range(2011, 2024):
cols = ["GEOID", "ALAND", "INTPTLAT", "INTPTLON"]
Expand Down Expand Up @@ -301,11 +307,11 @@ def state_folder(fips, name):
return cols, urls, ["GEOID", "ALAND", "INTPTLAT", "INTPTLON"]


def get_block_groups_geo(year: TigerYear) -> GeoDataFrame:
def get_block_groups_geo(year: TigerYear, state_id: Sequence[str] | None = None) -> GeoDataFrame:
"""Get US census block groups for the given census year, with geography; all states by default, or only the states whose GEOID is in `state_id` when it is given."""
return _get_geo(*_get_block_groups_config(year))
return _get_geo(*_get_block_groups_config(year, state_id))


def get_block_groups_info(year: TigerYear) -> DataFrame:
def get_block_groups_info(year: TigerYear, state_id: Sequence[str] | None = None) -> DataFrame:
"""Get US census block groups for the given census year, without geography; all states by default, or only the states whose GEOID is in `state_id` when it is given."""
return _get_info(*_get_block_groups_config(year))
return _get_info(*_get_block_groups_config(year, state_id))

0 comments on commit 7b8c461

Please sign in to comment.