From 90c904598770ee2a45b12525068cb87e4ad2370f Mon Sep 17 00:00:00 2001 From: TJohnsonAZ <72234106+TJohnsonAZ@users.noreply.github.com> Date: Tue, 27 Aug 2024 13:59:11 -0700 Subject: [PATCH] Tiger subset (#151) Allow subset of TIGER data to be queried from us_tiger.py. --- epymorph/adrio/us_tiger.py | 14 +++++++++----- epymorph/geography/us_tiger.py | 26 ++++++++++++++++---------- 2 files changed, 25 insertions(+), 15 deletions(-) diff --git a/epymorph/adrio/us_tiger.py b/epymorph/adrio/us_tiger.py index 02c57cc2..823a45c3 100644 --- a/epymorph/adrio/us_tiger.py +++ b/epymorph/adrio/us_tiger.py @@ -8,7 +8,7 @@ from epymorph.data_type import CentroidDType, StructDType from epymorph.error import DataResourceException from epymorph.geography.scope import GeoScope -from epymorph.geography.us_census import CensusScope +from epymorph.geography.us_census import STATE, CensusScope from epymorph.geography.us_tiger import (TigerYear, get_block_groups_geo, get_block_groups_info, get_counties_geo, get_counties_info, @@ -42,9 +42,11 @@ def _get_geo(scope: CensusScope) -> GeoDataFrame: case 'county': gdf = get_counties_geo(year) case 'tract': - gdf = get_tracts_geo(year) + gdf = get_tracts_geo(year, list({STATE.extract(x) + for x in scope.get_node_ids()})) case 'block group': - gdf = get_block_groups_geo(year) + gdf = get_block_groups_geo(year, list({STATE.extract(x) + for x in scope.get_node_ids()})) case x: raise DataResourceException( f"{x} is not a supported granularity for us_tiger attributes." @@ -61,9 +63,11 @@ def _get_info(scope: CensusScope) -> DataFrame: case 'county': gdf = get_counties_info(year) case 'tract': - gdf = get_tracts_info(year) + gdf = get_tracts_info(year, list({STATE.extract(x) + for x in scope.get_node_ids()})) case 'block group': - gdf = get_block_groups_info(year) + gdf = get_block_groups_info(year, list({STATE.extract(x) + for x in scope.get_node_ids()})) case x: raise DataResourceException( f"{x} is not a supported granularity for us_tiger attributes." diff --git a/epymorph/geography/us_tiger.py b/epymorph/geography/us_tiger.py index 7a0d5a27..a5adb5e5 100644 --- a/epymorph/geography/us_tiger.py +++ b/epymorph/geography/us_tiger.py @@ -214,9 +214,12 @@ def get_counties_info(year: TigerYear) -> DataFrame: ########## -def _get_tracts_config(year: TigerYear) -> tuple[list[str], list[str], list[str]]: +def _get_tracts_config(year: TigerYear, state_id: Sequence[str] | None = None) -> tuple[list[str], list[str], list[str]]: """Produce the args for _get_info or _get_geo (tracts).""" states = get_states_info(year) + if state_id is not None: + states = states[states["GEOID"].isin(state_id)] + match year: case year if year in range(2011, 2024): cols = ["GEOID", "ALAND", "INTPTLAT", "INTPTLON"] @@ -250,14 +253,14 @@ def state_folder(fips, name): return cols, urls, ["GEOID", "ALAND", "INTPTLAT", "INTPTLON"] -def get_tracts_geo(year: TigerYear) -> GeoDataFrame: +def get_tracts_geo(year: TigerYear, state_id: Sequence[str] | None = None) -> GeoDataFrame: """Get all US census tracts for the given census year, with geography.""" - return _get_geo(*_get_tracts_config(year)) + return _get_geo(*_get_tracts_config(year, state_id)) -def get_tracts_info(year: TigerYear) -> DataFrame: +def get_tracts_info(year: TigerYear, state_id: Sequence[str] | None = None) -> DataFrame: """Get all US census tracts for the given census year, without geography.""" - return _get_info(*_get_tracts_config(year)) + return _get_info(*_get_tracts_config(year, state_id)) ################ @@ -265,9 +268,12 @@ def get_tracts_info(year: TigerYear) -> DataFrame: ################ -def _get_block_groups_config(year: TigerYear) -> tuple[list[str], list[str], list[str]]: +def _get_block_groups_config(year: TigerYear, state_id: Sequence[str] | None = None) -> tuple[list[str], list[str], list[str]]: """Produce the args for _get_info or _get_geo (block groups).""" states = get_states_info(year) + if state_id is not None: + states = states[states["GEOID"].isin(state_id)] + match year: case year if year in range(2011, 2024): cols = ["GEOID", "ALAND", "INTPTLAT", "INTPTLON"] @@ -301,11 +307,11 @@ def state_folder(fips, name): return cols, urls, ["GEOID", "ALAND", "INTPTLAT", "INTPTLON"] -def get_block_groups_geo(year: TigerYear) -> GeoDataFrame: +def get_block_groups_geo(year: TigerYear, state_id: Sequence[str] | None = None) -> GeoDataFrame: """Get all US census block groups for the given census year, with geography.""" - return _get_geo(*_get_block_groups_config(year)) + return _get_geo(*_get_block_groups_config(year, state_id)) -def get_block_groups_info(year: TigerYear) -> DataFrame: +def get_block_groups_info(year: TigerYear, state_id: Sequence[str] | None = None) -> DataFrame: """Get all US census block groups for the given census year, without geography.""" - return _get_info(*_get_block_groups_config(year)) + return _get_info(*_get_block_groups_config(year, state_id))