Skip to content

Commit

Permalink
LODES initial implementation. (#123)
Browse files Browse the repository at this point in the history
* Work in progress LODES ADRIO

* LODES work in progress

* Fixed up caching, aggregating multiple states, and added GeoScopes FIPS to code logic

* Updated LODES

* Addition of LODES User Manual

* Small changes for geography

* GeoScope call adaptation

* Geoscopes logic for input geoids

* Revert "Geoscopes logic for input geoids"

This reverts commit d4a3f35.

* Fixed commit, adapts LODES for accepting given geoids

* Slight update to jupyter notebooks

* Name ADRIO and updated user manual

* LODES user manual, demo, and py file updates.

* Reformatted User Manual and Fixed lodes.py

* CensusScope year fix and written details
  • Loading branch information
meaghan66 authored Jun 28, 2024
1 parent c0a31c3 commit 90cd112
Show file tree
Hide file tree
Showing 6 changed files with 680 additions and 2 deletions.
147 changes: 147 additions & 0 deletions doc/devlog/2024-05-09-lodes-adrio-demo.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,147 @@
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# devlog 2024-05-09 LODES ADRIO Demo\n",
"\n",
"_author: Meaghan Freund_\n",
"\n",
"Provides a full geo spec, listing every attribute and its source, so that calls to the LODES ADRIO maker can be tested and demonstrated."
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"from epymorph.data_shape import Shapes\n",
"from epymorph.geo.spec import DynamicGeoSpec, Year, attrib\n",
"from epymorph.geography.us_census import (CountyScope)\n",
"\n",
"spec = DynamicGeoSpec(\n",
" attributes=[\n",
" attrib('label', str, Shapes.N),\n",
" attrib('geoid', str, Shapes.N),\n",
" attrib('commuters', int, Shapes.NxN),\n",
" attrib('commuters_29_under', int, Shapes.NxN),\n",
" attrib('commuters_30_to_54', int, Shapes.NxN),\n",
" attrib('commuters_55_over', int, Shapes.NxN),\n",
" attrib('commuters_1250_under_earnings', int, Shapes.NxN),\n",
" attrib('commuters_1251_to_3333_earnings', int, Shapes.NxN),\n",
" attrib('commuters_3333_over_earnings', int, Shapes.NxN),\n",
" attrib('commuters_goods_producing_industry', int, Shapes.NxN),\n",
" attrib('commuters_trade_transport_utility_industry', int, Shapes.NxN),\n",
" attrib('commuters_other_industry', int, Shapes.NxN),\n",
" attrib('all_jobs', int, Shapes.NxN),\n",
" attrib('primary_jobs', int, Shapes.NxN),\n",
" attrib('all_private_jobs', int, Shapes.NxN),\n",
" attrib('private_primary_jobs', int, Shapes.NxN),\n",
" attrib('all_federal_jobs', int, Shapes.NxN),\n",
" attrib('federal_primary_jobs', int, Shapes.NxN)\n",
" ],\n",
" time_period=Year(2015),\n",
" scope=CountyScope.in_states_by_code([\"AZ\", \"CO\", \"NM\", \"NV\"]),\n",
" source={\n",
" 'label': 'LODES:geoid',\n",
" 'geoid': 'LODES',\n",
" 'commuters': 'LODES',\n",
" 'commuters_29_under': 'LODES',\n",
" 'commuters_30_to_54': 'LODES',\n",
" 'commuters_55_over': 'LODES',\n",
" 'commuters_1250_under_earnings': 'LODES',\n",
" 'commuters_1251_to_3333_earnings': 'LODES',\n",
" 'commuters_3333_over_earnings': 'LODES',\n",
" 'commuters_goods_producing_industry': 'LODES',\n",
" 'commuters_trade_transport_utility_industry': 'LODES',\n",
" 'commuters_other_industry': 'LODES',\n",
" 'all_jobs': 'LODES',\n",
" 'primary_jobs': 'LODES',\n",
" 'all_private_jobs': 'LODES',\n",
" 'private_primary_jobs': 'LODES',\n",
" 'all_federal_jobs': 'LODES',\n",
" 'federal_primary_jobs': 'LODES'\n",
" }\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"from epymorph.geo.adrio import adrio_maker_library\n",
"from epymorph.geo.dynamic import DynamicGeo\n",
"\n",
"geo = DynamicGeo.from_library(spec, adrio_maker_library)"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"[array(['04001', '04003', '04005', '04007', '04009', '04011', '04012',\n",
" '04013', '04015', '04017', '04019', '04021', '04023', '04025',\n",
" '04027', '08001', '08003', '08005', '08007', '08009', '08011',\n",
" '08013', '08014', '08015', '08017', '08019', '08021', '08023',\n",
" '08025', '08027', '08029', '08031', '08033', '08035', '08037',\n",
" '08039', '08041', '08043', '08045', '08047', '08049', '08051',\n",
" '08053', '08055', '08057', '08059', '08061', '08063', '08065',\n",
" '08067', '08069', '08071', '08073', '08075', '08077', '08079',\n",
" '08081', '08083', '08085', '08087', '08089', '08091', '08093',\n",
" '08095', '08097', '08099', '08101', '08103', '08105', '08107',\n",
" '08109', '08111', '08113', '08115', '08117', '08119', '08121',\n",
" '08123', '08125', '32001', '32003', '32005', '32007', '32009',\n",
" '32011', '32013', '32015', '32017', '32019', '32021', '32023',\n",
" '32027', '32029', '32031', '32033', '32510', '35001', '35003',\n",
" '35005', '35006', '35007', '35009', '35011', '35013', '35015',\n",
" '35017', '35019', '35021', '35023', '35025', '35027', '35028',\n",
" '35029', '35031', '35033', '35035', '35037', '35039', '35041',\n",
" '35043', '35045', '35047', '35049', '35051', '35053', '35055',\n",
" '35057', '35059', '35061'], dtype='<U5'), array([[ 7313, 21, 649, ..., 0, 1, 0],\n",
" [ 39, 21713, 108, ..., 0, 0, 1],\n",
" [ 1109, 67, 36806, ..., 0, 0, 2],\n",
" ...,\n",
" [ 6, 0, 1, ..., 982, 5, 80],\n",
" [ 0, 0, 0, ..., 11, 1012, 1],\n",
" [ 14, 0, 12, ..., 89, 0, 7714]])]\n"
]
}
],
"source": [
"values = [geo['label'], geo['commuters']]\n",
"\n",
"print(values)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": ".venv",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.0"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
243 changes: 243 additions & 0 deletions doc/devlog/2024-06-05.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,243 @@
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# 2024/06/05 LODES User Manual\n",
"_Author: Meaghan Freund_\n",
"\n",
"### **Introduction**\n",
"LODES (LEHD Origin-Destination Employment Statistics) is a collection of state-organized data files published by the U.S. Census Bureau as part of the LEHD (Longitudinal Employer-Household Dynamics) program. The datasets are derived from LEHD data, which is produced by combining previously collected survey and administrative data from participating states on jobs, businesses, and workers.\n",
"\n",
"### **Data Collection and Updates**\n",
"\n",
"LODES data is collected and mapped at the smallest geographic unit that the Census Bureau uses: the census block. This data is collected annually and updated regularly through new versions of LODES. The most recent version, LODES8, spans 2002 through 2021 (inclusive) and maps all blocks using 2020 Census delineations, including blocks that were originally defined under past Census delineations, such as those of the 2010 Census.\n",
"\n",
"### **Geographic Coverage**\n",
"\n",
"LODES covers the 50 U.S. states plus Washington D.C., which it treats as its own state (51 in total), as well as territorial partners such as Puerto Rico and the Virgin Islands. However, in LODES8, Puerto Rico is the only territorial partner that is covered. Similarly, certain states do not have files for some of the years that LODES provides. Reasons include data being unavailable for a state or a location not being in regular production for LODES.\n",
"The current list of exceptions includes:\n",
"- *2002*: Arkansas, Arizona, DC, Massachusetts, Mississippi, New Hampshire\n",
"- *2003*: Arizona, DC, Massachusetts, Mississippi\n",
"- *2004-2009*: DC, Massachusetts\n",
"- *2010*: Massachusetts\n",
"- *2017-2018*: Alaska\n",
"- *2019-2021*: Alaska, Arkansas, Mississippi\n",
"\n",
"These state-year combinations are also missing information in the other LODES files; the ADRIO maker omits them from its calls altogether.\n",
"\n",
"### **Data Files**\n",
"\n",
"Of the three file types available in LODES (RAC, WAC, and OD), the ADRIO maker relies on the Origin-Destination (OD) files to map worker flows between residence and workplace locations as matrices. The Origin-Destination files provide the number of individuals who live in a specific home GEOID and commute to a corresponding work GEOID.\n",
"\n",
"### **Additional Resources**\n",
"\n",
"For more detailed information:\n",
"- Visit the [LEHD Data Page](https://lehd.ces.census.gov/data/#lodes) on the Census Bureau's website concerning the LODES files.\n",
"- Refer to the documentation for the most recent version, [LODES8.1](https://lehd.ces.census.gov/data/lodes/LODES8/LODESTechDoc8.1.pdf)."
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### **Basic Queries**\n",
"\n",
"The 'label' and 'commuters' attributes are two simple yet essential queries that show the basic functionality of the LODES ADRIO maker. The 'label' query returns the GEOIDs involved in the commuter matrices: the scope given by the user (in this case, states) is translated into a list of GEOIDs. The 'commuters' query returns, as a matrix, the total number of workers moving from a home GEOID to a work GEOID. The rows of the matrix represent the residence GEOID and the columns represent the work location GEOID."
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Home/Work GEOIDs: ['04' '08' '32' '35']\n",
"\n",
"Commuters Matrix:\n",
" [[2550132 2582 13263 8100]\n",
" [ 1202 2405258 382 5557]\n",
" [ 3552 535 1179411 361]\n",
" [ 6813 4824 409 764244]]\n"
]
}
],
"source": [
"from epymorph.geo.adrio import adrio_maker_library\n",
"from epymorph.data_shape import Shapes\n",
"from epymorph.geo.dynamic import DynamicGeo\n",
"from epymorph.geo.spec import DynamicGeoSpec, Year, attrib\n",
"from epymorph.geography.us_census import (StateScope)\n",
"\n",
"spec = DynamicGeoSpec(\n",
" attributes=[\n",
" attrib('label', str, Shapes.N),\n",
" attrib('commuters', int, Shapes.NxN),\n",
" ],\n",
" time_period=Year(2015),\n",
" scope=StateScope.in_states_by_code([\"AZ\", \"CO\", \"NV\", \"NM\"]),\n",
" source={\n",
" 'label': 'LODES:geoid',\n",
" 'commuters': 'LODES'\n",
" }\n",
")\n",
"\n",
"geo = DynamicGeo.from_library(spec, adrio_maker_library)\n",
"\n",
"\n",
"print(f\"Home/Work GEOIDs: {geo['label']}\\n\")\n",
"\n",
"print(f\"Commuters Matrix:\\n {geo['commuters']}\")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Attributes\n",
"\n",
"The 'commuters' attribute outputs the total number of workers commuting, but LODES also provides three categories of attributes that break workers down by type: Age, Monthly Income, and Industry Sector. Each category is divided into three ranges, and the sum of the ranges equals the total number of workers. All of these categories and the total commuters are returned as NxN matrices of integers; only the label query is not.\n",
"\n",
"## Age\n",
"- 'commuters_29_under'\n",
" - Commuters that are ages 29 and under.\n",
"- 'commuters_30_to_54'\n",
" - Commuters that are between the ages of 30 and 54.\n",
"- 'commuters_55_over'\n",
" - Commuters that are ages 55 and over.\n",
"\n",
"## Monthly Income\n",
"- 'commuters_1250_under_earnings'\n",
" - Commuters that earn \\$1250 and under per month.\n",
"- 'commuters_1251_to_3333_earnings'\n",
" - Commuters that earn between \\$1251 and \\$3333 per month.\n",
"- 'commuters_3333_over_earnings'\n",
" - Commuters that earn over \\$3333 per month.\n",
"\n",
"## Industry Sector\n",
"- 'commuters_goods_producing_industry'\n",
" - Commuters that work in Goods Producing industry sectors.\n",
"- 'commuters_trade_transport_utility_industry'\n",
" - Commuters that work in Trade, Transportation, and Utility industry sectors.\n",
"- 'commuters_other_industry'\n",
" - Commuters that work in any service industry sector other than the industries listed above.\n"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Job Type\n",
"Along with the above categories, LODES provides files detailing different job types and the total number of jobs of each type. However, unlike the attribute categories above, these matrices do not sum to the total number of workers.\n",
"\n",
"- 'all_jobs'\n",
" - All jobs regardless of job type. Allows for multiple jobs per person and is the default when calling the above attributes.\n",
"- 'primary_jobs'\n",
" - Primary jobs. A primary job is the highest-paying job held by an individual worker for the year, so this is limited to one job per worker.\n",
"- 'all_private_jobs'\n",
" - All private jobs, which are jobs at privately owned businesses and organizations, excluding federal government jobs.\n",
"- 'private_primary_jobs'\n",
" - Primary jobs within the private sector.\n",
"- 'all_federal_jobs'\n",
" - All jobs within the federal government sector.\n",
"- 'federal_primary_jobs'\n",
" - Jobs under the federal government sector that are defined as primary jobs."
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Below is an example of calling the three different age ranges provided by LODES in a geo spec. The example here loads four counties into the matrices rather than the four states that were used previously."
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Commuters ages 29 and under:\n",
" [[369848 11 287 195]\n",
" [ 12 43020 9 29]\n",
" [ 245 11 186798 48]\n",
" [ 119 26 41 55536]]\n",
"\n",
"Commuters between ages 30 and 54:\n",
" [[922520 20 817 354]\n",
" [ 19 104126 27 37]\n",
" [ 656 7 493523 72]\n",
" [ 230 38 120 135000]]\n",
"\n",
"Commuters ages 55 and over:\n",
" [[337141 10 324 151]\n",
" [ 18 42355 9 16]\n",
" [ 325 3 182295 53]\n",
" [ 59 13 56 56352]]\n",
"\n"
]
}
],
"source": [
"from epymorph.data_shape import Shapes\n",
"from epymorph.geo.spec import DynamicGeoSpec, Year, attrib\n",
"from epymorph.geography.us_census import (CountyScope)\n",
"from epymorph.geo.adrio import adrio_maker_library\n",
"from epymorph.geo.dynamic import DynamicGeo\n",
"\n",
"spec = DynamicGeoSpec(\n",
" attributes=[\n",
" attrib('label', str, Shapes.N),\n",
" attrib('commuters_29_under', int, Shapes.NxN),\n",
" attrib('commuters_30_to_54', int, Shapes.NxN),\n",
" attrib('commuters_55_over', int, Shapes.NxN),\n",
" ],\n",
" time_period=Year(2015),\n",
" scope=CountyScope.in_counties([\"04013\", \"08041\", \"32003\", \"35001\"]),\n",
" source={\n",
" 'label': 'LODES:geoid',\n",
" 'commuters_29_under': 'LODES',\n",
" 'commuters_30_to_54': 'LODES',\n",
" 'commuters_55_over': 'LODES',\n",
" }\n",
")\n",
"\n",
"geo = DynamicGeo.from_library(spec, adrio_maker_library)\n",
"\n",
"print(f\"Commuters ages 29 and under:\\n {geo['commuters_29_under']}\\n\")\n",
"\n",
"print(f\"Commuters between ages 30 and 54:\\n {geo['commuters_30_to_54']}\\n\")\n",
"\n",
"print(f\"Commuters ages 55 and over:\\n {geo['commuters_55_over']}\\n\")"
]
}
],
"metadata": {
"kernelspec": {
"display_name": ".venv",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.0rc1"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
Loading

0 comments on commit 90cd112

Please sign in to comment.