create_eviction_data.mk
# Merges eviction data with demographics data, and creates the search data,
# map data panel data, rankings data, and public data for export.
# Node commands wrapped so they run with additional memory
mapshaper_cmd = node --max_old_space_size=4096 $$(which mapshaper)
geojson_label_cmd = node --max_old_space_size=4096 $$(which geojson-polygon-labels)
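# Both wrappers raise the V8 old-space heap limit to roughly 4 GB so the CLIs can
# handle the larger GeoJSON files; $$(which ...) escapes to $(which ...) in the
# shell so the globally installed binaries are resolved at run time.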
geo_types = states counties cities tracts
census_geo_types = $(foreach g,$(geo_types),census/$(g).geojson)
sub_eviction_cols = evictions,eviction-filings,eviction-rate,eviction-filing-rate
low_cols = evictions-low,eviction-filings-low,eviction-rate-low,eviction-filing-rate-low
high_cols = evictions-high,eviction-filings-high,eviction-rate-high,eviction-filing-rate-high
eviction_cols = $(sub_eviction_cols),$(low_cols),$(high_cols)
ts := $(shell date "+%H%M%S")
# build ID to use for source data
BUILD_ID?=2018-12-14
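# BUILD_ID selects the dated snapshot of source data on S3. Override it at
# invocation if needed, e.g. (assuming the usual entry point):
#   make -f create_eviction_data.mk all BUILD_ID=2018-12-14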
output_files = $(foreach g,$(geo_types),data/$(g).csv)
public_data = data/public/US/all.csv data/public/US/national.csv conf/DATA_DICTIONARY.txt $(foreach g, $(geo_types), grouped_public/$(g).csv data/non-imputed/$(g).csv)
tool_data = data/rankings/states-rankings.csv data/rankings/cities-rankings.csv data/search/counties.csv data/search/locations.csv data/avg/us.json data/us/national.csv
# Don't delete intermediate demographics files on completion or interruption
.PRECIOUS: data/demographics/%.csv
.PHONY: all clean deploy deploy_public_data deploy_app_data deploy_logs help
## all : Create all output data
all: $(output_files) $(tool_data) $(public_data)
base_data: $(output_files)
## clean : Remove created files
clean:
rm -f data/*.csv
rm -f data/*.gz
rm -f $(tool_data)
rm -f log/*.txt
# Based on https://swcarpentry.github.io/make-novice/08-self-doc/
## help : Print help
help: create_eviction_data.mk
perl -ne '/^## / && s/^## //g && print' $<
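# Any rule documented with a leading "## " shows up in the help output, e.g. the
# deploy rule prints as "deploy : Deploy gzipped eviction / demographics data to S3".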
# DEPLOY TASKS
## deploy : Deploy gzipped eviction / demographics data to S3
deploy:
for f in data/*.csv; do gzip $$f; done
for f in data/*.gz; do aws s3 cp $$f s3://$(S3_SOURCE_DATA_BUCKET)/$(BUILD_ID)/$$(basename $$f); done
## deploy_app_data : Deploy all data files used in the map and rankings tool, remove old exports
deploy_app_data: $(tool_data)
for f in $^; do aws s3 cp $$f s3://$(S3_TOOL_DATA_BUCKET)/$$f --acl=public-read --cache-control max-age=2628000; done
aws s3 rm s3://$(S3_EXPORTS_BUCKET) --recursive
aws cloudfront create-invalidation --distribution-id $(CLOUDFRONT_ID_DEV) --paths "/*"
aws cloudfront create-invalidation --distribution-id $(CLOUDFRONT_ID_PROD) --paths "/*"
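# max-age=2628000 seconds is roughly one month of caching; the CloudFront
# invalidations above make the freshly uploaded files visible immediately.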
## deploy_public_data : Create and deploy public data exports
deploy_public_data: $(census_geo_types) $(public_data)
python3 scripts/create_data_public.py
aws s3 cp ./data/public s3://$(S3_DATA_DOWNLOADS_BUCKET) --recursive --acl=public-read
aws s3 cp ./data/non-imputed s3://$(S3_DATA_DOWNLOADS_BUCKET)/non-imputed --recursive --acl=public-read
aws s3 cp ./conf/DATA_DICTIONARY.txt s3://$(S3_DATA_DOWNLOADS_BUCKET)/DATA_DICTIONARY.txt --acl=public-read
aws s3 cp ./conf/CHANGELOG.txt s3://$(S3_DATA_DOWNLOADS_BUCKET)/CHANGELOG.txt --acl=public-read
aws s3 cp ./conf/changes s3://$(S3_DATA_DOWNLOADS_BUCKET)/changes --recursive --acl=public-read
aws cloudfront create-invalidation --distribution-id $(PUBLIC_DATA_CLOUDFRONT_ID) --paths "/*"
## deploy_logs : Deploy a log of the demographics / evictions merge
deploy_logs:
cat log/*.txt > log/demographics_eviction_join_log.txt
aws s3 cp log/demographics_eviction_join_log.txt s3://$(S3_SOURCE_DATA_BUCKET)/$(BUILD_ID)/demographics_eviction_join_log_$(ts).txt
rm -f log/*.txt
### GENERAL DATA
## data/%.csv : Join evictions and demographics
data/%.csv: data/demographics/%.csv data/evictions/%.csv
python3 scripts/csvjoin.py GEOID,year $^ > $@
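# Example expansion: data/states.csv is produced by joining
# data/demographics/states.csv and data/evictions/states.csv on GEOID and year.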
### MAP DATA PANEL DATA
## data/avg/us.json : Averages of US data
data/avg/us.json: data/us/national.csv
mkdir -p $(dir $@)
cat $< | \
python3 scripts/create_us_average.py > $@
## data/us/national.csv : US data by year for tool
data/us/national.csv: data/public/US/national.csv
mkdir -p $(dir $@)
cp $< $@
### SEARCH DATA
## data/search/locations.csv : Search data for counties and states
data/search/locations.csv: data/search/counties.csv data/search/states.csv
csvstack $^ > $@
## data/search/%.csv : Create search data
data/search/%.csv: data/%.csv data/search/%-centers.csv
python3 scripts/create_search_data.py $^ $@
## data/search/%-centers.csv : Convert geography centers to CSV
data/search/%-centers.csv: centers/%.geojson
mkdir -p $(dir $@)
in2csv --format json -k features $< > $@
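# in2csv (csvkit) with --format json -k features flattens the "features" array
# of the centers GeoJSON into one CSV row per feature.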
### CITY RANKING DATA
## data/rankings/%-rankings.csv : Create rankings data
data/rankings/%-rankings.csv: data/%.csv data/rankings/%-centers.csv
python3 scripts/create_data_rankings.py $^ $@
## data/rankings/%-centers.csv : Convert GeoJSON centers to CSV for rankings
data/rankings/%-centers.csv: centers/%.geojson
mkdir -p $(dir $@)
in2csv --format json -k features $< > $@
### PUBLIC EXPORT DATA
## grouped_public/%.csv : Combine full public data CSVs for the GeoJSON merge
grouped_public/%.csv: data/public/US/%.csv
mkdir -p $(dir $@)
cat $< | \
python3 scripts/process_group_data.py | \
perl -ne 'if ($$. == 1) { s/"//g; } print;' > $@
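# The perl filter only rewrites the first line ($. == 1, the header row),
# stripping double quotes from the column names; data rows pass through unchanged.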
## data/public/US/all.csv : Full US public data
data/public/US/all.csv: $(foreach g, $(geo_types), data/public/US/$(g).csv)
mkdir -p $(dir $@)
csvstack $^ > $@
## data/public/US/national.csv : US data by year
data/public/US/national.csv:
mkdir -p $(dir $@)
aws s3 cp s3://$(S3_SOURCE_DATA_BUCKET)/$(BUILD_ID)/evictions/us.csv.gz - | \
gunzip -c | \
python3 scripts/convert_varnames.py | \
csvcut -c year,renter-occupied-households,$(sub_eviction_cols) > $@
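# Streams the gzipped national file from S3 ("-" writes the object to stdout),
# renames variables, then keeps only year, renter-occupied-households and the
# columns in $(sub_eviction_cols).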
## data/public/US/%.csv : For US data, pull demographics and full eviction data
data/public/US/%.csv: data/demographics/%.csv data/full-evictions/%.csv
mkdir -p $(dir $@)
python3 scripts/csvjoin.py GEOID,year $^ | \
python3 scripts/convert_col_order.py > $@
### DATA FETCHED FROM S3 SOURCE
raw: $(foreach g,$(geo_types),data/raw/$(g).csv)
full-evictions: $(foreach g,$(geo_types),data/full-evictions/$(g).csv)
## data/raw/%.csv : Pull raw eviction data from S3
data/raw/%.csv:
mkdir -p $(dir $@)
aws s3 cp s3://$(S3_SOURCE_DATA_BUCKET)/$(BUILD_ID)/evictions/$(notdir $@).gz - | \
gunzip > $@
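# $(notdir $@) maps the target onto its S3 object, e.g. data/raw/counties.csv
# is fetched from .../evictions/counties.csv.gz.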
## data/full-evictions/cities.csv : Override full-evictions data for cities/places
data/full-evictions/cities.csv:
mkdir -p $(dir $@)
aws s3 cp s3://$(S3_SOURCE_DATA_BUCKET)/$(BUILD_ID)/evictions/cities-unrounded.csv.gz - | \
gunzip -c | \
python3 scripts/convert_varnames.py | \
python3 scripts/create_fake_data.py > $@
## data/full-evictions/%.csv : Pull eviction data, including imputed/subbed
data/full-evictions/%.csv:
mkdir -p $(dir $@)
aws s3 cp s3://$(S3_SOURCE_DATA_BUCKET)/$(BUILD_ID)/evictions/$(notdir $@).gz - | \
gunzip -c | \
python3 scripts/convert_varnames.py | \
python3 scripts/create_fake_data.py > $@
## data/evictions/%.csv : Pull eviction data, get only necessary columns
data/evictions/%.csv:
mkdir -p $(dir $@)
aws s3 cp s3://$(S3_SOURCE_DATA_BUCKET)/$(BUILD_ID)/evictions/$(notdir $@).gz - | \
gunzip -c | \
python3 scripts/convert_varnames.py | \
python3 scripts/create_fake_data.py | \
python3 scripts/convert_crosswalk_geo.py $* | \
python3 utils/subset_cols.py GEOID,year,$(eviction_cols) > $@
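# $* is the pattern stem (states, counties, cities, or tracts), passed to
# convert_crosswalk_geo.py presumably so it knows which geography level it is
# handling before the columns are trimmed to GEOID, year and the eviction columns.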
## data/demographics/%.csv : Pull demographic data
data/demographics/%.csv:
$(MAKE) -f fetch_s3_source.mk $@
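# Demographics CSVs are fetched by the separate fetch_s3_source.mk makefile;
# $(MAKE) (rather than a bare make) keeps the recursive invocation well-behaved.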
## data/non-imputed/%.csv : Non-imputed data for downloads
data/non-imputed/%.csv:
mkdir -p $(dir $@)
aws s3 cp s3://$(S3_SOURCE_DATA_BUCKET)/$(BUILD_ID)/non-imputed/$(notdir $@).gz - | \
gunzip -c | \
python3 scripts/convert_varnames.py > $@
## centers/%.geojson : GeoJSON centers
centers/%.geojson: census/%.geojson
mkdir -p $(dir $@)
$(geojson_label_cmd) --style largest $< > $@
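# geojson-polygon-labels emits a label point per polygon feature, which the
# search and rankings center files above consume; --style largest should label
# only the largest part of multipolygon geographies.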
## census/%.geojson : Census GeoJSON from S3 bucket
census/%.geojson:
$(MAKE) -f fetch_s3_source.mk $@