-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathSHPQuery.py
62 lines (57 loc) · 3.09 KB
/
SHPQuery.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
import json
import sys
import time
import geopandas as gpd
import pandas as pd
from shapely.geometry import Point
# Root folder holding the county index file and one sub-directory per county.
sidewalk_data_dir = "./src/sidewalkdata/"
# Directory for query output files.
output_dir = "./query_output/"
# JSON index that lists, per county, its data file names and their feature types.
county_to_file = "".join((sidewalk_data_dir, "which_county_files.json"))
class SHPQuery:
    """Query a county's spatial data files by bounding box and stream the matches to stdout."""

    def __init__(self, query_id, county, bound_coords, crs):
        """
        SHPQuery is a class that allows for easy querying of spatial data files using bounding box coordinates.

        Constructing an instance runs the whole query: the county's files are looked up in the
        county index, read with the bounding box as a filter, re-projected to ``crs`` and every
        non-empty result is written to stdout as one line of compact GeoJSON.

        :param query_id: An integer, this is a unique id number for the query
        :param county: A string, used to get the directory that holds the files for the county of interest
        :param bound_coords: The North-East and South-West coordinates of a bounding box to query data within,
            as a mapping with 'ne' and 'sw' entries that each hold 'lng' and 'lat'
        :param crs: The preferred coordinate reference system id for the data to be returned in E.G. "epsg:4326"
        :raises KeyError: if the county is not listed in the county index, or its entry lacks "files"/"types"
        :raises IndexError: if the county is listed but has no data files
        """
        self.query_id = query_id

        # The two opposite corners of the box, as (x=lng, y=lat) points.
        corners = [
            Point(bound_coords['ne']['lng'], bound_coords['ne']['lat']),
            Point(bound_coords['sw']['lng'], bound_coords['sw']['lat']),
        ]
        # Pass the CRS id directly: the {"init": ...} dict form is deprecated and emits a FutureWarning.
        self.bounding_box = gpd.GeoDataFrame({'geometry': corners}, index=["p1", "p2"], crs=crs)

        # The context manager closes the index file even if parsing fails.
        with open(county_to_file, 'r') as ctf_file_obj:
            ctf_file_json = json.load(ctf_file_obj)
        if county not in ctf_file_json:
            raise KeyError("county {!r} is not listed in {}".format(county, county_to_file))
        self.file_names = ctf_file_json[county]["files"]
        self.file_types = ctf_file_json[county]["types"]
        if not self.file_names:
            raise IndexError("county {!r} has no data files in {}".format(county, county_to_file))

        self.county_directory = sidewalk_data_dir + county + "/"
        # The first file's CRS is used for the whole county.
        # NOTE(review): assumes all files of a county share one CRS - confirm.
        self.county_data_crs = self.get_crs_from_file(self.file_names[0])
        # Express the box in the data's CRS before using it as a read filter.
        self.bounding_box = self.bounding_box.to_crs(self.county_data_crs)
        self.dfs = [df.to_crs(crs) for df in self.get_county_data()]

        # TODO: Need to try to get around stdout limit
        # TODO: Make it so there is a max query size and it will fill in more as it goes
        # Process-wide pandas display option; only matters when frames are printed for debugging.
        pd.set_option('display.max_columns', None)
        for df in self.dfs:
            if df.empty:
                continue
            # Compact separators shrink the payload without touching spaces inside property values.
            sys.stdout.write(df.to_json(separators=(",", ":")) + "\n")
            # Flush so the complete line has left this process before the pause below.
            sys.stdout.flush()
            # Pause between payloads; presumably gives the stdout reader time to drain - confirm.
            time.sleep(1)

    def get_crs_from_file(self, file_name):
        """
        Return the coordinate reference system of a data file in the county directory.

        Only the first row is read, since just the file's CRS is needed.

        :param file_name: Name of the file, relative to the county directory
        :return: The ``crs`` attribute of the frame geopandas reads from the file
        """
        temp_df = gpd.read_file(self.county_directory + file_name, rows=1)
        return temp_df.crs

    def get_county_data(self):
        """
        Read every county data file, keeping only features selected by the bounding box.

        Each returned frame gets a "FEATURE" column holding the type label configured for
        its file, so the label is still available after conversion to JSON.

        :return: A list with one GeoDataFrame per data file, in the order of ``self.file_names``
        :raises IndexError: if there are fewer type labels than data files
        """
        if len(self.file_types) < len(self.file_names):
            raise IndexError("fewer feature types than data files are configured for this county")
        file_dfs = []
        for file_name, file_type in zip(self.file_names, self.file_types):
            file_df = gpd.read_file(self.county_directory + file_name, bbox=self.bounding_box)
            file_df["FEATURE"] = file_type  # Tag every row with the feature type of its source file
            file_dfs.append(file_df)
        return file_dfs