-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathpreprocessing.py
141 lines (108 loc) · 4.91 KB
/
preprocessing.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
"""
*********** [UNUSED] ***********
The dataset contains 62,058 high quality Google Street View images. The images cover the
downtown and neighboring areas of Pittsburgh, PA; Orlando, FL and partially Manhattan, NY.
Accurate GPS coordinates of the images and their compass direction are provided as well.
For each Street View placemark (i.e. each spot on one street), the 360° spherical view is broken
down into 4 side views and 1 upward view. There is one additional image per placemark which
shows some overlaid markers, such as the address, name of streets, etc.
Naming format:
The name of the images has the following format: XXXXXX_Y.jpg
XXXXXX is the identifier of the placemark. There are a total of 10343 placemarks in this
dataset, so XXXXXX ranges from 000001 to 010343.
Y is the identifier of the view. 1, 2, 3 and 4 are the side views and 5 is the upward view. 0 is the
view with markers overlaid (explained above). Thus, there are a total of 6 images per
placemark.
GPS Coordinates & Compass Direction:
The Matlab file 'GPS_Long_Lat_Compass.mat' includes the GPS coordinates and compass
direction of each placemark. The row number XXXXXX corresponds to the placemark number
XXXXXX. The 1st and 2nd columns are the latitude and longitude values. The 3rd column is the
compass direction (in degrees from North towards West) of the view number 4. The rest of the
side views are exactly 90° apart from the view number 4.
The file 'Cartesian_Location_Coordinates.mat' contains the location coordinates in a metric
Cartesian system (unlike longitude and latitude). The Euclidean distance between such XYZ
coordinates of two points is the actual distance (in meters) between them.
Image Geo-localization based on Multiple Nearest Neighbor Feature Matching using
Generalized Graphs. Amir Roshan Zamir and Mubarak Shah. IEEE Transactions on
Pattern Analysis and Machine Intelligence (TPAMI), 2014.
"""
"""
*********** [UNUSED] ***********
Instructions:
- create a data/ directory outside of worldNET directory
- from http://www.cs.ucf.edu/~aroshan/index_files/Dataset_PitOrlManh/:
- get Cartesian_Location_Coordinates.mat
- get all parts from zipped_images/ and add image contents to an images/ subdirectory
- run get_coordinates() to retrieve all XYZ coordinates, indexed in order
- run load_images() to load in all images
"""
import os
import tensorflow as tf
import numpy as np
from scipy.io import loadmat
from PIL import Image
from tqdm import tqdm
from joblib import Parallel, delayed
import multiprocessing
import matplotlib
matplotlib.use("tkagg")
from matplotlib import pyplot as plt
# directory prefix prepended to every data file path built in this module
DATA_PATH = "../data/"
# number of placemarks handled: coordinate rows kept and length of the image array
NUM_SAMPLES = 500
def get_coordinates():
    """
    retrieves the first NUM_SAMPLES XYZ coordinates, min-max scaled

    a single minimum and maximum taken over every value (all axes together)
    is used for the scaling
    """
    mat_path = DATA_PATH + "Cartesian_Location_Coordinates.mat"
    print("loading", NUM_SAMPLES, "coordinates from", mat_path)
    table = loadmat(mat_path)["XYZ_Cartesian"]
    coords = np.array(table[:NUM_SAMPLES])
    lowest = np.min(coords)
    highest = np.max(coords)
    return (coords - lowest) / (highest - lowest)
def load_image(filename, target_shape):
    """
    load a single image as a float array scaled to [0, 1]

    filename: path of the image file, opened with PIL
    target_shape: size handed to PIL's resize (PIL reads it as (width, height))

    returns an array of shape (height, width, 3)
    """
    # Image.open is lazy and keeps the file open; the context manager closes
    # the handle so bulk loading does not run out of file descriptors
    with Image.open(filename) as image:
        # force three channels so grayscale / palette / RGBA files all come
        # back with the same layout
        resized = image.convert("RGB").resize(target_shape)
    return np.asarray(resized) / 255
def load_images(image_dir, target_shape=(224, 224), image_arr_filename="image_arr", save=True, load=True, one_angle=False):
    """
    load and save all available images

    image_dir: directory under DATA_PATH holding files named XXXXXX_Y.jpg
    target_shape: (rows, cols) of each image in the returned array
    image_arr_filename: basename (no extension) of the .npy cache under DATA_PATH
    save: write the assembled array to the cache file
    load: return the cache file's contents when it already exists
    one_angle: keep one image per placemark instead of all four side views

    returns an array of shape (NUM_SAMPLES, 4, rows, cols, 3), or
    (NUM_SAMPLES, rows, cols, 3) when one_angle is set; slots with no
    matching file stay zero
    """
    # NOTE: an existing cache is returned as-is; it is not checked against
    # target_shape or one_angle
    if load:
        cache_path = DATA_PATH + image_arr_filename + ".npy"
        if os.path.exists(cache_path):
            return np.load(cache_path)

    num_angles = 4
    rows, cols = target_shape[0], target_shape[1]

    # os.path.join keeps paths valid whether or not image_dir ends with a slash
    image_root = os.path.join(DATA_PATH, image_dir)
    files = sorted(os.listdir(image_root))
    if ".DS_Store" in files:
        files.remove(".DS_Store")
    # six files (views 0-5) are expected per placemark
    files = files[:NUM_SAMPLES * 6]

    print("loading", NUM_SAMPLES, "images from", DATA_PATH + image_dir)

    # zeros rather than empty so slots without a file hold a defined value
    if one_angle:
        images = np.zeros((NUM_SAMPLES, rows, cols, 3))
    else:
        images = np.zeros((NUM_SAMPLES, num_angles, rows, cols, 3))

    indices, angles, filepaths = [], [], []
    for file in files:
        index, angle = map(int, file.split('.')[0].split('_'))
        index, angle = index - 1, angle - 1
        # keep only the four side views; this drops the marker-overlay
        # view (0) and the upward view (5)
        if not 0 <= angle < num_angles:
            continue
        # missing files shift the slice above, so ids past the buffer can
        # appear; skip them instead of raising IndexError (or wrapping around
        # for a negative id)
        if not 0 <= index < NUM_SAMPLES:
            continue
        indices.append(index)
        angles.append(angle)
        filepaths.append(os.path.join(image_root, file))

    # PIL's resize takes (width, height), so swap to get (rows, cols) arrays
    # that fit the buffer even when target_shape is not square
    pil_size = (cols, rows)
    num_cores = multiprocessing.cpu_count()
    results = Parallel(n_jobs=num_cores)(
        delayed(load_image)(filepath, pil_size) for filepath in tqdm(filepaths)
    )

    for index, angle, result in zip(indices, angles, results):
        if one_angle:
            # every side view writes the same slot, so the last one in
            # sorted order is the one that remains
            images[index] = result
        else:
            images[index, angle] = result

    if save:
        # np.save appends the .npy extension itself
        np.save(DATA_PATH + image_arr_filename, images)
        print("image array saved at", DATA_PATH + image_arr_filename)
    return images
def main():
    """
    load the images and coordinates and print their array shapes
    """
    images = load_images("images/")
    coords = get_coordinates()
    # print() rejects unknown keyword arguments; the stray one_angle=True that
    # used to be passed here raised a TypeError. If single-angle loading was
    # intended, pass one_angle=True to load_images instead.
    print(coords.shape, images.shape)


if __name__ == "__main__":
    main()