-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathILCI_calculations_BAS_PASIN.py
284 lines (232 loc) · 14.3 KB
/
ILCI_calculations_BAS_PASIN.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
# -*- coding: utf-8 -*-
# ----------------------------------
# Code to calculate ILCI on all BAS PASIN data
# This code reads in all of BAS PASIN data published on BAS Polar Aerogeophysics
# Data Portal and calculates the ILCI for the chirp data.
# Details on ILCI methodology can be found in Karlsson et al.
# (2012, EPSL, doi: 10.1016/j.epsl.2012.04.034)
# ILCI calculations are exported as two large files: 1x geopackage and 1x CSV
# file per survey, with 5x columns as follows:
# FltID (Flight ID), X (EPSG:3031), Y (EPSG:3031), ILCI, Normalised_ILCI
# As per Karlsson et al. (2012), we disregard the upper and lower fifths of the
# radar profile and smooth the data using a 1000-point average.
# ----------------------------------
# Code written by J. Bodart (BAS/UoE) - 18/10/2021
# To cite the code + data output, refer to the associated manuscript
# Fremand and Bodart et al. (2022, ESSD, doi: xxxxxx)
# ----------------------------------
# import python libraries
# ----------------------------------
import numpy as np
import pandas as pd
import os
import netCDF4 as nc4
import glob
import geopandas as gpd
# ----------------------------------
# Get paths where NetCDF data lives on computer
#-----------------------------------------------
os.chdir('D:/British_Antarctic_Survey/data/')
surveys = sorted(glob.glob('D:/British_Antarctic_Survey/data/*/netcdf/')) # sort files in order
#-----------------------------------------------
# Loop starts here
#-----------------------------------------------
# Loop through all survey folders
for p in range(0, len(surveys)):
# Initialise export of data (includes setting up output path, folderID, arrays,
# CSV where data will be stored, etc.)
#------------------------------------------------
Main_path = surveys[p]
if 'FISS_1516' in Main_path:
folderID = Main_path.split('/')[-6]
else:
folderID = Main_path.split('\\')[1]
print ('############## Importing NetCDF files from %s survey ##############' %folderID)
output_path = "D:/British_Antarctic_Survey/Aerogeophysics_data_paper/ILCI_figure/ILCI_results/test/"
nc_files = sorted(glob.glob(Main_path + '*.nc')) # sort files in order
# initialise CSV file with 5x columns as set-up
result = open('D:/British_Antarctic_Survey/Aerogeophysics_data_paper/ILCI_figure/ILCI_results/test/%s_ILCI.csv' %folderID, 'w+')
result.write('FltID, X_EPSG3031, Y_EPSG3031, ILCI, Normalised_ILCI\n')
# initialise array to store results into after loop
# initialisation set-up: 1 row, 4 columns
dfss = np.full((1,4), 0)
#-----------------------------------------------
# Loop through all NetCDF files for that specific survey
#-----------------------------------------------
for m in range(0,len(nc_files)):
# import netcdf file
#-----------------------------------------------
ncin = nc4.Dataset(nc_files[m]) # open NetCDF file from path
fltid = ncin.flight # get flight line name from netCDF
print ('####### Importing and reading netcdf: %s #######' %fltid)
#-----------------------------------------------
# First, extract radar variable from nc files
#-----------------------------------------------
# AGAP, BBAS, GRADES-IMAGE, FISS 2015 & 2016, ICEGRAV, POLARGAP (non-polarised) WISE-ISODYN
# all have only 1x chirp
if 'chirp_data' in list(ncin.variables):
chirpData = ncin.variables['chirp_data'][:].data # read in chirp radar data array
# Make sure you select correct chirp in NetCDF (in case of multiple)
# or if chirp does not exist, avoid this NetCDF
elif not 'chirp_data' in list(ncin.variables):
if 'IMAFI' in folderID:
chirpData = ncin.variables['chirp_DLRsar_data'][:].data # read in one of two chirp data array
elif 'POLARGAP' in folderID and 'polarised_chirp_PPVV_data' in list(ncin.variables):
chirpData = ncin.variables['polarised_chirp_PPVV_data'][:].data # read in in one of two chirp data array
else:
break # if not chirp, then break loop here and jump to next NetCDF
# Make sure you use the correct trace variable if there are multiple (e.g. POLARGAP and AGAP)
if 'POLARGAP' in folderID or 'AGAP' in folderID:
traces_nc = ncin.variables['traces_chirp'][:].data # read in trace number from chirp variable
else: # for all others
traces_nc = ncin.variables['traces'][:].data # read in trace number (only one exists in NetCDF)
#-----------------------------------------------
# Second, import other NetCDF variables (X, Y, surf and bed picks)
#-----------------------------------------------
x_nc = ncin.variables['x_coordinates'][:].data # read in X positions (EPSG: 3031)
y_nc = ncin.variables['y_coordinates'][:].data # read in Y positions (EPSG: 3031)
# read in surface and bed pick from NetCDF file and convert -9999 to NaN
surf_pick = ncin.variables['surface_pick_layerData'][:].data # read in surf pick array
bed_pick = ncin.variables['bed_pick_layerData'][:].data # read in bed pick array
surf_pick[surf_pick == -9999] = 'nan' # convert -9999 to NaNs
bed_pick[bed_pick == -9999] = 'nan' # convert -9999 to NaNs
#-----------------------------------------------
# If a flightline only contains NaNs in surface or bed pick, ILCI cannot
# be calculated, thus skip flightline entirely if this occurs
#-----------------------------------------------
if all(np.isnan(surf_pick)) or all(np.isnan(bed_pick)):
print('Flightline %s only contains NaN - excluded from calculations' %fltid)
pass
# If there is valid values, then continue with the calculations
else:
# Remove data where there are NaNs in the surface pick variable
#-----------------------------------------------
nan_idx = np.where(~np.isnan(surf_pick)) # get index of non-NaN values
x_nc = x_nc[nan_idx] # remove NaN in X variable
y_nc = y_nc[nan_idx] # remove NaN in Y variable
traces_nc = traces_nc[nan_idx] # remove NaN in trace variable
chirpData = chirpData[:, nan_idx[0]]
surf_pick = surf_pick[nan_idx] # remove NaN in surface pick variable
bed_pick = bed_pick[nan_idx] # remove NaN in bed pick variable
#-----------------------------------------------
# split data into 8000-trace shunks
#-----------------------------------------------
nTrace = len(traces_nc) # read length of trace variable
idx = np.arange(1,nTrace,8000) # get index of each 8000-trace segments
dfs = np.full((1,4), 0) # initialisation set-up: 1 row, 4 columns
new_df = []
# loop over each index enveloppe
print ('Calculating the indices for the data')
for i in range(1, len(idx)):
# calculate indices for all values that fall within 'idx'
# variable above for last array in record
#-----------------------------------------------
# for last array in record
if i == len(idx)-1:
boolean = (traces_nc>idx[i])
seg = np.where(boolean)[0]
indices = [(idx[i]), nTrace]
# for all arrays except last array in record
else:
boolean = (traces_nc>=idx[i]) & (traces_nc < idx[i+1])
seg = np.where(boolean)[0]
indices = [(idx[i]), (idx[i+1])]
# select trace number, radar data, x/y positions,
# and surf/bed picks based on 'idx' variable above
traces_seg = traces_nc[seg]
chirp_seg = chirpData[:,seg]
surfPick_seg = surf_pick[seg]
bedPick_seg = bed_pick[seg]
x_seg = x_nc[seg]
y_seg = y_nc[seg]
# create array with name of flightline as string
fltID_arr = [fltid for i in range(len(seg))]
# smooth chirp_data using a window of 1000 traces (500 either side)
print ('Smoothing the chirp data for index: %s/%s' %(i, len(idx)-1))
data = chirp_seg
kk = np.arange(499,len(chirp_seg[1])-498)
for j in np.arange(499,len(chirp_seg[1])-498):
data[:,j] = chirp_seg[:,j-498:j+498].mean(axis=1)
#-----------------------------------------------
# Calculate ILCI
#-----------------------------------------------
print ('Calculating ILCI and normalising results')
ilci_data = [] # initialize array
for l in range(0, len(data[1])):
# create log of data
data_log = np.log(data[:,l])
# round surf/bed pick to nearest integer if not a nan
# otherwise leave as nan
if np.isnan(surfPick_seg[l]) or np.isnan(bedPick_seg[l]):
spick = surfPick_seg[l]
bpick = bedPick_seg[l]
else:
spick = int(np.round(surfPick_seg[l]))
bpick = int(np.round(bedPick_seg[l]))
#-----------------------------------------------
# exclude first and last fifths of radar data
#-----------------------------------------------
# if bed-surf pick is smaller than 20 (e.g. too close to call)
# or bed pick is larger than y-axis of radar data (happens...)
# then place nan. Otherwise, calculate ILCI
if bpick-spick < 20 or bpick > len(data) or np.isnan(bpick) or np.isnan(spick):
ilci = np.nan # place nan
else:
good = data_log[spick:bpick]
bad = int(np.round(len(good)/5)) # length of data to cut
data_cut = good[bad:-bad] # array without 1st and last fifth
#-----------------------------------------------
# calculate ILCI
#-----------------------------------------------
ilci = np.nanmean(abs(np.gradient(data_cut)))
# append array after each iteration in the loop
ilci_data.append(ilci)
#-----------------------------------------------
# append data to large array
new_df = np.column_stack([fltID_arr, x_seg,y_seg,ilci_data])
new_df = new_df[499:-498] # cut first and last 500 to match radar smoothing window
# concatenate all results
dfs = np.concatenate((dfs,new_df),axis=0)
#-----------------------------------------------
# combine each flightline data into a large data array
#-----------------------------------------------
dfss = np.concatenate((dfss,dfs), axis=0)
#-----------------------------------------------
# Place data into panda dataframe and drop any consecutive zeros at the start
# arising from initialisation of arrays in code above
#-----------------------------------------------
# only put last 3x columns into the dataframe initially (i.e. not Flight ID yet)
results = pd.DataFrame(dfss[:,1:], columns=['X_EPSG3031', 'Y_EPSG3031', 'ILCI'], dtype=float)
# find amount of zeros to delete at start of record due to array initialisation
# (ugly way)
remove_zeros = len(results.loc[(results==0).all(axis=1)])
# remove zeros at start of record in dataframe
results = results.loc[~(results==0).all(axis=1)] # remove zeros at start of record
# insert flight ID column into dataframe - make sure those zeros are excluded
results.insert(0,"FltID", dfss[remove_zeros:,0])
#-----------------------------------------------
# Normalise ILCI between 0 and 1 for entire survey and attach new column to dataframe
# (optional)
#-----------------------------------------------
normalised_ILCI = (results['ILCI'] - np.min(results['ILCI'])) / (np.max(results['ILCI']) - np.min(results['ILCI'])) # normalise ILCI
results["Norm_ILCI"] = normalised_ILCI # attach to dataframe as last column
#-----------------------------------------------
# Write dataframe to point shapefile and CSV file
#-----------------------------------------------
# Convert dataframe to geopandas GeoDataFrame
gdf = gpd.GeoDataFrame(results, geometry=gpd.points_from_xy(results['X_EPSG3031'], results['Y_EPSG3031']))#, dtype='float')
gdf = gdf.set_crs("EPSG:3031") #Set the coordinate system
schema = gpd.io.file.infer_schema(gdf) # extract schema of dataframe
# make sure all other variables after flight ID are floats
vars_gdf = gdf.columns.tolist()
vars_gdf.remove('geometry')
for k in range(1, len(vars_gdf)):
schema['properties'][vars_gdf[k]] = 'float'
# Export to point shapefile
print ('Writing data to shapefile and CSV file')
#gdf.to_file(output_path + folderID + '_ILCI.shp', driver="ESRI Shapefile", schema=schema) # To save as a shapefile
gdf.to_file(output_path + folderID + '_ILCI.gpkg', driver="GPKG", schema=schema) # To save as a geopackage
# Export to csv file
results.to_csv(output_path + folderID + '_ILCI.csv',index=False)
print ('Done with %s survey - now onto next survey' %folderID)
#-----------------------------------------------