-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathMSResampler.py
264 lines (241 loc) · 12.4 KB
/
MSResampler.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
import numpy as np
from pyrap.tables import table
import os
import sys
import pylab
def save_visibility_arrays (msname,arrays,column="CORRECTED_DATA"):
"""Saves a set of visibility arrays to the MS.
arrays is a list of (p,q,dtimepq,data) tuples, where p & q are antenna indices,
and data is the corresponding array of shape ntime spectral windows,ntime,nfreq,ncorr.
The shape must match the shape of the MS, or errors will result.
"""
tab = table(msname,readonly=False,ack=False);
# read in data column and antenna indices, fill data with 0
a1 = tab.getcol("ANTENNA1")
a2 = tab.getcol("ANTENNA2")
# read spectral windows ID
data_desc_id = tab.getcol("DATA_DESC_ID");
try:
# load the column where to save the data, create this column if this doen't exist
data = tab.getcol(column)
print"===== opening column %s ====="%(column)
except:
print "===== %s column doen't exits: make sure the column exist ====="%(column)
data.fill(0.);
try:
#print"===== opening columns FLAG_ROW and WEIGHT ====="
flagrow = tab.getcol("FLAG_ROW");
weight = tab.getcol("WEIGHT")
flagfreq = tab.getcol("FLAG")
flagfreq.fill(0.)
flagrow.fill(0);
weight.fill(0.)
print"===== FLAG_ROW and WEIGHT columns open successfuly ====="
except:
print "===== FLAG_ROW or WEIGHT colums doen't exits ====="
# timing the processes
from time import gmtime, strftime
# run across the averaging data
for p,q,datacom,flagrowpq,weightpq,flagfreqpq in arrays:
print "%s +++ saving bd-avg visibilities for baseline (%d,%d) +++"%(strftime("%Y-%m-%d %H:%M:%S", gmtime()),p,q)
data[(a1==p)&(a2==q),...] = datacom.copy();
flagrow[(a1==p)&(a2==q),...] = flagrowpq.copy();
weight[(a1==p)&(a2==q),...] = weightpq.copy();
flagfreq[(a1==p)&(a2==q),...] = flagfreqpq.copy()
# save the averaging data to the hard MS
tab.putcol(column,data)
tab.putcol("FLAG_ROW",flagrow)
tab.putcol("WEIGHT",weight)
tab.putcol("FLAG",flagfreq)
print "save succeded"
tab.close();
class MSResampler (object):
"""iClass for reading and resampling data from an MS"""
def __init__ (self,msname,column="DATA"):
"""Inits with given MS and column name.
If time0/ntime and freq0/nfreq is given, handles only a subset of the data,
otherwise reads all channels and timeslots.
""";
#os.system("addbitflagcol %s"%msname)
self.msname = msname;
tab = table(msname,ack=False,readonly=False)
self.A0 = A0 = tab.getcol('ANTENNA1')
self.A1 = A1 = tab.getcol('ANTENNA2')
# load the data and the weights
data = tab.getcol(column);
data_desc_id = tab.getcol("DATA_DESC_ID")
weight = tab.getcol("WEIGHT")
flagfreq = tab.getcol("FLAG")
nfreq = data.shape[1]
self.data = data.copy();
self.data_desc_id = data_desc_id.copy()
self.weight = weight.copy()
print "Visibility column shape:",data.shape
self.na = na = np.max(A1)+1
# do not consider auto-correlation
self.nbl = (na*(na-1))/2
self.ncorr = data.shape[2]
self.nbins = data.shape[0]
self.nfreq = nfreq
self.flagfreq = flagfreq.copy();
self.UVW = tab.getcol("UVW")
# get frequency and wavelength (per channel) from SPECTRAL_WINDOW subtable
#t2 = table(tab.getkeyword("SPECTRAL_WINDOW"),readonly=False)
#self.channels = t2.getcol("CHAN_FREQ",0);
#self.freqs = t2.getcol("CHAN_FREQ",0)[0,freq0:freq0+nfreq];
#t2.close()
tab.close()
def bd_averaging (self,dtime,dfreq,psh,qsh):
"""Downsamples data using baseline dependent averaging. The compression is done over a boxcar window of size dfreqxdtimepq
Returns list of (antenna1,antenna2,dtimepq,averaged_data) tuples, one per each baseline, where
averaged_data has the shape (ntime/dtimepq,nfreq/dfreq,ncorr)."""
# this is the frequency output shape
# Notice that the time output depends on the baseline length
nfreq1 = self.nfreq/dfreq;
# make a list of per-baseline output arrays
result = [];
# shortest baseline length, longest baseline p=4 and q=17, VLA D configuration
lengthsh = np.sqrt((self.UVW[(self.A0==psh)&(self.A1==qsh)]**2).sum(axis=1))[0];
# loop over each baseline
# Evaluate the integration time for all baseline, given the one of the shortest baseline
for p in range(self.na):
for q in range(p+1,self.na):
# extract full array for this baseline, apply subset in time
input_index = (self.A0==p)&(self.A1==q)
# extract the data and the spectral windows indexes for this baseline
data = self.data[input_index].copy();
# flagging frequency column
flagfreqpq = self.flagfreq[input_index].copy();
flagfreqpqkept = np.zeros_like(flagfreqpq)
# output data
datacom = np.zeros_like(data)
# data description for this baseline
data_desc_id = self.data_desc_id[input_index].copy()
# prepare the output flag columns
flagrowpq = np.zeros(data.shape[0],dtype=int)
# prepare weight columns for this baseline
weightpq = self.weight[input_index].copy()
# uvw bin for this baseline
uvw=self.UVW[(self.A0==p)&(self.A1==q)]
# the baselines with 0 data are fictives in VLA for this observations. Do not consisder
# One option is to remove these fictives Antennas from the Antenna table
if len(data)!=0:
# evaluate the integration time of this baseline, NB : dtime is the integration time for the shortest baseline
# length for this baseline
lengthpq = np.sqrt((uvw**2).sum(axis=1))[0];
# compresion timeslots or compression time = dtimepq*integration time
dtimepq = int(np.ceil(dtime/(lengthpq/lengthsh)));
# compression frequency or bandwidth
dfreqpq = int(np.ceil(dfreq/(lengthpq/lengthsh)));
# make the number of bins to average be an odd number
dtimepq = dtimepq+1 if dtimepq%2==0 else dtimepq
dfreqpq = dfreqpq+1 if dfreqpq%2==0 else dfreqpq
#lengthdtdf.append((lengthpq,dtimepq,dfreqpq))
from time import gmtime, strftime
print """%s +++ Baseline Dependent Averaging: Baseline (%d,%d), integration =%ds, bandwidth =%.1fMHz
this may take some time +++"""%(strftime("%Y-%m-%d %H:%M:%S", gmtime()),p,q,dtimepq,(dfreqpq*10))
# number of spectral windows
nbsw = data_desc_id.max()+1;
# prepare list for result of each spectral window
resultsw = []
# extract the data of each spectract window, NB: number of spectral window = data_desc_id.max()+1;
for idw in range(nbsw):
# extract data for this spectral window
dataid = data[data_desc_id==idw].copy();
datacomid = datacom[data_desc_id==idw].copy()
flagrowpqid = flagrowpq[data_desc_id==idw].copy()
weightpqid = weightpq[data_desc_id==idw].copy()
flagfreqpqid = flagfreqpq[data_desc_id==idw].copy()
flagfreqpqkeptid = flagfreqpqkept[data_desc_id==idw].copy()
# work across each spectral window for this baseline
ntime = (dataid.shape[0]//dtimepq)*dtimepq
for time0 in range(0,ntime,dtimepq):
# keep indice at the end of the compression range
time1 = time0 + dtimepq;
# created three time slice;
# time slice for the overall date to average
timeslicesw = slice(time0,time1)
# time slices for the left hand side and right hand side where to flag
timesliceflag1 = slice(time0,time0+(time1-time0)/2)
timesliceflag2 = slice(time0+(time1-time0)/2 +1,time1)
# time slice where to save the average data
timeslicedata = slice(time0+(time1-time0)/2,time0+(time1-time0)/2 +1)
# extract visibilities to average
datatoavg = dataid[timeslicesw].copy()
# extract the hires flagging columns for this id
flagfreqtoavg = flagfreqpqid[timeslicesw].copy()
# average accros frequency here
datatoavg, flagfreqtoavg = self.bd_averaging_frequency (datatoavg, flagfreqtoavg, dfreqpq)
# average across time
datacomid[timeslicedata,...] = (datatoavg.mean(0)).copy()
# evaluate the new wait and save
weightpqid[timeslicedata,...] = (weightpqid[timeslicesw,...].sum(0)).copy()
# keep the new flagging information across frequencies
flagfreqpqkeptid[timeslicesw,...] = flagfreqtoavg.copy()
#flagfreqpqkeptid[timeslicedata,...][datacomid[timeslicedata,...]==0.] = 1
# flag in time
flagrowpqid[timesliceflag1] = 1;
flagrowpqid[timesliceflag2] = 1;
# this is the last time slice, in the case we still have the rest of data less than dtimepq
if time0 < dataid.shape[0]:
# for exemple if dtime =10 and here dataid.shape[0]-time<dtime;
timeslicesw = slice(time0,dataid.shape[0])
timesliceflag1 = slice(time0,time0+(dataid.shape[0]-time0)/2);
timesliceflag2 = slice(time0+(dataid.shape[0]-time0)/2 +1,dataid.shape[0]);
timeslicedata = slice(time0+(dataid.shape[0]-time0)/2, time0+(dataid.shape[0]-time0)/2 +1);
datatoavg = dataid[timeslicesw].copy()#reshape((dataid.shape[0]-time1,nfreq1,dfreq,self.ncorr))
flagfreqtoavg = flagfreqpqid[timeslicesw].copy()
datatoavg, flagfreqtoavg = self.bd_averaging_frequency (datatoavg, flagfreqtoavg, dfreqpq)
datacomid[timeslicedata,...] = (datatoavg.mean(0)).copy()
weightpqid[timeslicedata,...] = (weightpqid[timeslicesw,...].sum(0)).copy()
flagfreqpqkeptid[timeslicesw,...] = flagfreqtoavg.copy()
#flagfreqpqkeptid[timeslicedata,...][datacomid[timeslicedata,...]==0.] = 1
flagrowpqid[timesliceflag1] = 1;
flagrowpqid[timesliceflag2] = 1;
# keep informations for this baseline
datacom[data_desc_id==idw] = datacomid.copy();
flagrowpq[data_desc_id==idw] = flagrowpqid.copy()
weightpq[data_desc_id==idw] = weightpqid.copy()
flagfreqpqkept[data_desc_id==idw] = flagfreqpqid.copy()
# save the result
result.append((p,q,datacom,flagrowpq,weightpq,flagfreqpqkept));
return result;
def bd_averaging_frequency (self, datafreq_pq, flagfreqtoavg_pq, dfreq):
"""
baseline dependent averaging across frequency, given
the data for a baseline and the number of samples to average.
"""
#freq0 = 0;
#freq1 = freq0 + dfreq;
flagfreqkept = np.zeros_like(flagfreqtoavg_pq)
nfreq = (datafreq_pq.shape[1]//dfreq)*dfreq
#while datafreq_pq.shape[1]-freq0 >= dfreq:
for freq0 in range(0,nfreq,dfreq):
# keep indice at the end of the compression range
freq1 = freq0 + dfreq;
# prepare the samples to average freq0 + dfreq
freqslicesw = slice(freq0,freq1)
# prepare the flaging columns
freqsliceflag1 = slice(freq0,freq0+(freq1-freq0)/2)
freqsliceflag2 = slice(freq0+(freq1-freq0)/2 +1,freq1)
freqslicedata = slice(freq0+(freq1-freq0)/2,freq0+(freq1-freq0)/2 +1)
data_avg_freq = datafreq_pq[:,freqslicesw,...].copy()
flagfreqtoavg = flagfreqtoavg_pq[:,freqslicesw,...].copy()
flagfreqkept[:,freqsliceflag1,...] = 1;
flagfreqkept[:,freqsliceflag2,...] = 1;
# average were the hires visibities are not flag. flagfreqtoavg is the mask, average where flagfreqtoavg is False
datafreq_pq[:,freqslicedata,...] = np.mean(data_avg_freq,axis=(1))\
.reshape(data_avg_freq.shape[0],1,data_avg_freq.shape[2])
if freq0 < datafreq_pq.shape[1]:
freqslicesw = slice(freq0,datafreq_pq.shape[1])
freqsliceflag1 = slice(freq0,freq0+(datafreq_pq.shape[1]-freq0)/2);
freqsliceflag2 = slice(freq0+(datafreq_pq.shape[1]-freq0)/2 +1,datafreq_pq.shape[1]);
freqslicedata = slice(freq0+(datafreq_pq.shape[1]-freq0)/2, freq0+(datafreq_pq.shape[1]-freq0)/2 +1);
data_avg_freq = datafreq_pq[:,freqslicesw,...].copy()
flagfreqtoavg = flagfreqtoavg_pq[:,freqslicesw,...].copy()
flagfreqkept[:,freqsliceflag1,...] = 1;
flagfreqkept[:,freqsliceflag2,...] = 1;
# average were the hires visibities are not flag. flagfreqtoavg is the mask, average where flagfreqtoavg is False
datafreq_pq[:,freqslicedata,...] = np.mean(data_avg_freq,axis=(1))\
.reshape(data_avg_freq.shape[0],1,data_avg_freq.shape[2])
return datafreq_pq, flagfreqkept;