-
Notifications
You must be signed in to change notification settings - Fork 5
/
Copy pathcheck_lgz.py
executable file
·238 lines (206 loc) · 8.42 KB
/
check_lgz.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
#!/usr/bin/env python
# Check what needs doing next for RGZ.
from __future__ import print_function
from surveys_db import SurveysDB
from astropy.table import Table
from panoptes_client import Panoptes, Project, SubjectSet, Subject, Workflow
import os
import glob
from time import sleep
import datetime
import threading
# Root directory; each field gets a sub-directory named after its id.
target = '/data/lofar/DR2/RGZ/'
# Directory containing the LGZ scripts run below.  Indexing os.environ
# directly means a missing LGZPATH surfaces as a KeyError at import time.
lgz = os.environ['LGZPATH']
# These variables are only read later on (catalogue read, Panoptes login),
# so verify up front that they exist.
for k in ('LOTSS_COMPONENT_CATALOGUE', 'PANOPTES_PASSWORD'):
    if k in os.environ:
        continue
    raise RuntimeError('Variable %s not set' % k)
def update_status(id, **kwargs):
    """Overwrite selected entries of a field's database record.

    The record for ``id`` is fetched with ``SurveysDB.get_field``, each
    keyword argument is assigned into it, and the record is stored again
    with ``set_field`` -- all inside one ``SurveysDB`` context.

    :param id: identifier of the field to update.
    :param kwargs: entry name / new value pairs, e.g.
        ``gz_status='Downloaded'``.
    """
    with SurveysDB() as db:
        record = db.get_field(id)
        for column, value in kwargs.items():
            record[column] = value
        db.set_field(record)
def create_rgz_input(id):
    """Write the catalogue rows selected for one field to a FITS table.

    Reads the table named by the ``LOTSS_COMPONENT_CATALOGUE`` environment
    variable, keeps the rows whose ``Mosaic_ID`` equals ``id`` and which pass
    the cuts below, and writes them to ``<target><id>/<id>.fits``, creating
    the field directory first if it does not exist.

    :param id: field identifier, matched against ``Mosaic_ID``.
    :returns: the number of rows selected.
    """
    catalogue = Table.read(os.environ['LOTSS_COMPONENT_CATALOGUE'])
    # Build the selection mask one cut at a time.
    keep = catalogue['Total_flux'] > 8
    keep &= catalogue['Mosaic_ID'] == id
    keep &= catalogue['Maj'] > 15
    keep &= catalogue['Peak_flux'] > 2 * catalogue['Isl_rms']
    selected = catalogue[keep]
    n_selected = len(selected)
    print('LGZ targets', n_selected)
    field_dir = target + id
    if not os.path.isdir(field_dir):
        os.mkdir(field_dir)
    # An existing table from an earlier run is replaced.
    selected.write('%s/%s.fits' % (field_dir, id), overwrite=True)
    return n_selected
def download(id):
    """Select the LGZ sources for a field and fetch their image files.

    Builds the per-field input table with ``create_rgz_input``, records the
    source count with status "Downloading", then runs
    ``utils/download_image_files_legacy.py`` from the LGZ directory inside
    the field's working directory.  On success the status becomes
    "Downloaded".

    :param id: field identifier.
    :raises RuntimeError: if the download command returns a non-zero status;
        the status is then left at "Downloading".
    """
    print('Checking sources in %s for LGZ input' % id)
    n_sources = create_rgz_input(id)
    print('Downloading image files')
    update_status(id, rgz_sources=n_sources, gz_status="Downloading")
    command = 'cd %s; python %s/utils/download_image_files_legacy.py %s' % (target + id, lgz, id + '.fits')
    if os.system(command) != 0:
        raise RuntimeError('Download failed!')
    update_status(id, gz_status="Downloaded")
def check_fields():
    """Reconcile stored source counts with the manifests found on disk.

    For every field whose ``gz_status`` is "Created" or "In progress", count
    the ``*-manifest.txt`` files in its working directory.  Where that count
    differs from the recorded ``rgz_sources``, print the field id with both
    numbers and store the on-disk count in the database.
    """
    query = 'select * from fields where gz_status="Created" or gz_status="In progress"'
    with SurveysDB(readonly=True) as db:
        db.cur.execute(query)
        rows = db.cur.fetchall()
    for row in rows:
        field = row['id']
        recorded = row['rgz_sources']
        on_disk = len(glob.glob(target + field + '/*-manifest.txt'))
        if recorded == on_disk:
            continue
        print(field, recorded, on_disk)
        update_status(field, rgz_sources=on_disk)
def make_images(id):
    """Run the overlay-making script for a field and record the outcome.

    Sets the status to "Creating images", runs
    ``lgz_create/make_overlays_legacy_cscale.py`` from the LGZ directory in
    the field's working directory, then counts the ``*manifest.txt`` files
    produced.  The status becomes "No sources" when there are none, otherwise
    "Created" with ``rgz_sources`` set to the count.

    :param id: field identifier.
    :raises RuntimeError: if the command returns a non-zero status; the
        status is then left at "Creating images".
    """
    print('Making images for %s' % id)
    update_status(id, gz_status="Creating images")
    field_dir = target + id
    command = 'cd %s; python %s/lgz_create/make_overlays_legacy_cscale.py %s 0 9999' % (field_dir, lgz, id + '.fits')
    if os.system(command) != 0:
        raise RuntimeError('Make images failed!')
    n_manifests = len(glob.glob(field_dir + '/*manifest.txt'))
    if n_manifests:
        update_status(id, gz_status='Created', rgz_sources=n_manifests)
    else:
        update_status(id, gz_status='No sources')
def upload_images(id, use_database=True):
    """Create a Panoptes subject set for a field and upload its subjects.

    Every ``*-manifest.txt`` file in the field's working directory yields one
    subject.  Only the first line of a manifest is read; it is split on
    commas and used as follows: field 0 -> ``subject_id`` (int), fields 1-3 ->
    the subject's locations, field 4 -> ``source_name``, fields 5 and 6 ->
    ``ra`` and ``dec`` (float), field 7 -> ``#size`` (float).  The new subject
    set is named after the field and attached to workflow 11973.

    The process working directory is changed to the field directory for the
    duration of the upload and is restored afterwards, including when an
    error occurs.

    :param id: field identifier; also used as the subject set display name.
    :param use_database: when true, set ``gz_status`` to "Uploading" before
        the upload and to "In progress" after it completes.
    """
    print('Create subject set and upload images for', id)
    if use_database:
        update_status(id, gz_status='Uploading')
    wd = os.getcwd()
    Panoptes.connect(username='mjh22', password=os.environ['PANOPTES_PASSWORD'])
    # Presumably the locations listed in the manifests are relative to the
    # field directory, hence the chdir -- TODO confirm.  The working directory
    # is process-wide, so it must be put back once the upload is over.
    os.chdir(target + id)
    try:
        project = Project.find(slug='chrismrp/radio-galaxy-zoo-lofar')
        subject_set = SubjectSet()
        subject_set.display_name = id
        subject_set.links.project = project
        subject_set.save()
        print('Made subject set')
        new_subjects = []
        g = glob.glob('*-manifest.txt')
        for i, f in enumerate(g):
            # Close each manifest promptly instead of leaking the handle.
            with open(f) as manifest:
                bits = manifest.readlines()[0].split(',')
            metadata = {'subject_id': int(bits[0]), 'ra': float(bits[5]), 'dec': float(bits[6]), '#size': float(bits[7]), 'source_name': bits[4]}
            print('Upload doing', bits[4], '%i/%i' % (i, len(g)))
            subject = Subject()
            subject.links.project = project
            subject.metadata.update(metadata)
            for location in bits[1:4]:
                subject.add_location(location)
            subject.save()
            new_subjects.append(subject)
        subject_set.add(new_subjects)
        workflow = Workflow(11973)
        workflow.links.subject_sets.add(subject_set)
    finally:
        os.chdir(wd)
    if use_database:
        update_status(id, gz_status='In progress')
    print('Done!')
if __name__=='__main__':
    # One worker thread per pipeline stage.  Each *_name holds the field the
    # matching thread was most recently started for.
    download_thread=None
    download_name=None
    create_thread=None
    create_name=None
    upload_thread=None
    upload_name=None
    while True:
        # Fetch the DR2 fields in priority order; the rows are processed
        # after the database context has been left.
        with SurveysDB(readonly=True) as sdb:
            sdb.cur.execute('select id,gz_status,weave_priority,rgz_sources,rgz_complete from fields where dr2=1 order by weave_priority')
            results=sdb.cur.fetchall()

        # Number of fields in each gz_status (None = nothing done yet).
        d={}
        for r in results:
            status=r['gz_status']
            d[status]=d.get(status,0)+1

        # The limit is re-read from file on every pass.
        with open('/home/mjh/pipeline-master/lotss-hba-survey/misc/lgz-limit.txt') as limit_file:
            limit=int(limit_file.readlines()[0].rstrip())

        print('\n\n-----------------------------------------------\n\n')
        print('LGZ status')
        print(datetime.datetime.now())
        print()
        # A None status cannot be ordered against strings on Python 3, so
        # sort it as the empty string (which keeps it first, as on Python 2).
        for k in sorted(d, key=lambda s: '' if s is None else s):
            print("%-20s : %i" % (k,d[k]))

        total=0    # sources in fields that are in progress
        tremain=0  # ... of which not yet retired
        ctotal=0   # sources in fields created but not yet uploaded
        ftotal=0   # retired sources over all fields
        for r in results:
            if r['gz_status']=='In progress' and r['rgz_sources'] is not None:
                total+=r['rgz_sources']
                tremain+=r['rgz_sources']
                if r['rgz_complete'] is not None:
                    tremain-=r['rgz_complete']
            # Same None guard as the in-progress case above.
            if r['gz_status']=='Created' and r['rgz_sources'] is not None:
                ctotal+=r['rgz_sources']
            if r['rgz_complete'] is not None:
                ftotal+=r['rgz_complete']
        print('Total sources in fields in progress',total,'of which',tremain,'are not retired')
        print('Non-retired lower limit is',limit)
        print('Total sources created but not uploaded',ctotal)
        print('Total sources retired',ftotal)

        if download_thread is not None:
            print('Download thread is running (%s)' % download_name)
        if create_thread is not None:
            print('Create thread is running (%s)' % create_name)
        if upload_thread is not None:
            print('Upload thread is running (%s)' % upload_name)

        # Forget finished threads so that their stage can be started again.
        # is_alive() exists on Python 2.6+ and 3.x; the isAlive() alias was
        # removed in Python 3.9.
        if download_thread is not None and not download_thread.is_alive():
            print('Download thread seems to have terminated')
            download_thread=None
        if create_thread is not None and not create_thread.is_alive():
            print('Create thread seems to have terminated')
            create_thread=None
        if upload_thread is not None and not upload_thread.is_alive():
            print('Upload thread seems to have terminated')
            upload_thread=None

        # First field, in priority order, that has no status at all.
        non_running=None
        for r in results:
            if r['gz_status'] is None:
                non_running=r['id']
                print('First non-running field is',r['id'])
                break

        # Download another field only while the created-but-not-uploaded
        # backlog is below the limit.
        if non_running is not None and ctotal<limit and download_thread is None:
            download_name=non_running
            print('We need to download a new file (%s)!' % download_name)
            download_thread=threading.Thread(target=download, args=(download_name,))
            download_thread.start()

        # Make images for the first downloaded field.
        if 'Downloaded' in d and create_thread is None:
            for r in results:
                if r['gz_status']=='Downloaded':
                    create_name=r['id']
                    create_thread=threading.Thread(target=make_images, args=(create_name,))
                    create_thread.start()
                    break

        # Upload the first created field once the number of non-retired
        # sources in progress has dropped below the limit.
        if 'Created' in d and tremain<limit and upload_thread is None:
            for r in results:
                if r['gz_status']=='Created':
                    upload_name=r['id']
                    upload_thread=threading.Thread(target=upload_images, args=(upload_name,))
                    upload_thread.start()
                    break

        # Here we should:
        # -- keep track of how many sources have gone into active fields
        # -- if more are needed, make them, one mosaic at a time. update when the images are ready
        # -- steps are: download_image_files_legacy.py (prob about 10 mins)
        # -- make_images (make_overlays_legacy_scale.py)
        # NB IMAGEDIR and LOTSS_COMPONENT_CATALOGUE need to be set
        # -- if images are ready to upload, upload them
        # -- later we can also extract results from RGZ
        # Do we want to update the component catalogue?
        #
        # (A disabled, sequential dispatch on gz_status -- download, then
        # make_images, then upload_images -- used to sit here as a string
        # literal; the per-stage threads above do that job now.)
        sleep(60)