@@ -179,29 +179,32 @@ def process(self, stream):
             )

             # check applied for organisations that have provided a document dataset
-            params = urllib.parse.urlencode(
-                {
-                    "sql": f"""select organisation from provision_summary where active_endpoint_count > 0 and dataset == '{linked_dataset}'""",
-                    "_size": "max",
-                }
-            )
-            base_url = f"https://datasette.planning.data.gov.uk/performance.csv?{params}"
-
-            max_retries = 60  # Retry for an hour
-            for attempt in range(max_retries):
-                try:
-                    get_lpa = pd.read_csv(base_url)
-                    break
-                except urllib.error.HTTPError:
-                    time.sleep(60)
-            else:
-                raise Exception(
-                    "Failed to fetch datasette after multiple attempts"
+            if not hasattr(
+                self, "lpa_list"
+            ):  # check if data fetched already
+                params = urllib.parse.urlencode(
+                    {
+                        "sql": f"""select organisation from provision_summary where active_endpoint_count > 0 and dataset == '{linked_dataset}'""",
+                        "_size": "max",
+                    }
                 )
+                base_url = f"https://datasette.planning.data.gov.uk/performance.csv?{params}"
+
+                max_retries = 60  # Retry for an hour
+                for attempt in range(max_retries):
+                    try:
+                        get_lpa = pd.read_csv(base_url)
+                        self.lpa_list = get_lpa["organisation"].to_list()
+                        break
+                    except urllib.error.HTTPError:
+                        if attempt < max_retries - 1:
+                            time.sleep(60)
+                        else:
+                            raise Exception(
+                                "Failed to fetch datasette after multiple attempts"
+                            )

-            lpa_list = get_lpa["organisation"].to_list()
-
-            if row.get("organisation", "") in lpa_list:
+            if row.get("organisation", "") in self.lpa_list:

                 reference = row.get(linked_dataset, "")

                 find_entity = self.lookup(
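Taken out of the diff context, the change amounts to a memoised fetch with a retry loop: the datasette query runs once per instance, the result is cached on `self.lpa_list`, and transient HTTP errors trigger a minute's wait before the next attempt, raising only after the final one. A minimal standalone sketch of that pattern, assuming a hypothetical `fetch_lpa_list` helper on the phase class (the class and method names here are illustrative, not part of this PR):

```python
import time
import urllib.error
import urllib.parse

import pandas as pd


class LookupPhase:  # hypothetical wrapper; only the caching/retry pattern matters
    def fetch_lpa_list(self, linked_dataset):
        # Fetch (and cache) the organisations that have an active endpoint
        # for the linked dataset, retrying on transient datasette errors.
        if not hasattr(self, "lpa_list"):  # only fetch on the first call
            params = urllib.parse.urlencode(
                {
                    "sql": (
                        "select organisation from provision_summary "
                        "where active_endpoint_count > 0 "
                        f"and dataset == '{linked_dataset}'"
                    ),
                    "_size": "max",
                }
            )
            url = f"https://datasette.planning.data.gov.uk/performance.csv?{params}"

            max_retries = 60  # retry for up to an hour
            for attempt in range(max_retries):
                try:
                    get_lpa = pd.read_csv(url)
                    self.lpa_list = get_lpa["organisation"].to_list()
                    break
                except urllib.error.HTTPError:
                    if attempt < max_retries - 1:
                        time.sleep(60)  # wait a minute before retrying
                    else:
                        raise Exception(
                            "Failed to fetch datasette after multiple attempts"
                        )
        return self.lpa_list
```

Caching on the instance also means the `if attempt < max_retries - 1` guard no longer sleeps pointlessly after the last failed attempt, and the `to_list()` call moves inside the `try` so the cache is only populated from a successful read.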