Commit d62976f 1 parent 56cdd01 commit d62976f Copy full SHA for d62976f
File tree 1 file changed +21
-3
lines changed
application/data_access/overview
1 file changed +21
-3
lines changed Original file line number Diff line number Diff line change 1
1
import datetime
2
+ import re
2
3
3
4
import pandas as pd
4
5
@@ -153,19 +154,36 @@ def get_typology():
153
154
return [dict (zip (columns , row )) for row in rows .to_numpy ()]
154
155
155
156
157
def get_spec_filename(dataset_urls):
    """Extract dataset names from a ";"-separated string of specification URLs.

    Each URL is searched for a path segment followed by ``.md``; the segment
    (without the extension) is taken as the dataset name. URLs that contain
    no such segment are skipped.

    Args:
        dataset_urls: A string of one or more URLs separated by ";". Any
            non-string value (None, NaN, pd.NA, lists, ...) is treated as
            "no URLs".

    Returns:
        A list of extracted names in the order the URLs appear; an empty
        list when the input is not a string or nothing matches.
    """
    # The type check alone rejects every missing-value marker (NaN is a float,
    # None and pd.NA are not str). Calling pd.isna() first is unnecessary and
    # raises "truth value is ambiguous" when a list/array is passed in.
    if not isinstance(dataset_urls, str):
        return []

    # Search each URL once and reuse the match object for both the filter and
    # the extraction.
    matches = (
        re.search(r"/([^/]+)\.md", url) for url in dataset_urls.split(";")
    )
    return [match.group(1) for match in matches if match]
171
+
172
+
156
173
def get_frequency ():
157
174
# used by get_datasets_summary
158
175
df = pd .read_csv (
159
176
"https://design.planning.data.gov.uk/planning-consideration/planning-considerations.csv"
160
177
)
161
178
162
- if "name " not in df .columns or "frequency-of-updates" not in df .columns :
179
+ if "datasets " not in df .columns or "frequency-of-updates" not in df .columns :
163
180
raise ValueError (
164
- "CSV must contain 'dataset ' and 'frequency-of-updates' columns"
181
+ "CSV must contain 'datasets ' and 'frequency-of-updates' columns"
165
182
)
166
183
167
- df .rename (columns = {"slug" : "pipeline" }, inplace = True )
168
184
df ["frequency-of-updates" ].fillna ("" , inplace = True )
185
+ df ["pipeline" ] = df ["datasets" ].apply (get_spec_filename )
186
+ df = df .explode ("pipeline" )
169
187
columns = ["pipeline" , "frequency-of-updates" ]
170
188
data = df [columns ].to_dict (orient = "records" )
171
189
You can’t perform that action at this time.
0 commit comments