Skip to content

Commit d62976f

Browse files
committed
update frequency of updates column
1 parent 56cdd01 commit d62976f

File tree

1 file changed

+21
-3
lines changed

1 file changed

+21
-3
lines changed

application/data_access/overview/source_and_resource_queries.py

+21-3
Original file line number | Diff line number | Diff line change
@@ -1,4 +1,5 @@
11
import datetime
2+
import re
23

34
import pandas as pd
45

@@ -153,19 +154,36 @@ def get_typology():
153154
return [dict(zip(columns, row)) for row in rows.to_numpy()]
154155

155156

157+
def get_spec_filename(dataset_urls):
    """Extract dataset names from a ';'-separated string of specification URLs.

    Each URL is searched for a path segment followed by ``.md``; that segment
    (without the extension) is taken as the dataset name. URLs containing no
    such segment are skipped.

    Args:
        dataset_urls: A string of one or more URLs separated by ``;``. Any
            non-string value (``None``, NaN, a list, ...) is treated as
            "no URLs".

    Returns:
        A list of the extracted names in input order; empty when the input is
        not a string or when no URL matches.
    """
    # pandas represents a missing cell as float NaN, which is not a str, so
    # this single check covers it. It also avoids calling pd.isna on
    # array-likes, whose element-wise result cannot be used as a boolean.
    if not isinstance(dataset_urls, str):
        return []

    names = []
    for url in dataset_urls.split(";"):
        # extract dataset name from specification file URL; search once per
        # URL rather than once to filter and again to read the group
        match = re.search(r"/([^/]+)\.md", url)
        if match:
            names.append(match.group(1))

    return names
171+
172+
156173
def get_frequency():
157174
# used by get_datasets_summary
158175
df = pd.read_csv(
159176
"https://design.planning.data.gov.uk/planning-consideration/planning-considerations.csv"
160177
)
161178

162-
if "name" not in df.columns or "frequency-of-updates" not in df.columns:
179+
if "datasets" not in df.columns or "frequency-of-updates" not in df.columns:
163180
raise ValueError(
164-
"CSV must contain 'dataset' and 'frequency-of-updates' columns"
181+
"CSV must contain 'datasets' and 'frequency-of-updates' columns"
165182
)
166183

167-
df.rename(columns={"slug": "pipeline"}, inplace=True)
168184
df["frequency-of-updates"].fillna("", inplace=True)
185+
df["pipeline"] = df["datasets"].apply(get_spec_filename)
186+
df = df.explode("pipeline")
169187
columns = ["pipeline", "frequency-of-updates"]
170188
data = df[columns].to_dict(orient="records")
171189

0 commit comments

Comments
 (0)