
Commit 0120801

Merge pull request #40 from digital-land/internal_error_graph
Internal error graph
2 parents d62976f + 62f253f · commit 0120801

17 files changed: +3877 -3790 lines

application/blueprints/report/views.py

+5 lines

@@ -50,6 +50,7 @@
     get_contributions_and_errors_by_day,
     get_endpoint_errors_and_successes_by_week,
     get_endpoints_added_by_week,
+    get_internal_issues_by_day,
     get_issue_counts,
 )
 from application.utils import (
@@ -78,6 +79,9 @@ def overview():
         endpoint_successes_percentages_timeseries,
         endpoint_errors_percentages_timeseries,
     ) = get_endpoint_errors_and_successes_by_week(contributions_and_errors_by_day_df)
+
+    internal_errors_timeseries = get_internal_issues_by_day()
+
     summary_metrics = {
         "contributions": summary_contributions,
         "endpoint_errors": summary_endpoint_errors,
@@ -89,6 +93,7 @@ def overview():
         "endpoint_successes_timeseries": endpoint_successes_timeseries,
         "endpoint_successes_percentages_timeseries": endpoint_successes_percentages_timeseries,
         "endpoint_errors_percentages_timeseries": endpoint_errors_percentages_timeseries,
+        "internal_errors_timeseries": internal_errors_timeseries,
     }

     issue_summary = get_issue_summary()
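These hunks only wire the new timeseries into the overview view; the implementation of get_internal_issues_by_day lives in another file of this commit and is not part of this extract. As a rough, hedged sketch of the shape such a helper could take, built on the get_datasette_query utility from the Datasette helper module shown next, with the import path, database, table, and column names purely illustrative assumptions:

# Hypothetical sketch only: the real get_internal_issues_by_day is defined
# elsewhere in this commit and may differ. The import path, database name,
# table and column names below are illustrative assumptions, not the schema.
import pandas as pd

from application.data_access.datasette_utils import get_datasette_query  # assumed path


def get_internal_issues_by_day():
    sql = """
        select entry_date, count(*) as count
        from issue
        where severity = 'internal'
        group by entry_date
        order by entry_date
    """
    df = get_datasette_query("digital-land", sql)
    if df is None or df.empty:
        # Mirror the helper's failure mode with an empty frame the view can still plot.
        return pd.DataFrame(columns=["entry_date", "count"])
    return df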
+144 -144 lines

@@ -1,144 +1,144 @@
All 144 lines of this file are removed and re-added by the commit; the removed and re-added content is textually identical as rendered here, consistent with a formatting or line-ending-only rewrite, so the file is reproduced once below.

import datetime
import logging

import pandas as pd
import requests
from requests import adapters
from urllib3 import Retry


def get_datasette_http():
    """
    Function to return http for the use of querying datasette,
    specifically to add retries for larger queries
    """
    retry_strategy = Retry(total=3, status_forcelist=[400], backoff_factor=0)

    adapter = adapters.HTTPAdapter(max_retries=retry_strategy)

    http = requests.Session()
    http.mount("https://", adapter)
    http.mount("http://", adapter)

    return http


def get_datasette_query(
    db, sql, filter=None, url="https://datasette.planning.data.gov.uk"
):
    url = f"{url}/{db}.json"
    params = {"sql": sql, "_shape": "array", "_size": "max"}
    if filter:
        params.update(filter)
    try:
        http = get_datasette_http()
        resp = http.get(url, params=params)
        resp.raise_for_status()
        df = pd.DataFrame.from_dict(resp.json())
        return df
    except Exception as e:
        logging.warning(e)
        return None


def get_datasette_query_issue_summary(
    db, filter=None, url="https://datasette.planning.data.gov.uk"
):
    url = f"{url}/{db}.json"
    params = {}

    if filter:
        params.update(filter)

    try:
        http = get_datasette_http()
        all_rows = []

        while True:
            """
            Datasette returns a max of 1000 rows. This should be able to be changed but for now,
            if there is more than 1000 rows, a pagination next will be returned in the response.
            We can use this to fetch the next 1000 rows repeatedly until all rows have been accumulated.
            """

            resp = http.get(url, params=params)
            response_json = resp.json()
            rows = response_json.get("rows", [])

            # Accumulate rows
            all_rows.extend(rows)

            # Check if there's a "next" token for pagination
            next_token = response_json.get("next")
            if not next_token:
                break

            params["_next"] = next_token
        if all_rows and response_json.get("columns"):
            df = pd.DataFrame(all_rows, columns=response_json["columns"])
            return df
        else:
            logging.error("No rows or columns available to create a DataFrame")
            return None

    except Exception as e:
        logging.warning(f"Exception occurred: {e}")
        return None


# def get_datasets_summary():
#     # get all the datasets listed with their active status
#     all_datasets = index_by("dataset", get_datasets())
#     missing = []

#     # add the publisher coverage numbers
#     dataset_coverage = publisher_coverage()
#     for d in dataset_coverage:
#         if all_datasets.get(d["pipeline"]):
#             all_datasets[d["pipeline"]] = {**all_datasets[d["pipeline"]], **d}
#         else:
#             missing.append(d["pipeline"])

#     # add the total resource count
#     dataset_resource_counts = resources_by_dataset()
#     for d in dataset_resource_counts:
#         if all_datasets.get(d["pipeline"]):
#             all_datasets[d["pipeline"]] = {**all_datasets[d["pipeline"]], **d}
#         else:
#             missing.append(d["pipeline"])

#     # add the first and last resource dates
#     dataset_resource_dates = first_and_last_resource()
#     for d in dataset_resource_dates:
#         if all_datasets.get(d["pipeline"]):
#             all_datasets[d["pipeline"]] = {**all_datasets[d["pipeline"]], **d}
#         else:
#             missing.append(d["pipeline"])

#     return all_datasets


def generate_weeks(number_of_weeks=None, date_from=None):
    now = datetime.datetime.now()
    monday = now - datetime.timedelta(days=now.weekday())
    dates = []

    if date_from:
        date = datetime.datetime.strptime(date_from, "%Y-%m-%d")
        while date < now:
            week_number = int(date.strftime("%W"))
            year_number = int(date.year)
            dates.append(
                {"date": date, "week_number": week_number, "year_number": year_number}
            )
            date = date + datetime.timedelta(days=7)
        return dates
    elif number_of_weeks:
        for week in range(0, number_of_weeks):
            date = monday - datetime.timedelta(weeks=week)
            week_number = int(date.strftime("%W"))
            year_number = int(date.year)
            dates.append(
                {"date": date, "week_number": week_number, "year_number": year_number}
            )
        return list(reversed(dates))
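
For reference, a short usage sketch of the helpers above. The import path is an assumption (the file name is not preserved in this extract), and the database name and SQL are illustrative only:

# Illustrative usage only; the import path, database name, and SQL are
# assumptions, not taken from this commit.
from application.data_access.datasette_utils import (
    generate_weeks,
    get_datasette_query,
)

# Run a read-only SQL query against the public Datasette instance; the helper
# returns a pandas DataFrame on success or None if the request fails.
df = get_datasette_query("digital-land", "select dataset, name from dataset limit 5")
if df is not None:
    print(df.head())

# Build week buckets for charting: either a fixed number of recent weeks...
recent_weeks = generate_weeks(number_of_weeks=12)
# ...or every week from a given start date up to now.
weeks_since_2023 = generate_weeks(date_from="2023-01-01")
print(len(recent_weeks), len(weeks_since_2023))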
