generated from ministryofjustice/hmpps-template-kotlin
-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathcommon.py
200 lines (171 loc) · 8.95 KB
/
common.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
"""A module containing static variables and common methods all in one place"""
import os
import requests
import yaml
import pandas as pd
# Spec used by extract_data() when the caller does not supply a url.
DEFAULT_URL = "https://prison-api-dev.prison.service.justice.gov.uk/v3/api-docs"

# Raw OpenAPI/Swagger sources (JSON api-docs endpoints or YAML files).
# Commented-out entries are kept for reference only and are not part of the list.
URLS = [
    # API Docs
    "https://manage-adjudications-api-dev.hmpps.service.justice.gov.uk/v3/api-docs",
    "https://assess-risks-and-needs-dev.hmpps.service.justice.gov.uk/v3/api-docs",
    "https://raw.githubusercontent.com/ministryofjustice/hmpps-complexity-of-need/main/Complexity%20Of%20Need%20API%20Specification.yaml",
    # "https://court-register-dev.hmpps.service.justice.gov.uk/v3/api-docs",
    "https://raw.githubusercontent.com/ministryofjustice/curious-API/main/curious-api-specification.yaml",
    "https://sign-in-dev.hmpps.service.justice.gov.uk/auth/v3/api-docs",
    # "https://keyworker-api-dev.prison.service.justice.gov.uk/v3/api-docs",
    # "https://allocation-manager-staging.apps.live-1.cloud-platform.service.justice.gov.uk/api-docs/index.html",
    "https://community-api.test.probation.service.justice.gov.uk/v3/api-docs/Community%20API",
    # "https://probation-offender-events-dev.hmpps.service.justice.gov.uk/swagger-ui.html",
    "https://probation-offender-search-dev.hmpps.service.justice.gov.uk/v3/api-docs",
    # "https://api-dev.prison.service.justice.gov.uk/v3/api-docs" doesn't work anymore
    "https://prison-api-dev.prison.service.justice.gov.uk/v3/api-docs",
    "https://offender-events-dev.prison.service.justice.gov.uk/v3/api-docs",
    "https://prisoner-search-dev.prison.service.justice.gov.uk/v3/api-docs",
    "https://offender-dev.aks-dev-1.studio-hosting.service.justice.gov.uk/v3/api-docs",
    "https://prison-register-dev.hmpps.service.justice.gov.uk/v3/api-docs",
    "https://hmpps-allocations-dev.hmpps.service.justice.gov.uk/v3/api-docs",
    "https://probation-teams-dev.prison.service.justice.gov.uk/v3/api-docs",
    "https://hmpps-interventions-service-dev.apps.live-1.cloud-platform.service.justice.gov.uk/v3/api-docs",
    "https://restricted-patients-api-dev.hmpps.service.justice.gov.uk/v3/api-docs",
    "https://hmpps-staff-lookup-service-dev.hmpps.service.justice.gov.uk/v3/api-docs",
    "https://hmpps-tier-dev.hmpps.service.justice.gov.uk/v3/api-docs",
    "https://token-verification-api-dev.prison.service.justice.gov.uk/v3/api-docs",
    "https://hmpps-workload-dev.hmpps.service.justice.gov.uk/v3/api-docs",
    # Microservices
    # NOTE(review): this repeats the Complexity Of Need entry listed under
    # "API Docs", so anything iterating URLS sees that spec twice. Kept as-is
    # to preserve the list contents; confirm whether the repeat is intentional.
    "https://raw.githubusercontent.com/ministryofjustice/hmpps-complexity-of-need/main/Complexity%20Of%20Need%20API%20Specification.yaml",
]

# Every output path is derived from OUTPUTS_DIR so the prefix is defined once.
OUTPUTS_DIR = "outputs/"
SCHEMA_SEARCH_REPORT = OUTPUTS_DIR + "schema_report.csv"
PATH_SEARCH_REPORT = OUTPUTS_DIR + "path_report.csv"
SCHEMA_FIELD_FILE = OUTPUTS_DIR + "schema_field.csv"
SCHEMA_PARENT_CHILD_FILE = OUTPUTS_DIR + "schema_parent_child.csv"
# The "hierachy" spelling is left untouched: it is the on-disk file name.
SCHEMA_DIAGRAM = OUTPUTS_DIR + "schema_hierachy.dot"
ENDPOINTS_FILE = OUTPUTS_DIR + "endpoint_analysis.csv"

# Value passed as the `timeout` argument of requests.get in extract_data().
TIMEOUT = 10
def prepare_directory(filename=""):
    """
    Prepare the current working directory for the outputs of any script.

    Ensures the "outputs/" directory exists and, when a filename is given,
    deletes that file if it is present so the script starts from a clean
    output file.

    Parameters:
        (optional) filename (str): The path of the output file to be removed.
            An empty string (the default) or None removes nothing.

    Raises:
        FileExistsError: if "outputs/" exists but is not a directory.
        OSError: if the file exists but cannot be removed.
    """
    # exist_ok=True creates the directory only when needed, without the
    # check-then-create race of os.path.exists() followed by os.mkdir().
    os.makedirs("outputs/", exist_ok=True)

    # Check for "no filename" first so os.path.exists() never receives None.
    if filename and os.path.exists(filename):
        os.remove(filename)
def extract_data(url=DEFAULT_URL):
    """
    Make a GET request against the provided url and return the parsed body.

    URLs ending in 'yaml' are parsed with yaml.safe_load; every other URL is
    parsed as JSON.

    Parameters:
        url (string): a url string to a raw json or yaml source for an API documentation.
            Default: the DEFAULT_URL constant

    Returns:
        data (dict): A dictionary object representing the response yaml/json,
        Unsuccessful request: An empty dictionary (non-200 status, invalid
            JSON/YAML body, timeout, or too many redirects)

    Raises:
        SystemExit: for any other requests.exceptions.RequestException.
    """
    try:
        response = requests.get(url, timeout=TIMEOUT)
        if response.status_code != 200:
            print(url, " responded with ", response.status_code,
                  " returning empty dictionary")
            # A real empty dict (not the string '{}') so callers can treat
            # success and failure results uniformly.
            return {}
        if url.endswith('yaml'):
            return yaml.safe_load(response.text)
        return response.json()
    except requests.JSONDecodeError:
        print(f"{url} is not a valid json source, returning empty dictionary object")
    except yaml.YAMLError:
        # Malformed YAML is handled the same way as malformed JSON.
        print(f"{url} is not a valid yaml source, returning empty dictionary object")
    except requests.exceptions.Timeout:
        print(f"Timeout exception caught for {url}, returning empty dictionary object")
    except requests.exceptions.TooManyRedirects:
        # Tell the user their URL was bad and try a different one
        print(f"TooManyRedirects exception caught for {url}, Returning empty dictionary object")
    except requests.exceptions.RequestException as r_e:
        # catastrophic error. bail.
        raise SystemExit(r_e) from r_e
    # Reached only from one of the handled failure branches above.
    return {}
def _string_ref(node):
    """Return node["$ref"] when node is a dict holding a string reference, else ""."""
    if isinstance(node, dict):
        ref = node.get("$ref")
        if isinstance(ref, str):
            return ref
    return ""


def find_parent_schema(response_dict, child_schema):
    """
    Search all schemas for any reference to the child schema (i.e. find all direct parents).
    Returns the results in a dataframe.

    This function assumes the dictionary object passed in is generated from OpenAPI Swagger spec.
    Schemas without a "properties" mapping (for example enums) are skipped, and
    a spec without "components" -> "schemas" yields an empty dataframe.

    A field matches when child_schema occurs anywhere inside the field's own
    "$ref" or, failing that, inside its "items" -> "$ref". This is a substring
    test, so a name that is a prefix of another schema name matches both.

    Parameters:
        response_dict (dict): A dictionary object representing the extract from API Docs,
            Example: The output of the extract_data function
        child_schema (str): A string of the schema name to search for

    Returns:
        data_frame (pd.DataFrame): one row per matching field with the columns
            Parent_Schema, Field and Child_Schema (the last path segment of
            the matched reference), indexed from 0.
        No results found: An empty dataframe
    """
    components = response_dict.get("components") if isinstance(response_dict, dict) else None
    schemas = components.get("schemas") if isinstance(components, dict) else None
    if not isinstance(schemas, dict):
        return pd.DataFrame()

    rows = []
    for schema, definition in schemas.items():
        properties = definition.get("properties") if isinstance(definition, dict) else None
        if not isinstance(properties, dict):
            # Nothing to inspect: the schema declares no fields.
            continue
        for field, field_spec in properties.items():
            # Both references are recomputed for every field so a value from
            # a previous field can never leak into this one.
            nested_ref = _string_ref(field_spec)
            nested_item_ref = (
                _string_ref(field_spec.get("items")) if isinstance(field_spec, dict) else ""
            )
            if child_schema in nested_ref:
                matched_ref = nested_ref
            elif child_schema in nested_item_ref:
                matched_ref = nested_item_ref
            else:
                continue
            rows.append({'Parent_Schema': schema,
                         'Field': field,
                         'Child_Schema': matched_ref.split('/')[-1]})

    if not rows:
        return pd.DataFrame()
    # One DataFrame built from all rows replaces a concat of one-row frames.
    return pd.DataFrame(rows, columns=['Parent_Schema', 'Field', 'Child_Schema'])
def get_nested_dictionary_or_value(my_dict, keys, return_value=0):
    """
    Walk a deeply nested dictionary along a list of keys.

    Use this to define generic search functions into nested dict objects
    without the dict[][] nomenclature. If any level along the way is missing
    (or is not subscriptable), the function exits without error and returns
    return_value instead.

    Parameters:
        my_dict (dict): Dictionary object to explore.
        keys (list): A list of keys representing each nested level.
        return_value: What to return when the path cannot be followed.
            default: 0

    Returns:
        The nested dictionary or value found at the end of the key path
        (my_dict itself when keys is empty), or return_value when any key
        along the path cannot be resolved, at whatever depth.

    Example:
        Suppose you have a dictionary like so:
        `my_dict = {"a": {"b": {"c": {"d": 1}}}}`
        To get to the `1`, instead of having to write:
        `my_dict["a"]["b"]["c"]["d"]`
        You can use this function by passing in those keys as a list, like so:
        get_nested_dictionary_or_value(my_dict, ["a", "b", "c", "d"])

    Usage:
        Because a missing key yields return_value rather than a
        TypeError/KeyError, you can iterate over nested objects that might
        not always exist and use return_value to check for their existence
        in your own logic.
    """
    current = my_dict
    # Iterating keeps the caller's return_value in force at every depth.
    for key in keys:
        try:
            current = current[key]
        except (TypeError, KeyError, IndexError):
            return return_value
    return current