Skip to content

Commit 23802f8

Browse files
authored
[FSTORE-1484] Make the way project ID is acquired uniform (logicalclocks#256)
* Make the way project ID is acquired uniform. That is, make it so that the project ID is always acquired from the current client. * Ruff
1 parent 3de1efc commit 23802f8

39 files changed

+315
-518
lines changed

python/hopsworks/connection.py

+24 -1
Original file line number | Diff line number | Diff line change
@@ -14,6 +14,7 @@
1414
# limitations under the License.
1515
#
1616

17+
import importlib
1718
import os
1819
import re
1920
import sys
@@ -111,6 +112,7 @@ def __init__(
111112
self._api_key_file = api_key_file
112113
self._api_key_value = api_key_value
113114
self._connected = False
115+
self._engine = None
114116

115117
self.connect()
116118

@@ -239,12 +241,33 @@ def connect(self):
239241
try:
240242
# init client
241243
if client.base.Client.REST_ENDPOINT not in os.environ:
244+
# determine engine, needed to init client
245+
if (self._engine is not None and self._engine.lower() == "spark") or (
246+
self._engine is None and importlib.util.find_spec("pyspark")
247+
):
248+
self._engine = "spark"
249+
elif (
250+
self._engine is not None and self._engine.lower() == "python"
251+
) or (self._engine is None and not importlib.util.find_spec("pyspark")):
252+
self._engine = "python"
253+
elif self._engine is not None and self._engine.lower() == "training":
254+
self._engine = "training"
255+
elif (
256+
self._engine is not None
257+
and self._engine.lower() == "spark-no-metastore"
258+
):
259+
self._engine = "spark-no-metastore"
260+
else:
261+
raise ConnectionError(
262+
"Engine you are trying to initialize is unknown. "
263+
"Supported engines are `'spark'`, `'python'` and `'training'`."
264+
)
242265
client.init(
243266
"external",
244267
self._host,
245268
self._port,
246269
self._project,
247-
None,
270+
self._engine,
248271
self._hostname_verification,
249272
self._trust_store_path,
250273
self._cert_folder,

python/hopsworks/core/execution_api.py

+7 -16
Original file line number | Diff line number | Diff line change
@@ -18,25 +18,19 @@
1818

1919

2020
class ExecutionsApi:
21-
def __init__(
22-
self,
23-
project_id,
24-
):
25-
self._project_id = project_id
26-
2721
def _start(self, job, args: str = None):
2822
_client = client.get_instance()
29-
path_params = ["project", self._project_id, "jobs", job.name, "executions"]
23+
path_params = ["project", _client._project_id, "jobs", job.name, "executions"]
3024

3125
return execution.Execution.from_response_json(
32-
_client._send_request("POST", path_params, data=args), self._project_id, job
26+
_client._send_request("POST", path_params, data=args), job
3327
)
3428

3529
def _get(self, job, id):
3630
_client = client.get_instance()
3731
path_params = [
3832
"project",
39-
self._project_id,
33+
_client._project_id,
4034
"jobs",
4135
job.name,
4236
"executions",
@@ -45,14 +39,12 @@ def _get(self, job, id):
4539

4640
headers = {"content-type": "application/json"}
4741
return execution.Execution.from_response_json(
48-
_client._send_request("GET", path_params, headers=headers),
49-
self._project_id,
50-
job,
42+
_client._send_request("GET", path_params, headers=headers), job
5143
)
5244

5345
def _get_all(self, job):
5446
_client = client.get_instance()
55-
path_params = ["project", self._project_id, "jobs", job.name, "executions"]
47+
path_params = ["project", _client._project_id, "jobs", job.name, "executions"]
5648

5749
query_params = {"sort_by": "submissiontime:desc"}
5850

@@ -61,15 +53,14 @@ def _get_all(self, job):
6153
_client._send_request(
6254
"GET", path_params, headers=headers, query_params=query_params
6355
),
64-
self._project_id,
6556
job,
6657
)
6758

6859
def _delete(self, job_name, id):
6960
_client = client.get_instance()
7061
path_params = [
7162
"project",
72-
self._project_id,
63+
_client._project_id,
7364
"jobs",
7465
job_name,
7566
"executions",
@@ -81,7 +72,7 @@ def _stop(self, job_name: str, id: int) -> None:
8172
_client = client.get_instance()
8273
path_params = [
8374
"project",
84-
self._project_id,
75+
_client._project_id,
8576
"jobs",
8677
job_name,
8778
"executions",

python/hopsworks/core/flink_cluster_api.py

+9 -23
Original file line number | Diff line number | Diff line change
@@ -23,14 +23,8 @@
2323

2424

2525
class FlinkClusterApi:
26-
def __init__(
27-
self,
28-
project_id,
29-
project_name,
30-
):
31-
self._project_id = project_id
32-
self._project_name = project_name
33-
self._job_api = job_api.JobsApi(project_id, project_name)
26+
def __init__(self):
27+
self._job_api = job_api.JobsApi()
3428

3529
def get_configuration(self):
3630
"""Get configuration for the Flink cluster.
@@ -85,21 +79,17 @@ def setup_cluster(self, name: str, config=None):
8579
def _create_cluster(self, name: str, config: dict):
8680
_client = client.get_instance()
8781

88-
config = util.validate_job_conf(config, self._project_name)
82+
config = util.validate_job_conf(config, _client._project_name)
8983

90-
path_params = ["project", self._project_id, "jobs", name]
84+
path_params = ["project", _client._project_id, "jobs", name]
9185

9286
headers = {"content-type": "application/json"}
9387
flink_job = job.Job.from_response_json(
9488
_client._send_request(
9589
"PUT", path_params, headers=headers, data=json.dumps(config)
96-
),
97-
self._project_id,
98-
self._project_name,
99-
)
100-
flink_cluster_obj = flink_cluster.FlinkCluster(
101-
flink_job, self._project_id, self._project_name
90+
)
10291
)
92+
flink_cluster_obj = flink_cluster.FlinkCluster(flink_job)
10393
print(flink_cluster_obj.get_url())
10494
return flink_cluster_obj
10595

@@ -126,20 +116,16 @@ def get_cluster(self, name: str):
126116
_client = client.get_instance()
127117
path_params = [
128118
"project",
129-
self._project_id,
119+
_client._project_id,
130120
"jobs",
131121
name,
132122
]
133123
query_params = {"expand": ["creator"]}
134124
flink_job = job.Job.from_response_json(
135-
_client._send_request("GET", path_params, query_params=query_params),
136-
self._project_id,
137-
self._project_name,
125+
_client._send_request("GET", path_params, query_params=query_params)
138126
)
139127

140-
return flink_cluster.FlinkCluster(
141-
flink_job, self._project_id, self._project_name
142-
)
128+
return flink_cluster.FlinkCluster(flink_job)
143129

144130
def _get_job(self, execution, job_id):
145131
"""Get specific job from the specific execution of the flink cluster.

python/hopsworks/core/job_api.py

+13 -29
Original file line number | Diff line number | Diff line change
@@ -21,14 +21,6 @@
2121

2222

2323
class JobsApi:
24-
def __init__(
25-
self,
26-
project_id,
27-
project_name,
28-
):
29-
self._project_id = project_id
30-
self._project_name = project_name
31-
3224
def create_job(self, name: str, config: dict):
3325
"""Create a new job or update an existing one.
3426
@@ -57,17 +49,15 @@ def create_job(self, name: str, config: dict):
5749
"""
5850
_client = client.get_instance()
5951

60-
config = util.validate_job_conf(config, self._project_name)
52+
config = util.validate_job_conf(config, _client._project_name)
6153

62-
path_params = ["project", self._project_id, "jobs", name]
54+
path_params = ["project", _client._project_id, "jobs", name]
6355

6456
headers = {"content-type": "application/json"}
6557
created_job = job.Job.from_response_json(
6658
_client._send_request(
6759
"PUT", path_params, headers=headers, data=json.dumps(config)
68-
),
69-
self._project_id,
70-
self._project_name,
60+
)
7161
)
7262
print(created_job.get_url())
7363
return created_job
@@ -85,15 +75,13 @@ def get_job(self, name: str):
8575
_client = client.get_instance()
8676
path_params = [
8777
"project",
88-
self._project_id,
78+
_client._project_id,
8979
"jobs",
9080
name,
9181
]
9282
query_params = {"expand": ["creator"]}
9383
return job.Job.from_response_json(
94-
_client._send_request("GET", path_params, query_params=query_params),
95-
self._project_id,
96-
self._project_name,
84+
_client._send_request("GET", path_params, query_params=query_params)
9785
)
9886

9987
def get_jobs(self):
@@ -107,14 +95,12 @@ def get_jobs(self):
10795
_client = client.get_instance()
10896
path_params = [
10997
"project",
110-
self._project_id,
98+
_client._project_id,
11199
"jobs",
112100
]
113101
query_params = {"expand": ["creator"]}
114102
return job.Job.from_response_json(
115-
_client._send_request("GET", path_params, query_params=query_params),
116-
self._project_id,
117-
self._project_name,
103+
_client._send_request("GET", path_params, query_params=query_params)
118104
)
119105

120106
def exists(self, name: str):
@@ -146,7 +132,7 @@ def get_configuration(self, type: str):
146132
_client = client.get_instance()
147133
path_params = [
148134
"project",
149-
self._project_id,
135+
_client._project_id,
150136
"jobs",
151137
type.lower(),
152138
"configuration",
@@ -163,7 +149,7 @@ def _delete(self, job):
163149
_client = client.get_instance()
164150
path_params = [
165151
"project",
166-
self._project_id,
152+
_client._project_id,
167153
"jobs",
168154
str(job.name),
169155
]
@@ -182,20 +168,18 @@ def _update_job(self, name: str, config: dict):
182168

183169
config = util.validate_job_conf(config, self._project_name)
184170

185-
path_params = ["project", self._project_id, "jobs", name]
171+
path_params = ["project", _client._project_id, "jobs", name]
186172

187173
headers = {"content-type": "application/json"}
188174
return job.Job.from_response_json(
189175
_client._send_request(
190176
"PUT", path_params, headers=headers, data=json.dumps(config)
191-
),
192-
self._project_id,
193-
self._project_name,
177+
)
194178
)
195179

196180
def _schedule_job(self, name, schedule_config):
197181
_client = client.get_instance()
198-
path_params = ["project", self._project_id, "jobs", name, "schedule", "v2"]
182+
path_params = ["project", _client._project_id, "jobs", name, "schedule", "v2"]
199183
headers = {"content-type": "application/json"}
200184
method = "PUT" if schedule_config["id"] else "POST"
201185

@@ -207,7 +191,7 @@ def _schedule_job(self, name, schedule_config):
207191

208192
def _delete_schedule_job(self, name):
209193
_client = client.get_instance()
210-
path_params = ["project", self._project_id, "jobs", name, "schedule", "v2"]
194+
path_params = ["project", _client._project_id, "jobs", name, "schedule", "v2"]
211195

212196
return _client._send_request(
213197
"DELETE",

python/hopsworks/core/opensearch_api.py

+5 -12
Original file line number | Diff line number | Diff line change
@@ -21,13 +21,7 @@
2121

2222

2323
class OpenSearchApi:
24-
def __init__(
25-
self,
26-
project_id,
27-
project_name,
28-
):
29-
self._project_id = project_id
30-
self._project_name = project_name
24+
def __init__(self):
3125
self._variable_api = variable_api.VariableApi()
3226

3327
def _get_opensearch_url(self):
@@ -60,7 +54,8 @@ def get_project_index(self, index):
6054
Returns:
6155
A valid opensearch index name.
6256
"""
63-
return (self._project_name + "_" + index).lower()
57+
_client = client.get_instance()
58+
return (_client._project_name + "_" + index).lower()
6459

6560
def get_default_py_config(self):
6661
"""
@@ -91,9 +86,7 @@ def get_default_py_config(self):
9186
constants.OPENSEARCH_CONFIG.USE_SSL: True,
9287
constants.OPENSEARCH_CONFIG.VERIFY_CERTS: True,
9388
constants.OPENSEARCH_CONFIG.SSL_ASSERT_HOSTNAME: False,
94-
constants.OPENSEARCH_CONFIG.CA_CERTS: client.get_instance()._get_ca_chain_path(
95-
self._project_name
96-
),
89+
constants.OPENSEARCH_CONFIG.CA_CERTS: client.get_instance()._get_ca_chain_path(),
9790
}
9891

9992
def _get_authorization_token(self):
@@ -106,7 +99,7 @@ def _get_authorization_token(self):
10699
"""
107100

108101
_client = client.get_instance()
109-
path_params = ["elastic", "jwt", self._project_id]
102+
path_params = ["elastic", "jwt", _client._project_id]
110103

111104
headers = {"content-type": "application/json"}
112105
return _client._send_request("GET", path_params, headers=headers)["token"]

python/hopsworks/engine/execution_engine.py

+3 -3
Original file line number | Diff line number | Diff line change
@@ -24,9 +24,9 @@
2424

2525

2626
class ExecutionEngine:
27-
def __init__(self, project_id=None):
28-
self._dataset_api = dataset_api.DatasetApi(project_id)
29-
self._execution_api = execution_api.ExecutionsApi(project_id)
27+
def __init__(self):
28+
self._dataset_api = dataset_api.DatasetApi()
29+
self._execution_api = execution_api.ExecutionsApi()
3030
self._log = logging.getLogger(__name__)
3131

3232
def download_logs(self, execution, path=None):

0 commit comments

Comments
 (0)