Skip to content

Commit 08c0c69

Browse files
authoredSep 3, 2024
[FSTORE-1455] Add telemetry to hsml and hopsworks (logicalclocks#299)
* Move usage to hopsworks_common * Add telemetry * Add telemetry to hopsworks_common * Remove hopsworks.usage * Fix imports * Ruff * Fix review remarks * Fix Javier review remarks * Ruff
1 parent 2ea73c1 commit 08c0c69

34 files changed

+527
-297
lines changed
 

‎README.md

+17
Original file line numberDiff line numberDiff line change
@@ -164,6 +164,23 @@ data = { "instances": [ model.input_example ] }
164164
predictions = deployment.predict(data)
165165
```
166166

167+
## Usage
168+
169+
Usage data is collected for improving quality of the library.
170+
It is turned on by default if the backend is [Hopsworks Serverless](https://c.app.hopsworks.ai).
171+
To turn it off, use one of the following ways:
172+
```python
173+
# use environment variable
174+
import os
175+
os.environ["ENABLE_HOPSWORKS_USAGE"] = "false"
176+
177+
# use `disable_usage_logging`
178+
import hopsworks
179+
hopsworks.disable_usage_logging()
180+
```
181+
182+
The corresponding source code is in `python/hopsworks_common/usage.py`.
183+
167184
## Tutorials
168185

169186
Need more inspiration or want to learn more about the Hopsworks platform? Check out our [tutorials](https://github.com/logicalclocks/hopsworks-tutorials).

‎python/hopsworks/__init__.py

+5
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,7 @@
3232
from hopsworks.connection import Connection
3333
from hopsworks.core import project_api, secret_api
3434
from hopsworks.decorators import NoHopsworksConnectionError
35+
from hopsworks_common import usage
3536
from requests.exceptions import SSLError
3637

3738

@@ -476,3 +477,7 @@ def _set_active_project(project):
476477
_client = client.get_instance()
477478
if _client._is_external():
478479
_client.provide_project(project.name)
480+
481+
482+
def disable_usage_logging():
483+
usage.disable()

‎python/hopsworks/connection.py

+2
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@
2323
from hopsworks import client, version
2424
from hopsworks.core import project_api, secret_api, variable_api
2525
from hopsworks.decorators import connected, not_connected
26+
from hopsworks_common import usage
2627
from requests.exceptions import ConnectionError
2728

2829

@@ -292,6 +293,7 @@ def connect(self):
292293
self._project_api = project_api.ProjectApi()
293294
self._secret_api = secret_api.SecretsApi()
294295
self._variable_api = variable_api.VariableApi()
296+
usage.init_usage(self._host, self._variable_api.get_version("hopsworks"))
295297
except (TypeError, ConnectionError):
296298
self._connected = False
297299
raise

‎python/hopsworks_common/core/dataset_api.py

+10-1
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@
2424
import time
2525
from concurrent.futures import ThreadPoolExecutor, wait
2626

27-
from hopsworks_common import client, util
27+
from hopsworks_common import client, usage, util
2828
from hopsworks_common.client.exceptions import DatasetException, RestAPIError
2929
from hopsworks_common.core import inode
3030
from tqdm.auto import tqdm
@@ -45,6 +45,7 @@ def __init__(self):
4545
DEFAULT_FLOW_CHUNK_SIZE = 1048576
4646
FLOW_PERMANENT_ERRORS = [404, 413, 415, 500, 501]
4747

48+
@usage.method_logger
4849
def download(self, path: str, local_path: str = None, overwrite: bool = False):
4950
"""Download file from Hopsworks Filesystem to the current working directory.
5051
@@ -136,6 +137,7 @@ def download(self, path: str, local_path: str = None, overwrite: bool = False):
136137

137138
return local_path
138139

140+
@usage.method_logger
139141
def upload(
140142
self,
141143
local_path: str,
@@ -341,6 +343,7 @@ def exists(self, path: str):
341343
except RestAPIError:
342344
return False
343345

346+
@usage.method_logger
344347
def remove(self, path: str):
345348
"""Remove a path in the Hopsworks Filesystem.
346349
@@ -353,6 +356,7 @@ def remove(self, path: str):
353356
path_params = ["project", _client._project_id, "dataset", path]
354357
_client._send_request("DELETE", path_params)
355358

359+
@usage.method_logger
356360
def mkdir(self, path: str):
357361
"""Create a directory in the Hopsworks Filesystem.
358362
@@ -387,6 +391,7 @@ def mkdir(self, path: str):
387391
"POST", path_params, headers=headers, query_params=query_params
388392
)["attributes"]["path"]
389393

394+
@usage.method_logger
390395
def copy(self, source_path: str, destination_path: str, overwrite: bool = False):
391396
"""Copy a file or directory in the Hopsworks Filesystem.
392397
@@ -426,6 +431,7 @@ def copy(self, source_path: str, destination_path: str, overwrite: bool = False)
426431
}
427432
_client._send_request("POST", path_params, query_params=query_params)
428433

434+
@usage.method_logger
429435
def move(self, source_path: str, destination_path: str, overwrite: bool = False):
430436
"""Move a file or directory in the Hopsworks Filesystem.
431437
@@ -466,6 +472,7 @@ def move(self, source_path: str, destination_path: str, overwrite: bool = False)
466472
}
467473
_client._send_request("POST", path_params, query_params=query_params)
468474

475+
@usage.method_logger
469476
def upload_feature_group(self, feature_group, path, dataframe):
470477
# Convert the dataframe into PARQUET for upload
471478
df_parquet = dataframe.to_parquet(index=False)
@@ -493,6 +500,7 @@ def upload_feature_group(self, feature_group, path, dataframe):
493500

494501
chunk_number += 1
495502

503+
@usage.method_logger
496504
def list_files(self, path, offset, limit):
497505
_client = client.get_instance()
498506
path_params = [
@@ -512,6 +520,7 @@ def list_files(self, path, offset, limit):
512520

513521
return inode_lst["count"], inode.Inode.from_response_json(inode_lst)
514522

523+
@usage.method_logger
515524
def read_content(self, path: str, dataset_type: str = "DATASET"):
516525
_client = client.get_instance()
517526

‎python/hopsworks_common/core/environment_api.py

+4-1
Original file line numberDiff line numberDiff line change
@@ -17,14 +17,15 @@
1717
import json
1818
from typing import List, Optional
1919

20-
from hopsworks_common import client, environment
20+
from hopsworks_common import client, environment, usage
2121
from hopsworks_common.engine import environment_engine
2222

2323

2424
class EnvironmentApi:
2525
def __init__(self):
2626
self._environment_engine = environment_engine.EnvironmentEngine()
2727

28+
@usage.method_logger
2829
def create_environment(
2930
self,
3031
name: str,
@@ -80,6 +81,7 @@ def create_environment(
8081

8182
return env
8283

84+
@usage.method_logger
8385
def get_environments(self) -> List[environment.Environment]:
8486
"""
8587
Get all available python environments in the project
@@ -95,6 +97,7 @@ def get_environments(self) -> List[environment.Environment]:
9597
)
9698
)
9799

100+
@usage.method_logger
98101
def get_environment(self, name: str) -> environment.Environment:
99102
"""Get handle for the Python environment for the project
100103

‎python/hopsworks_common/core/flink_cluster_api.py

+4-1
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@
1717
import json
1818
import os
1919

20-
from hopsworks_common import client, flink_cluster, job, util
20+
from hopsworks_common import client, flink_cluster, job, usage, util
2121
from hopsworks_common.client.exceptions import RestAPIError
2222
from hopsworks_common.core import job_api
2323

@@ -26,6 +26,7 @@ class FlinkClusterApi:
2626
def __init__(self):
2727
self._job_api = job_api.JobApi()
2828

29+
@usage.method_logger
2930
def get_configuration(self):
3031
"""Get configuration for the Flink cluster.
3132
@@ -36,6 +37,7 @@ def get_configuration(self):
3637
"""
3738
return self._job_api.get_configuration("FLINK")
3839

40+
@usage.method_logger
3941
def setup_cluster(self, name: str, config=None):
4042
"""Create a new flink job representing a flink cluster, or update an existing one.
4143
@@ -95,6 +97,7 @@ def _create_cluster(self, name: str, config: dict):
9597
)
9698
return flink_cluster_obj
9799

100+
@usage.method_logger
98101
def get_cluster(self, name: str):
99102
"""Get the job corresponding to the flink cluster.
100103
```python

‎python/hopsworks_common/core/git_api.py

+7
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@
2424
git_file_status,
2525
git_op_execution,
2626
git_repo,
27+
usage,
2728
util,
2829
)
2930
from hopsworks_common.client.exceptions import GitException
@@ -38,6 +39,7 @@ def __init__(self):
3839
self._git_provider_api = git_provider_api.GitProviderApi()
3940
self._log = logging.getLogger(__name__)
4041

42+
@usage.method_logger
4143
def clone(self, url: str, path: str, provider: str = None, branch: str = None):
4244
"""Clone a new Git Repo in to Hopsworks Filesystem.
4345
@@ -101,6 +103,7 @@ def clone(self, url: str, path: str, provider: str = None, branch: str = None):
101103
created_repo = self.get_repo(git_op.repository.name, git_op.repository.path)
102104
return created_repo
103105

106+
@usage.method_logger
104107
def get_repos(self):
105108
"""Get the existing Git repositories
106109
@@ -120,6 +123,7 @@ def get_repos(self):
120123
_client._send_request("GET", path_params, query_params=query_params)
121124
)
122125

126+
@usage.method_logger
123127
def get_providers(self):
124128
"""Get the configured Git providers
125129
@@ -130,6 +134,7 @@ def get_providers(self):
130134
"""
131135
return self._git_provider_api._get_providers()
132136

137+
@usage.method_logger
133138
def get_provider(self, provider: str):
134139
"""Get the configured Git provider
135140
@@ -142,6 +147,7 @@ def get_provider(self, provider: str):
142147
"""
143148
return self._git_provider_api._get_provider(provider)
144149

150+
@usage.method_logger
145151
def set_provider(self, provider: str, username: str, token: str):
146152
"""Configure a Git provider
147153
@@ -165,6 +171,7 @@ def set_provider(self, provider: str, username: str, token: str):
165171
"""
166172
self._git_provider_api._set_provider(provider, username, token)
167173

174+
@usage.method_logger
168175
def get_repo(self, name: str, path: str = None):
169176
"""Get the cloned Git repository
170177

‎python/hopsworks_common/core/job_api.py

+12-1
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@
1919
import json
2020
from typing import Any, Dict, Union
2121

22-
from hopsworks_common import client, execution, job, job_schedule, util
22+
from hopsworks_common import client, execution, job, job_schedule, usage, util
2323
from hopsworks_common.client.exceptions import RestAPIError
2424
from hopsworks_common.core import (
2525
ingestion_job_conf,
@@ -28,6 +28,7 @@
2828

2929

3030
class JobApi:
31+
@usage.method_logger
3132
def create_job(self, name: str, config: dict):
3233
"""Create a new job or update an existing one.
3334
@@ -69,6 +70,7 @@ def create_job(self, name: str, config: dict):
6970
print(f"Job created successfully, explore it at {created_job.get_url()}")
7071
return created_job
7172

73+
@usage.method_logger
7274
def get_job(self, name: str):
7375
"""Get a job.
7476
@@ -91,6 +93,7 @@ def get_job(self, name: str):
9193
_client._send_request("GET", path_params, query_params=query_params)
9294
)
9395

96+
@usage.method_logger
9497
def get_jobs(self):
9598
"""Get all jobs.
9699
@@ -110,6 +113,7 @@ def get_jobs(self):
110113
_client._send_request("GET", path_params, query_params=query_params)
111114
)
112115

116+
@usage.method_logger
113117
def exists(self, name: str):
114118
"""Check if a job exists.
115119
@@ -126,6 +130,7 @@ def exists(self, name: str):
126130
except RestAPIError:
127131
return False
128132

133+
@usage.method_logger
129134
def get_configuration(self, type: str):
130135
"""Get configuration for the specific job type.
131136
@@ -206,6 +211,7 @@ def _delete_schedule_job(self, name):
206211
path_params,
207212
)
208213

214+
@usage.method_logger
209215
def create(
210216
self,
211217
name: str,
@@ -223,18 +229,21 @@ def create(
223229
)
224230
)
225231

232+
@usage.method_logger
226233
def launch(self, name: str, args: str = None) -> None:
227234
_client = client.get_instance()
228235
path_params = ["project", _client._project_id, "jobs", name, "executions"]
229236

230237
_client._send_request("POST", path_params, data=args)
231238

239+
@usage.method_logger
232240
def get(self, name: str) -> job.Job:
233241
_client = client.get_instance()
234242
path_params = ["project", _client._project_id, "jobs", name]
235243

236244
return job.Job.from_response_json(_client._send_request("GET", path_params))
237245

246+
@usage.method_logger
238247
def last_execution(self, job: job.Job) -> execution.Execution:
239248
_client = client.get_instance()
240249
path_params = ["project", _client._project_id, "jobs", job.name, "executions"]
@@ -249,6 +258,7 @@ def last_execution(self, job: job.Job) -> execution.Execution:
249258
job=job,
250259
)
251260

261+
@usage.method_logger
252262
def create_or_update_schedule_job(
253263
self, name: str, schedule_config: Dict[str, Any]
254264
) -> job_schedule.JobSchedule:
@@ -263,6 +273,7 @@ def create_or_update_schedule_job(
263273
)
264274
)
265275

276+
@usage.method_logger
266277
def delete_schedule_job(self, name: str) -> None:
267278
_client = client.get_instance()
268279
path_params = ["project", _client._project_id, "jobs", name, "schedule", "v2"]

0 commit comments

Comments
 (0)