Skip to content

Commit f2b9d55

Browse files
authored
Replace hsfs with hopsworks where it is possible in docs (#374)
1 parent 4bc3b37 commit f2b9d55

File tree

7 files changed

+84
-16
lines changed

7 files changed

+84
-16
lines changed

python/hopsworks_common/client/online_store_rest_client.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -305,7 +305,7 @@ def _check_hopsworks_connection(self) -> None:
305305
assert (
306306
client.get_instance() is not None and client.get_instance()._connected
307307
), """Hopsworks Client is not connected. Please connect to Hopsworks cluster
308-
via hopsworks.login or hsfs.connection before initialising the Online Store REST Client.
308+
via hopsworks.login before initialising the Online Store REST Client.
309309
"""
310310
_logger.debug("Hopsworks connection is active.")
311311

python/hopsworks_common/connection.py

+68-1
Original file line numberDiff line numberDiff line change
@@ -484,7 +484,74 @@ def connection(
484484
api_key_file: Optional[str] = None,
485485
api_key_value: Optional[str] = None,
486486
) -> Connection:
487-
"""Connection factory method, accessible through `hopsworks.connection()`."""
487+
"""Connection factory method, accessible through `hopsworks.connection()`.
488+
489+
This factory method is exposed for convenience at the top level of the `hopsworks` module:
490+
491+
!!! example "Connection factory"
492+
For convenience, `hopsworks` provides a factory method, accessible from the top level
493+
module, so you don't have to import the `Connection` class manually:
494+
495+
```python
496+
import hopsworks
497+
conn = hopsworks.connection()
498+
```
499+
500+
!!! hint "Save API Key as File"
501+
To get started quickly, you can simply create a file with the previously
502+
created Hopsworks API Key and place it in the environment from which you
503+
wish to connect to Hopsworks.
504+
505+
You can then connect by simply passing the path to the key file when
506+
instantiating a connection:
507+
508+
```python hl_lines="6"
509+
import hopsworks
510+
conn = hopsworks.connection(
511+
'my_instance', # DNS of your Hopsworks instance
512+
443, # Port to reach your Hopsworks instance, defaults to 443
513+
api_key_file='hopsworks.key', # The file containing the API key generated above
514+
hostname_verification=True, # Disable for self-signed certificates
515+
)
516+
project = conn.get_project("my_project")
517+
```
518+
519+
Clients in external clusters need to connect to Hopsworks using an
520+
API key. The API key is generated inside the Hopsworks platform, and requires at
521+
least the "project" scope to be able to access a project.
522+
For more information, see the [integration guides](../setup.md).
523+
524+
# Arguments
525+
host: The hostname of the Hopsworks instance in the form of `[UUID].cloud.hopsworks.ai`,
526+
defaults to `None`. Do **not** use the URL including `https://` when connecting
527+
programmatically.
528+
port: The port on which the Hopsworks instance can be reached,
529+
defaults to `443`.
530+
project: The name of the project to connect to. When running on Hopsworks, this
531+
defaults to the project from which the client is run.
532+
Defaults to `None`.
533+
engine: Which engine to use, `"spark"`, `"python"` or `"training"`. Defaults to `None`,
534+
which initializes the engine to Spark if the environment provides Spark, for
535+
example on Hopsworks and Databricks, or falls back on Hive in Python if Spark is not
536+
available, e.g. on local Python environments or AWS SageMaker. This option
537+
allows you to override this behaviour. `"training"` engine is useful when only
538+
feature store metadata is needed, for example training dataset location and label
539+
information when Hopsworks training experiment is conducted.
540+
hostname_verification: Whether or not to verify Hopsworks' certificate, defaults
541+
to `True`.
542+
trust_store_path: Path on the file system containing the Hopsworks certificates,
543+
defaults to `None`.
544+
cert_folder: The directory to store retrieved HopsFS certificates, defaults to
545+
`"/tmp"`. Only required when running without a Spark environment.
546+
api_key_file: Path to a file containing the API Key, defaults to `None`.
547+
api_key_value: API Key as string, if provided, `api_key_file` will be ignored,
548+
however, this should be used with care, especially if the used notebook or
549+
job script is accessible by multiple parties. Defaults to `None`.
550+
551+
# Returns
552+
`Connection`. Connection handle to perform operations on a
553+
Hopsworks project.
554+
"""
488555
return cls(
489556
host,
490557
port,

python/hopsworks_common/project.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -129,7 +129,7 @@ def get_feature_store(
129129
name: Project name of the feature store.
130130
engine: Which engine to use, `"spark"`, `"python"` or `"training"`.
131131
Defaults to `"python"` when connected to [Serverless Hopsworks](https://app.hopsworks.ai).
132-
See hsfs.Connection.connection documentation for more information.
132+
See [`hopsworks.connection`](connection.md#connection) documentation for more information.
133133
# Returns
134134
`hsfs.feature_store.FeatureStore`: The Feature Store API
135135
# Raises

python/hsfs/feature_store.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -458,7 +458,7 @@ def sql(
458458
For spark engine: Dictionary of read options for Spark.
459459
For python engine:
460460
If running queries on the online feature store, users can provide an entry `{'external': True}`,
461-
this instructs the library to use the `host` parameter in the [`hsfs.connection()`](connection_api.md#connection) to establish the connection to the online feature store.
461+
this instructs the library to use the `host` parameter in the [`hopsworks.login()`](login.md#login) to establish the connection to the online feature store.
462462
If not set, or set to False, the online feature store storage connector is used which relies on
463463
the private ip.
464464
Defaults to `{}`.

python/hsfs/feature_view.py

+7-7
Original file line numberDiff line numberDiff line change
@@ -337,7 +337,7 @@ def init_serving(
337337
Transformation statistics are fetched from training dataset and applied to the feature vector.
338338
external: boolean, optional. If set to True, the connection to the
339339
online feature store is established using the same host as
340-
for the `host` parameter in the [`hsfs.connection()`](connection_api.md#connection) method.
340+
for the `host` parameter in the [`hopsworks.login()`](login.md#login) method.
341341
If set to False, the online feature store storage connector is used which relies on the private IP.
342342
Defaults to True if connection to Hopsworks is established from external environment (e.g. AWS
343343
SageMaker or Google Colab), otherwise to False.
@@ -592,7 +592,7 @@ def get_feature_vector(
592592
providing feature values which are not available in the feature store.
593593
external: boolean, optional. If set to True, the connection to the
594594
online feature store is established using the same host as
595-
for the `host` parameter in the [`hsfs.connection()`](connection_api.md#connection) method.
595+
for the `host` parameter in the [`hopsworks.login()`](login.md#login) method.
596596
If set to False, the online feature store storage connector is used
597597
which relies on the private IP. Defaults to True if connection to Hopsworks is established from
598598
external environment (e.g. AWS SageMaker or Google Colab), otherwise to False.
@@ -705,7 +705,7 @@ def get_feature_vectors(
705705
providing feature values which are not available in the feature store.
706706
external: boolean, optional. If set to True, the connection to the
707707
online feature store is established using the same host as
708-
for the `host` parameter in the [`hsfs.connection()`](connection_api.md#connection) method.
708+
for the `host` parameter in the [`hopsworks.login()`](login.md#login) method.
709709
If set to False, the online feature store storage connector is used
710710
which relies on the private IP. Defaults to True if connection to Hopsworks is established from
711711
external environment (e.g. AWS SageMaker or Google Colab), otherwise to False.
@@ -777,7 +777,7 @@ def get_inference_helper(
777777
Set of required primary keys is [`feature_view.primary_keys`](#primary_keys)
778778
external: boolean, optional. If set to True, the connection to the
779779
online feature store is established using the same host as
780-
for the `host` parameter in the [`hsfs.connection()`](connection_api.md#connection) method.
780+
for the `host` parameter in the [`hopsworks.login()`](login.md#login) method.
781781
If set to False, the online feature store storage connector is used
782782
which relies on the private IP. Defaults to True if connection to Hopsworks is established from
783783
external environment (e.g. AWS SageMaker or Google Colab), otherwise to False.
@@ -835,7 +835,7 @@ def get_inference_helpers(
835835
Set of required primary keys is [`feature_view.primary_keys`](#primary_keys)
836836
external: boolean, optional. If set to True, the connection to the
837837
online feature store is established using the same host as
838-
for the `host` parameter in the [`hsfs.connection()`](connection_api.md#connection) method.
838+
for the `host` parameter in the [`hopsworks.login()`](login.md#login) method.
839839
If set to False, the online feature store storage connector is used
840840
which relies on the private IP. Defaults to True if connection to Hopsworks is established from
841841
external environment (e.g. AWS SageMaker or Google Colab), otherwise to False.
@@ -912,7 +912,7 @@ def find_neighbors(
912912
filter: A filter expression to restrict the search space (optional).
913913
external: boolean, optional. If set to True, the connection to the
914914
online feature store is established using the same host as
915-
for the `host` parameter in the [`hsfs.connection()`](connection_api.md#connection) method.
915+
for the `host` parameter in the [`hopsworks.login()`](login.md#login) method.
916916
If set to False, the online feature store storage connector is used
917917
which relies on the private IP. Defaults to True if connection to Hopsworks is established from
918918
external environment (e.g. AWS SageMaker or Google Colab), otherwise to False.
@@ -3567,7 +3567,7 @@ def transform(
35673567
feature_vector: `Union[List[Any], List[List[Any]], pd.DataFrame, pl.DataFrame]`. The feature vector to be transformed.
35683568
external: boolean, optional. If set to True, the connection to the
35693569
online feature store is established using the same host as
3570-
for the `host` parameter in the [`hsfs.connection()`](connection_api.md#connection) method.
3570+
for the `host` parameter in the [`hopsworks.login()`](login.md#login) method.
35713571
If set to False, the online feature store storage connector is used
35723572
which relies on the private IP. Defaults to True if connection to Hopsworks is established from
35733573
external environment (e.g. AWS SageMaker or Google Colab), otherwise to False.

python/hsfs/training_dataset.py

+3-3
Original file line numberDiff line numberDiff line change
@@ -1007,7 +1007,7 @@ def init_prepared_statement(
10071007
initialised for retrieving serving vectors as a batch.
10081008
external: boolean, optional. If set to True, the connection to the
10091009
online feature store is established using the same host as
1010-
for the `host` parameter in the [`hsfs.connection()`](connection_api.md#connection) method.
1010+
for the `host` parameter in the [`hopsworks.login()`](login.md#login) method.
10111011
If set to False, the online feature store storage connector is used
10121012
which relies on the private IP. Defaults to True if connection to Hopsworks is established from
10131013
external environment (e.g. AWS SageMaker or Google Colab), otherwise to False.
@@ -1024,7 +1024,7 @@ def get_serving_vector(
10241024
serving application.
10251025
external: boolean, optional. If set to True, the connection to the
10261026
online feature store is established using the same host as
1027-
for the `host` parameter in the [`hsfs.connection()`](connection_api.md#connection) method.
1027+
for the `host` parameter in the [`hopsworks.login()`](login.md#login) method.
10281028
If set to False, the online feature store storage connector is used
10291029
which relies on the private IP. Defaults to True if connection to Hopsworks is established from
10301030
external environment (e.g. AWS SageMaker or Google Colab), otherwise to False.
@@ -1046,7 +1046,7 @@ def get_serving_vectors(
10461046
serving application.
10471047
external: boolean, optional. If set to True, the connection to the
10481048
online feature store is established using the same host as
1049-
for the `host` parameter in the [`hsfs.connection()`](connection_api.md#connection) method.
1049+
for the `host` parameter in the [`hopsworks.login()`](login.md#login) method.
10501050
If set to False, the online feature store storage connector is used
10511051
which relies on the private IP. Defaults to True if connection to Hopsworks is established from
10521052
external environment (e.g. AWS SageMaker or Google Colab), otherwise to False.

python/hsml/core/dataset_api.py

+3-2
Original file line numberDiff line numberDiff line change
@@ -61,10 +61,11 @@ def upload(
6161
"""Upload a file to the Hopsworks filesystem.
6262
6363
```python
64+
import hopsworks
6465
65-
conn = hsml.connection(project="my-project")
66+
project = hopsworks.login(project="my-project")
6667
67-
dataset_api = conn.get_dataset_api()
68+
dataset_api = project.get_dataset_api()
6869
6970
uploaded_file_path = dataset_api.upload("my_local_file.txt", "Resources")
7071

0 commit comments

Comments
 (0)