|
24 | 24 | import weakref
|
25 | 25 | from typing import Any, Optional
|
26 | 26 |
|
27 |
| -from hopsworks_common import client, usage, util, version |
| 27 | +from hopsworks_common import client, constants, usage, util, version |
28 | 28 | from hopsworks_common.core import (
|
29 | 29 | hosts_api,
|
30 | 30 | project_api,
|
@@ -98,13 +98,12 @@ class Connection:
|
98 | 98 | project: The name of the project to connect to. When running on Hopsworks, this
|
99 | 99 | defaults to the project from where the client is run from.
|
100 | 100 | Defaults to `None`.
|
101 |
| - engine: Which engine to use, `"spark"`, `"python"` or `"training"`. Defaults to `None`, |
102 |
| - which initializes the engine to Spark if the environment provides Spark, for |
103 |
| - example on Hopsworks and Databricks, or falls back to Python if Spark is not |
104 |
| - available, e.g. on local Python environments or AWS SageMaker. This option |
105 |
| - allows you to override this behaviour. `"training"` engine is useful when only |
106 |
| - feature store metadata is needed, for example training dataset location and label |
107 |
| - information when Hopsworks training experiment is conducted. |
| 101 | + engine: Specifies the engine to use. Valid options are "spark", "python", "training", "spark-no-metastore", and "spark-delta". Defaults to None, which selects the engine automatically based on the environment:
| 102 | + "spark": Used if Spark is available and the connection is not to serverless Hopsworks, such as in Hopsworks or Databricks environments. |
| 103 | + "python": Used in local Python environments or AWS SageMaker when Spark is not available or the connection is done to serverless Hopsworks. |
| 104 | + "training": Used when only feature store metadata is needed, such as for obtaining training dataset locations and label information during Hopsworks training experiments. |
| 105 | + "spark-no-metastore": Functions like "spark" but does not rely on the Hive metastore. |
| 106 | + "spark-delta": Minimizes dependencies further by avoiding both Hive metastore and HopsFS. |
108 | 107 | hostname_verification: Whether or not to verify Hopsworks' certificate, defaults
|
109 | 108 | to `True`.
|
110 | 109 | trust_store_path: Path on the file system containing the Hopsworks certificates,
|
@@ -338,30 +337,35 @@ def connect(self) -> None:
|
338 | 337 | self._connected = True
|
339 | 338 | finalizer = weakref.finalize(self, self.close)
|
340 | 339 | try:
|
| 340 | + external = client.base.Client.REST_ENDPOINT not in os.environ |
| 341 | + serverless = self._host == constants.HOSTS.APP_HOST |
341 | 342 | # determine engine, needed to init client
|
342 |
| - if (self._engine is not None and self._engine.lower() == "spark") or ( |
343 |
| - self._engine is None and importlib.util.find_spec("pyspark") |
| 343 | + if ( |
| 344 | + self._engine is None |
| 345 | + and importlib.util.find_spec("pyspark") |
| 346 | + and (not external or not serverless) |
344 | 347 | ):
|
345 | 348 | self._engine = "spark"
|
346 |
| - elif (self._engine is not None and self._engine.lower() == "python") or ( |
347 |
| - self._engine is None and not importlib.util.find_spec("pyspark") |
348 |
| - ): |
| 349 | + elif self._engine is None: |
| 350 | + self._engine = "python" |
| 351 | + elif self._engine.lower() == "spark": |
| 352 | + self._engine = "spark" |
| 353 | + elif self._engine.lower() == "python": |
349 | 354 | self._engine = "python"
|
350 |
| - elif self._engine is not None and self._engine.lower() == "training": |
| 355 | + elif self._engine.lower() == "training": |
351 | 356 | self._engine = "training"
|
352 |
| - elif ( |
353 |
| - self._engine is not None |
354 |
| - and self._engine.lower() == "spark-no-metastore" |
355 |
| - ): |
| 357 | + elif self._engine.lower() == "spark-no-metastore": |
356 | 358 | self._engine = "spark-no-metastore"
|
| 359 | + elif self._engine.lower() == "spark-delta": |
| 360 | + self._engine = "spark-delta" |
357 | 361 | else:
|
358 | 362 | raise ConnectionError(
|
359 | 363 | "Engine you are trying to initialize is unknown. "
|
360 | 364 | "Supported engines are `'spark'`, `'python'` and `'training'`."
|
361 | 365 | )
|
362 | 366 |
|
363 | 367 | # init client
|
364 |
| - if client.base.Client.REST_ENDPOINT not in os.environ: |
| 368 | + if external: |
365 | 369 | client.init(
|
366 | 370 | "external",
|
367 | 371 | self._host,
|
|
0 commit comments