Skip to content

Commit 447d947

Browse files
authored
Fix arrow_flight_client imports (#329)
* Fix arrow_flight_client imports * Provide human-readable errors in case Pyarrow is not installed * Ruff
1 parent 31ad5aa commit 447d947

File tree

10 files changed

+85
-31
lines changed

10 files changed

+85
-31
lines changed

python/hopsworks_common/core/constants.py

+17-7
Original file line numberDiff line numberDiff line change
@@ -25,25 +25,35 @@
2525
HAS_CONFLUENT_KAFKA: bool = importlib.util.find_spec("confluent_kafka") is not None
2626
confluent_kafka_not_installed_message = (
2727
"Confluent Kafka package not found. "
28-
"If you want to use Kafka with Hopsworks you can install the corresponding extras "
29-
"""`pip install hopsworks[python]` or `pip install "hopsworks[python]"` if using zsh. """
30-
"You can also install confluent-kafka directly in your environment e.g `pip install confluent-kafka`. "
28+
"If you want to use Kafka with Hopsworks you can install the corresponding extras via "
29+
'`pip install "hopsworks[python]"`. '
30+
"You can also install confluent-kafka directly in your environment with `pip install confluent-kafka`. "
3131
"You will need to restart your kernel if applicable."
3232
)
33+
3334
# Data Validation / Great Expectations
3435
HAS_GREAT_EXPECTATIONS: bool = (
3536
importlib.util.find_spec("great_expectations") is not None
3637
)
3738
great_expectations_not_installed_message = (
3839
"Great Expectations package not found. "
39-
"If you want to use data validation with Hopsworks you can install the corresponding extras "
40-
"""`pip install hopsworks[great_expectations]` or `pip install "hopsworks[great_expectations]"` if using zsh. """
41-
"You can also install great-expectations directly in your environment e.g `pip install great-expectations`. "
40+
"If you want to use data validation with Hopsworks you can install the corresponding extras via "
41+
'`pip install "hopsworks[great_expectations]"`. '
42+
"You can also install great-expectations directly in your environment with `pip install great-expectations`. "
4243
"You will need to restart your kernel if applicable."
4344
)
4445
initialise_expectation_suite_for_single_expectation_api_message = "Initialize Expectation Suite by attaching to a Feature Group to enable single expectation API"
4546

46-
HAS_ARROW: bool = importlib.util.find_spec("pyarrow") is not None
47+
# Pyarrow
48+
HAS_PYARROW: bool = importlib.util.find_spec("pyarrow") is not None
49+
pyarrow_not_installed_message = (
50+
"Pyarrow package not found. "
51+
"If you want to use Apache Arrow with Hopsworks you can install the corresponding extras via "
52+
'`pip install "hopsworks[python]"`. '
53+
"You can also install pyarrow directly in your environment with `pip install pyarrow`. "
54+
"You will need to restart your kernel if applicable."
55+
)
56+
4757
HAS_PANDAS: bool = importlib.util.find_spec("pandas") is not None
4858
HAS_NUMPY: bool = importlib.util.find_spec("numpy") is not None
4959
HAS_POLARS: bool = importlib.util.find_spec("polars") is not None

python/hopsworks_common/core/type_systems.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -23,9 +23,9 @@
2323

2424
import pytz
2525
from hopsworks_common.core.constants import (
26-
HAS_ARROW,
2726
HAS_PANDAS,
2827
HAS_POLARS,
28+
HAS_PYARROW,
2929
)
3030
from hopsworks_common.decorators import uses_polars
3131

@@ -35,7 +35,7 @@
3535
import pandas as pd
3636
import polars as pl
3737

38-
if HAS_ARROW:
38+
if HAS_PYARROW:
3939
import pyarrow as pa
4040

4141
# Decimal types are currently not supported

python/hsfs/constructor/query.py

+3-1
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,7 @@
2929
from hsfs.constructor import join
3030
from hsfs.constructor.filter import Filter, Logic
3131
from hsfs.constructor.fs_query import FsQuery
32-
from hsfs.core import arrow_flight_client, query_constructor_api, storage_connector_api
32+
from hsfs.core import query_constructor_api, storage_connector_api
3333
from hsfs.decorators import typechecked
3434
from hsfs.feature import Feature
3535

@@ -101,6 +101,8 @@ def _prep_read(
101101
online_conn = None
102102

103103
if engine.get_instance().is_flyingduck_query_supported(self, read_options):
104+
from hsfs.core import arrow_flight_client
105+
104106
sql_query = self._to_string(fs_query, online, asof=True)
105107
sql_query = arrow_flight_client.get_instance().create_query_object(
106108
self, sql_query, fs_query.on_demand_fg_aliases

python/hsfs/core/arrow_flight_client.py

+6
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,12 @@
2323
from functools import wraps
2424
from typing import Any, Dict, Optional, Union
2525

26+
from hopsworks_common.core.constants import HAS_PYARROW, pyarrow_not_installed_message
27+
28+
29+
if not HAS_PYARROW:
30+
raise ModuleNotFoundError(pyarrow_not_installed_message)
31+
2632
import pyarrow
2733
import pyarrow._flight
2834
import pyarrow.flight

python/hsfs/core/constants.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -16,14 +16,14 @@
1616

1717
from hopsworks_common.core.constants import (
1818
HAS_AIOMYSQL,
19-
HAS_ARROW,
2019
HAS_AVRO,
2120
HAS_CONFLUENT_KAFKA,
2221
HAS_FAST_AVRO,
2322
HAS_GREAT_EXPECTATIONS,
2423
HAS_NUMPY,
2524
HAS_PANDAS,
2625
HAS_POLARS,
26+
HAS_PYARROW,
2727
HAS_SQLALCHEMY,
2828
great_expectations_not_installed_message,
2929
initialise_expectation_suite_for_single_expectation_api_message,
@@ -32,7 +32,7 @@
3232

3333
__all__ = [
3434
"HAS_AIOMYSQL",
35-
"HAS_ARROW",
35+
"HAS_PYARROW",
3636
"HAS_AVRO",
3737
"HAS_CONFLUENT_KAFKA",
3838
"HAS_FAST_AVRO",

python/hsfs/core/feature_view_engine.py

+14-3
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,6 @@
3333
from hsfs.client import exceptions
3434
from hsfs.constructor.filter import Filter, Logic
3535
from hsfs.core import (
36-
arrow_flight_client,
3736
feature_view_api,
3837
query_constructor_api,
3938
statistics_engine,
@@ -1009,7 +1008,17 @@ def get_models_provenance(
10091008

10101009
def _check_feature_group_accessibility(self, feature_view_obj):
10111010
if engine.get_type() == "python":
1012-
if arrow_flight_client.get_instance().is_enabled():
1011+
try:
1012+
from hsfs.core import arrow_flight_client
1013+
1014+
arrow_flight_client_imported = True
1015+
except ImportError:
1016+
arrow_flight_client_imported = False
1017+
1018+
if (
1019+
arrow_flight_client_imported
1020+
and arrow_flight_client.get_instance().is_enabled()
1021+
):
10131022
if not arrow_flight_client.supports(
10141023
feature_view_obj.query.featuregroups
10151024
):
@@ -1227,7 +1236,9 @@ def _get_feature_logging_data(
12271236
model_col_name=FeatureViewEngine._HSML_MODEL,
12281237
predictions=predictions,
12291238
training_dataset_version=training_dataset_version,
1230-
hsml_model=self.get_hsml_model_value(hsml_model) if hsml_model else None,
1239+
hsml_model=self.get_hsml_model_value(hsml_model)
1240+
if hsml_model
1241+
else None,
12311242
)
12321243
else:
12331244
return engine.get_instance().get_feature_logging_df(

python/hsfs/engine/__init__.py

+2-1
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,6 @@
1919

2020
import hopsworks_common.connection
2121
from hsfs.client import exceptions
22-
from hsfs.core import arrow_flight_client
2322
from hsfs.engine import spark, spark_no_metastore
2423

2524

@@ -83,5 +82,7 @@ def get_type() -> str:
8382

8483
def stop() -> None:
8584
global _engine
85+
from hsfs.core import arrow_flight_client
86+
8687
_engine = None
8788
arrow_flight_client.close()

python/hsfs/engine/python.py

+32-11
Original file line numberDiff line numberDiff line change
@@ -67,7 +67,6 @@
6767
from hsfs import storage_connector as sc
6868
from hsfs.constructor import query
6969
from hsfs.core import (
70-
arrow_flight_client,
7170
dataset_api,
7271
feature_group_api,
7372
feature_view_api,
@@ -83,11 +82,12 @@
8382
)
8483
from hsfs.core.constants import (
8584
HAS_AIOMYSQL,
86-
HAS_ARROW,
8785
HAS_GREAT_EXPECTATIONS,
8886
HAS_PANDAS,
87+
HAS_PYARROW,
8988
HAS_SQLALCHEMY,
9089
)
90+
from hsfs.core.type_systems import PYARROW_HOPSWORKS_DTYPE_MAPPING
9191
from hsfs.core.vector_db_client import VectorDbClient
9292
from hsfs.feature_group import ExternalFeatureGroup, FeatureGroup
9393
from hsfs.training_dataset import TrainingDataset
@@ -98,8 +98,6 @@
9898
if HAS_GREAT_EXPECTATIONS:
9999
import great_expectations
100100

101-
if HAS_ARROW:
102-
from hsfs.core.type_systems import PYARROW_HOPSWORKS_DTYPE_MAPPING
103101
if HAS_AIOMYSQL and HAS_SQLALCHEMY:
104102
from hsfs.core import util_sql
105103

@@ -157,6 +155,8 @@ def sql(
157155
def is_flyingduck_query_supported(
158156
self, query: "query.Query", read_options: Optional[Dict[str, Any]] = None
159157
) -> bool:
158+
from hsfs.core import arrow_flight_client
159+
160160
return arrow_flight_client.is_query_supported(query, read_options or {})
161161

162162
def _validate_dataframe_type(self, dataframe_type: str):
@@ -180,6 +180,8 @@ def _sql_offline(
180180
) -> Union[pd.DataFrame, pl.DataFrame]:
181181
self._validate_dataframe_type(dataframe_type)
182182
if isinstance(sql_query, dict) and "query_string" in sql_query:
183+
from hsfs.core import arrow_flight_client
184+
183185
result_df = util.run_with_loading_animation(
184186
"Reading data from Hopsworks, using Hopsworks Feature Query Service",
185187
arrow_flight_client.get_instance().read_query,
@@ -342,6 +344,8 @@ def _read_hopsfs_remote(
342344

343345
for inode in inode_list:
344346
if not self._is_metadata_file(inode.path):
347+
from hsfs.core import arrow_flight_client
348+
345349
if arrow_flight_client.is_data_format_supported(
346350
data_format, read_options
347351
):
@@ -539,7 +543,10 @@ def profile(
539543
or pa.types.is_list(field.type)
540544
or pa.types.is_large_list(field.type)
541545
or pa.types.is_struct(field.type)
542-
) and PYARROW_HOPSWORKS_DTYPE_MAPPING[field.type] in ["timestamp", "date"]:
546+
) and PYARROW_HOPSWORKS_DTYPE_MAPPING.get(field.type, None) in [
547+
"timestamp",
548+
"date",
549+
]:
543550
if HAS_POLARS and (
544551
isinstance(df, pl.DataFrame)
545552
or isinstance(df, pl.dataframe.frame.DataFrame)
@@ -573,15 +580,21 @@ def profile(
573580
or pa.types.is_list(arrow_type)
574581
or pa.types.is_large_list(arrow_type)
575582
or pa.types.is_struct(arrow_type)
576-
or PYARROW_HOPSWORKS_DTYPE_MAPPING[arrow_type]
583+
or PYARROW_HOPSWORKS_DTYPE_MAPPING.get(arrow_type, None)
577584
in ["timestamp", "date", "binary", "string"]
578585
):
579586
dataType = "String"
580-
elif PYARROW_HOPSWORKS_DTYPE_MAPPING[arrow_type] in ["float", "double"]:
587+
elif PYARROW_HOPSWORKS_DTYPE_MAPPING.get(arrow_type, None) in [
588+
"float",
589+
"double",
590+
]:
581591
dataType = "Fractional"
582-
elif PYARROW_HOPSWORKS_DTYPE_MAPPING[arrow_type] in ["int", "bigint"]:
592+
elif PYARROW_HOPSWORKS_DTYPE_MAPPING.get(arrow_type, None) in [
593+
"int",
594+
"bigint",
595+
]:
583596
dataType = "Integral"
584-
elif PYARROW_HOPSWORKS_DTYPE_MAPPING[arrow_type] == "boolean":
597+
elif PYARROW_HOPSWORKS_DTYPE_MAPPING.get(arrow_type, None) == "boolean":
585598
dataType = "Boolean"
586599
else:
587600
print(
@@ -1077,8 +1090,16 @@ def write_training_dataset(
10771090
"Currently only query based training datasets are supported by the Python engine"
10781091
)
10791092

1093+
try:
1094+
from hsfs.core import arrow_flight_client
1095+
1096+
arrow_flight_client_imported = True
1097+
except ImportError:
1098+
arrow_flight_client_imported = False
1099+
10801100
if (
1081-
arrow_flight_client.is_query_supported(dataset, user_write_options)
1101+
arrow_flight_client_imported
1102+
and arrow_flight_client.is_query_supported(dataset, user_write_options)
10821103
and len(training_dataset.splits) == 0
10831104
and feature_view_obj
10841105
and len(feature_view_obj.transformation_functions) == 0
@@ -1251,7 +1272,7 @@ def _apply_transformation_function(
12511272
or isinstance(dataset, pl.dataframe.frame.DataFrame)
12521273
):
12531274
# Converting polars dataframe to pandas because currently we support only pandas UDF's as transformation functions.
1254-
if HAS_ARROW:
1275+
if HAS_PYARROW:
12551276
dataset = dataset.to_pandas(
12561277
use_pyarrow_extension_array=True
12571278
) # Zero copy if pyarrow extension can be used.

python/hsfs/feature_store.py

+4-1
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,6 @@
3737
from hsfs.client import exceptions
3838
from hsfs.constructor.query import Query
3939
from hsfs.core import (
40-
arrow_flight_client,
4140
feature_group_api,
4241
feature_group_engine,
4342
feature_view_engine,
@@ -1790,10 +1789,14 @@ def get_feature_views(self, name: str) -> List[feature_view.FeatureView]:
17901789

17911790
def _disable_hopsworks_feature_query_service_client(self):
17921791
"""Disable Hopsworks feature query service for the current session. This behaviour is not persisted on reset."""
1792+
from hsfs.core import arrow_flight_client
1793+
17931794
arrow_flight_client._disable_feature_query_service_client()
17941795

17951796
def _reset_hopsworks_feature_query_service_client(self):
17961797
"""Reset Hopsworks feature query service for the current session."""
1798+
from hsfs.core import arrow_flight_client
1799+
17971800
arrow_flight_client.close()
17981801
arrow_flight_client.get_instance()
17991802

python/tests/core/test_type_systems.py

+3-3
Original file line numberDiff line numberDiff line change
@@ -17,10 +17,10 @@
1717

1818
import pytest
1919
from hsfs.core import type_systems
20-
from hsfs.core.constants import HAS_ARROW, HAS_PANDAS
20+
from hsfs.core.constants import HAS_PANDAS, HAS_PYARROW
2121

2222

23-
if HAS_ARROW:
23+
if HAS_PYARROW:
2424
import pyarrow as pa
2525

2626
if HAS_PANDAS:
@@ -32,7 +32,7 @@
3232

3333
class TestTypeSystems:
3434
@pytest.mark.skipif(
35-
not HAS_ARROW or not HAS_PANDAS, reason="Arrow or Pandas are not installed"
35+
not HAS_PYARROW or not HAS_PANDAS, reason="Arrow or Pandas are not installed"
3636
)
3737
def test_infer_type_pyarrow_list(self):
3838
# Act

0 commit comments

Comments
 (0)