Skip to content

Commit aa71647

Browse files
jitingxu1 and deepyaman authored
build: make NumPy, pandas, and Arrow deps optional (ibis-project#152)
Co-authored-by: Deepyaman Datta <deepyaman.datta@utexas.edu>
1 parent a2d5829 commit aa71647

File tree

4 files changed

+47
-8
lines changed

4 files changed

+47
-8
lines changed

ibis_ml/__init__.py

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -44,11 +44,12 @@ def _auto_patch_skorch() -> None:
4444
return
4545

4646
import ibis.expr.types as ir
47-
import numpy as np
4847

4948
old_fit = skorch.net.NeuralNet.fit
5049

5150
def fit(self, X, y=None, **fit_params):
51+
import numpy as np
52+
5253
if isinstance(y, ir.Column):
5354
y = np.asarray(y)
5455

ibis_ml/core.py

Lines changed: 26 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -12,14 +12,14 @@
1212
import ibis
1313
import ibis.expr.operations as ops
1414
import ibis.expr.types as ir
15-
import numpy as np
16-
import pandas as pd
17-
import pyarrow as pa
1815
from ibis.common.dispatch import lazy_singledispatch
1916

2017
if TYPE_CHECKING:
2118
import dask.dataframe as dd
19+
import numpy as np
20+
import pandas as pd
2221
import polars as pl
22+
import pyarrow as pa
2323
import xgboost as xgb
2424
from sklearn.utils._estimator_html_repr import _VisualBlock
2525

@@ -45,6 +45,9 @@ def _ibis_table_to_numpy(table: ir.Table) -> np.ndarray:
4545

4646
def _y_as_dataframe(y: Any) -> pd.DataFrame:
4747
"""Coerce `y` to a pandas dataframe"""
48+
import numpy as np
49+
import pandas as pd
50+
4851
if isinstance(y, pd.DataFrame):
4952
return y
5053
elif isinstance(y, pd.Series):
@@ -144,8 +147,11 @@ def _(X, y=None, maintain_order=False):
144147
return table, tuple(y.columns), None
145148

146149

147-
@normalize_table.register(pd.DataFrame)
150+
@normalize_table.register("pd.DataFrame")
148151
def _(X, y=None, maintain_order=False):
152+
import numpy as np
153+
import pandas as pd
154+
149155
if y is not None:
150156
y = _y_as_dataframe(y)
151157
table = pd.concat([X, y], axis=1)
@@ -162,8 +168,11 @@ def _(X, y=None, maintain_order=False):
162168
return ibis.memtable(table), targets, index
163169

164170

165-
@normalize_table.register(np.ndarray)
171+
@normalize_table.register("np.ndarray")
166172
def _(X, y=None, maintain_order=False):
173+
import numpy as np
174+
import pandas as pd
175+
167176
X = pd.DataFrame(X, columns=[f"x{i}" for i in range(X.shape[-1])])
168177
if y is not None:
169178
y = _y_as_dataframe(y)
@@ -181,8 +190,11 @@ def _(X, y=None, maintain_order=False):
181190
return ibis.memtable(table), targets, index
182191

183192

184-
@normalize_table.register(pa.Table)
193+
@normalize_table.register("pa.Table")
185194
def _(X, y=None, maintain_order=False):
195+
import numpy as np
196+
import pyarrow as pa
197+
186198
if y is not None:
187199
if isinstance(y, (pa.ChunkedArray, pa.Array)):
188200
y = pa.Table.from_pydict({"y": y})
@@ -246,6 +258,8 @@ def get_categories(self, column: str) -> pa.Array | None:
246258
return self.categories.get(column)
247259

248260
def set_categories(self, column: str, values: pa.Array | list[Any]) -> None:
261+
import pyarrow as pa
262+
249263
self.categories[column] = pa.array(values)
250264

251265
def drop_categories(self, column: str) -> None:
@@ -255,6 +269,8 @@ def drop_categories(self, column: str) -> None:
255269
def _categorize_wrap_reader(
256270
reader: pa.RecordBatchReader, categories: dict[str, pa.Array]
257271
) -> Iterable[pa.RecordBatch]:
272+
import pyarrow as pa
273+
258274
for batch in reader:
259275
out = {}
260276
for name, col in zip(batch.schema.names, batch.columns):
@@ -620,6 +636,8 @@ def _categorize_pandas(self, df: pd.DataFrame) -> pd.DataFrame:
620636
return df
621637

622638
def _categorize_pyarrow(self, table: pa.Table) -> pa.Table:
639+
import pyarrow as pa
640+
623641
if not self.metadata_.categories:
624642
return table
625643

@@ -645,6 +663,8 @@ def _categorize_dask_dataframe(self, ddf: dd.DataFrame) -> dd.DataFrame:
645663
def _categorize_pyarrow_batches(
646664
self, reader: pa.RecordBatchReader
647665
) -> pa.RecordBatchReader:
666+
import pyarrow as pa
667+
648668
if not self.metadata_.categories:
649669
return reader
650670

ibis_ml/steps/_discretize.py

Lines changed: 4 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -4,7 +4,6 @@
44

55
import ibis
66
import ibis.expr.types as ir
7-
import numpy as np
87

98
from ibis_ml.core import Metadata, Step
109
from ibis_ml.select import SelectionType, selector
@@ -94,6 +93,8 @@ def fit_table(self, table: ir.Table, metadata: Metadata) -> None:
9493
def _fit_uniform_strategy(
9594
self, table: ir.Table, columns: list[str]
9695
) -> dict[str, list[float]]:
96+
import numpy as np
97+
9798
aggs = []
9899
for col_name in columns:
99100
col = table[col_name]
@@ -117,6 +118,8 @@ def _fit_uniform_strategy(
117118
def _fit_quantile_strategy(
118119
self, table: ir.Table, columns: list[str]
119120
) -> dict[str, list[float]]:
121+
import numpy as np
122+
120123
aggs = []
121124
percentiles = np.linspace(0, 1, self.n_bins + 1)
122125
for col_name in columns:

tests/test_optional_dependencies.py

Lines changed: 15 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,15 @@
1+
import sys
2+
from importlib import import_module, reload
3+
from unittest.mock import patch
4+
5+
import pytest
6+
7+
8+
# https://stackoverflow.com/a/65163627
9+
@pytest.mark.parametrize("optional_dependency", ["numpy", "pandas", "pyarrow"])
10+
def test_without_dependency(optional_dependency):
11+
with patch.dict(sys.modules, {optional_dependency: None}):
12+
if "ibis_ml" in sys.modules:
13+
reload(sys.modules["ibis_ml"])
14+
else:
15+
import_module("ibis_ml")

0 commit comments

Comments
 (0)