Skip to content

Commit d6446c7

Browse files
committed
add unit test and fix unique col scaling
1 parent 6582682 commit d6446c7

File tree

2 files changed

+63
-2
lines changed

2 files changed

+63
-2
lines changed

ibis_ml/steps/_standardize.py

Lines changed: 14 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -61,7 +61,14 @@ def fit_table(self, table: ir.Table, metadata: Metadata) -> None:
6161
self._fit_expr = [expr]
6262
results = expr.execute().to_dict("records")[0]
6363
for name in columns:
64-
stats[name] = (results[f"{name}_max"], results[f"{name}_min"])
64+
col_max = results[f"{name}_max"]
65+
col_min = results[f"{name}_min"]
66+
if col_max == col_min:
67+
raise ValueError(
68+
f"Cannot standardize {name!r} - "
69+
"the maximum and minimum values are equal"
70+
)
71+
stats[name] = (col_max, col_min)
6572

6673
self.stats_ = stats
6774

@@ -121,7 +128,12 @@ def fit_table(self, table: ir.Table, metadata: Metadata) -> None:
121128
self._fit_expr = [table.aggregate(aggs)]
122129
results = self._fit_expr[-1].execute().to_dict("records")[0]
123130
for name in columns:
124-
stats[name] = (results[f"{name}_mean"], results[f"{name}_std"])
131+
col_std = results[f"{name}_std"]
132+
if col_std == 0:
133+
raise ValueError(
134+
f"Cannot standardize {name!r} - the standard deviation is zero"
135+
)
136+
stats[name] = (results[f"{name}_mean"], col_std)
125137

126138
self.stats_ = stats
127139

tests/test_standardize.py

Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,49 @@
1+
import ibis
2+
import numpy as np
3+
import pandas as pd
4+
import pandas.testing as tm
5+
import pytest
6+
7+
import ibis_ml as ml
8+
9+
10+
def test_scalestandard():
    """Check ScaleStandard against ``(x - mean) / std`` computed with NumPy."""
    values = np.arange(0, 100)
    center, spread = np.mean(values), np.std(values)
    expected = pd.DataFrame({"col": (values - center) / spread})

    # Fit the step on the in-memory table, then apply it to the same table.
    table = ibis.memtable({"col": values})
    scaler = ml.ScaleStandard("col")
    scaler.fit_table(table, ml.core.Metadata())
    actual = scaler.transform_table(table).execute()

    # Floating-point results are compared with tolerance, not exactly.
    tm.assert_frame_equal(actual, expected, check_exact=False)
20+
21+
22+
def test_scaleminmax():
    """Check ScaleMinMax against ``(x - min) / (max - min)`` computed with NumPy."""
    values = np.arange(0, 100)
    lowest, highest = np.min(values), np.max(values)
    expected = pd.DataFrame({"col": (values - lowest) / (highest - lowest)})

    # Fit the step on the in-memory table, then apply it to the same table.
    table = ibis.memtable({"col": values})
    scaler = ml.ScaleMinMax("col")
    scaler.fit_table(table, ml.core.Metadata())
    actual = scaler.transform_table(table).execute()

    # Floating-point results are compared with tolerance, not exactly.
    tm.assert_frame_equal(actual, expected, check_exact=False)
32+
33+
34+
@pytest.mark.parametrize(
    ("model", "msg"),
    [
        ("ScaleStandard", "Cannot standardize 'col' - the standard deviation is zero"),
        (
            "ScaleMinMax",
            "Cannot standardize 'col' - the maximum and minimum values are equal",
        ),
    ],
)
def test_scale_unique_col(model, msg):
    """Fitting a scaler on a one-row column must raise ValueError with ``msg``."""
    single_row = ibis.memtable({"col": [1]})
    # Look the step class up by name so one test body covers both scalers.
    scaler = getattr(ml, model)("col")
    with pytest.raises(ValueError, match=msg):
        scaler.fit_table(single_row, ml.core.Metadata())

0 commit comments

Comments
 (0)