@@ -62,11 +62,34 @@ def test_ordinal_encode(t_train, t_test):
62
62
tm .assert_frame_equal (res .execute (), expected .execute (), check_dtype = False )
63
63
64
64
65
- def test_one_hot_encode (t_train , t_test ):
66
- step = ml .OneHotEncode ("ticker" )
65
+ @pytest .mark .parametrize (
66
+ ("min_frequency" , "max_categories" , "expected" ),
67
+ [
68
+ (
69
+ None ,
70
+ None ,
71
+ {
72
+ "ticker_AAPL" : [0 , 0 , 0 , 0 , 0 , 0 ],
73
+ "ticker_GOOG" : [0 , 0 , 1 , 1 , 0 , 0 ],
74
+ "ticker_MSFT" : [1 , 1 , 0 , 0 , 0 , 0 ],
75
+ "ticker_None" : [0 , 0 , 0 , 0 , 0 , 1 ],
76
+ },
77
+ ),
78
+ (
79
+ 2 ,
80
+ None ,
81
+ {"ticker_GOOG" : [0 , 0 , 1 , 1 , 0 , 0 ], "ticker_MSFT" : [1 , 1 , 0 , 0 , 0 , 0 ]},
82
+ ),
83
+ (None , 1 , {"ticker_MSFT" : [1 , 1 , 0 , 0 , 0 , 0 ]}),
84
+ ],
85
+ )
86
+ def test_onehotencode (t_train , t_test , min_frequency , max_categories , expected ):
87
+ step = ml .OneHotEncode (
88
+ "ticker" , min_frequency = min_frequency , max_categories = max_categories
89
+ )
67
90
step .fit_table (t_train , ml .core .Metadata ())
68
91
result = step .transform_table (t_test )
69
- expected = pd .DataFrame (
92
+ expected_df = pd .DataFrame (
70
93
{
71
94
"time" : [
72
95
pd .Timestamp ("2016-05-25 13:30:00.023" ),
@@ -76,13 +99,10 @@ def test_one_hot_encode(t_train, t_test):
76
99
pd .Timestamp ("2016-05-25 13:30:00.050" ),
77
100
pd .Timestamp ("2016-05-25 13:30:00.051" ),
78
101
],
79
- "ticker_AAPL" : [0 , 0 , 0 , 0 , 0 , 0 ],
80
- "ticker_GOOG" : [0 , 0 , 1 , 1 , 0 , 0 ],
81
- "ticker_MSFT" : [1 , 1 , 0 , 0 , 0 , 0 ],
82
- "ticker_None" : [0 , 0 , 0 , 0 , 0 , 1 ],
102
+ ** expected ,
83
103
}
84
104
)
85
- tm .assert_frame_equal (result .execute (), expected , check_dtype = False )
105
+ tm .assert_frame_equal (result .execute (), expected_df , check_dtype = False )
86
106
87
107
88
108
@pytest .mark .parametrize ("smooth" , [5000.0 , 1.0 , 0.0 ])
0 commit comments