1
- from copy import deepcopy
2
- from typing import Dict , Tuple , List , Union
1
+ from typing import Dict , Tuple , List
3
2
4
- import optuna
5
3
import numpy as np
6
4
import pandas as pd
7
- from sktime .transformations .series .detrend import Detrender
8
- from sktime .forecasting .trend import PolynomialTrendForecaster
9
- from sktime .transformations .series .detrend import ConditionalDeseasonalizer
5
+ from type_infer .dtype import dtype
10
6
11
7
from lightwood .api .types import TimeseriesSettings
12
- from type_infer .dtype import dtype
13
- from lightwood .helpers .ts import get_ts_groups , get_delta , get_group_matches , Differencer
14
- from lightwood .helpers .log import log
8
+ from lightwood .helpers .ts import get_ts_groups , get_delta , Differencer
15
9
from lightwood .encoder .time_series .helpers .common import generate_target_group_normalizers
16
10
17
11
@@ -36,18 +30,16 @@ def timeseries_analyzer(data: Dict[str, pd.DataFrame], dtype_dict: Dict[str, str
36
30
""" # noqa
37
31
tss = timeseries_settings
38
32
groups = get_ts_groups (data ['train' ], tss )
39
- deltas , periods , freqs = get_delta (data ['train' ], dtype_dict , groups , target , tss )
33
+ deltas , periods , freqs = get_delta (data ['train' ], tss )
40
34
41
- normalizers = generate_target_group_normalizers (data ['train' ], target , dtype_dict , groups , tss )
35
+ normalizers = generate_target_group_normalizers (data ['train' ], target , dtype_dict , tss )
42
36
43
37
if dtype_dict [target ] in (dtype .integer , dtype .float , dtype .num_tsarray ):
44
- naive_forecast_residuals , scale_factor = get_grouped_naive_residuals (data ['dev' ], target , tss , groups )
45
- differencers = get_differencers (data ['train' ], target , groups , tss .group_by )
46
- stl_transforms = get_stls (data ['train' ], data ['dev' ], target , periods , groups , tss )
38
+ naive_forecast_residuals , scale_factor = get_grouped_naive_residuals (data ['dev' ], target , tss )
39
+ differencers = get_differencers (data ['train' ], target , tss .group_by )
47
40
else :
48
41
naive_forecast_residuals , scale_factor = {}, {}
49
42
differencers = {}
50
- stl_transforms = {}
51
43
52
44
return {'target_normalizers' : normalizers ,
53
45
'deltas' : deltas ,
@@ -57,7 +49,7 @@ def timeseries_analyzer(data: Dict[str, pd.DataFrame], dtype_dict: Dict[str, str
57
49
'ts_naive_mae' : scale_factor ,
58
50
'periods' : periods ,
59
51
'sample_freqs' : freqs ,
60
- 'stl_transforms' : stl_transforms ,
52
+ 'stl_transforms' : {}, # TODO: remove, or provide from outside as user perhaps
61
53
'differencers' : differencers
62
54
}
63
55
@@ -87,121 +79,27 @@ def get_naive_residuals(target_data: pd.DataFrame, m: int = 1) -> Tuple[List, fl
87
79
def get_grouped_naive_residuals(
        info: pd.DataFrame,
        target: str,
        tss: TimeseriesSettings
) -> Tuple[Dict, Dict]:
    """
    Wraps `get_naive_residuals` for a dataframe with multiple co-existing time series.

    :param info: dataframe holding one or more co-existing time series.
    :param target: name of the target column.
    :param tss: timeseries settings; only `tss.group_by` is read here.
    :return: two dicts keyed by group — per-group residual lists and per-group scale factors.
    """  # noqa
    residuals_by_group = {}
    scale_by_group = {}

    # Partition by the configured group columns; with no grouping, everything
    # falls into a single '__default' group.
    if tss.group_by:
        partitions = info.groupby(by=tss.group_by)
    else:
        partitions = info.groupby(lambda x: '__default')

    for key, frame in partitions:
        if frame.shape[0] <= 1:
            # A single observation yields no residual for a shifted naive forecast.
            continue
        res, scale = get_naive_residuals(frame[target])  # @TODO: pass m once we handle seasonality
        residuals_by_group[key] = res
        scale_by_group[key] = scale

    return residuals_by_group, scale_by_group
96
105
97
def get_differencers(data: pd.DataFrame, target: str, group_cols: List):
    """
    Fits a `Differencer` on the target series of each group in `data`.

    :param data: dataframe holding one or more co-existing time series.
    :param target: name of the target column to difference.
    :param group_cols: columns that identify each series; falsy means a single ungrouped series.
    :return: dictionary mapping each group key to its fitted `Differencer`.
    """  # noqa
    differencers = {}
    # For the ungrouped case, key the single group as '__default' to match the
    # convention used by `get_grouped_naive_residuals` (previously this keyed
    # the lone group with the literal `True`, breaking per-group lookups).
    grouped = data.groupby(by=group_cols) if group_cols else data.groupby(lambda x: '__default')
    for group, subset in grouped:
        differencer = Differencer()
        differencer.fit(subset[target].values)
        differencers[group] = differencer
    return differencers
-
115
-
116
- def get_stls (train_df : pd .DataFrame ,
117
- dev_df : pd .DataFrame ,
118
- target : str ,
119
- sps : Dict ,
120
- groups : list ,
121
- tss : TimeseriesSettings
122
- ) -> Dict [str , object ]:
123
- stls = {'__default' : None }
124
- for group in groups :
125
- if group != '__default' :
126
- _ , tr_subset = get_group_matches (train_df , group , tss .group_by )
127
- _ , dev_subset = get_group_matches (dev_df , group , tss .group_by )
128
- if tr_subset .shape [0 ] > 0 and dev_subset .shape [0 ] > 0 and sps .get (group , False ):
129
- group_freq = tr_subset ['__mdb_inferred_freq' ].iloc [0 ]
130
- tr_subset = deepcopy (tr_subset )[target ]
131
- dev_subset = deepcopy (dev_subset )[target ]
132
- tr_subset .index = pd .date_range (start = tr_subset .iloc [0 ], freq = group_freq ,
133
- periods = len (tr_subset )).to_period ()
134
- dev_subset .index = pd .date_range (start = dev_subset .iloc [0 ], freq = group_freq ,
135
- periods = len (dev_subset )).to_period ()
136
- stl = _pick_ST (tr_subset , dev_subset , sps [group ])
137
- log .info (f'Best STL decomposition params for group { group } are: { stl ["best_params" ]} ' )
138
- stls [group ] = stl
139
- return stls
140
-
141
-
142
- def _pick_ST (tr_subset : pd .Series , dev_subset : pd .Series , sp : list ):
143
- """
144
- Perform hyperparam search with optuna to find best combination of ST transforms for a time series.
145
-
146
- :param tr_subset: training series used for fitting blocks. Index should be datetime, and values are the actual time series.
147
- :param dev_subset: dev series used for computing loss. Index should be datetime, and values are the actual time series.
148
- :param sp: list of candidate seasonal periods
149
- :return: best deseasonalizer and detrender combination based on dev_loss
150
- """ # noqa
151
-
152
- def _ST_objective (trial : optuna .Trial ):
153
- trend_degree = trial .suggest_categorical ("trend_degree" , [1 ])
154
- ds_sp = trial .suggest_categorical ("ds_sp" , sp ) # seasonality period to use in deseasonalizer
155
- if min (min (tr_subset ), min (dev_subset )) <= 0 :
156
- decomp_type = trial .suggest_categorical ("decomp_type" , ['additive' ])
157
- else :
158
- decomp_type = trial .suggest_categorical ("decomp_type" , ['additive' , 'multiplicative' ])
159
-
160
- detrender = Detrender (forecaster = PolynomialTrendForecaster (degree = trend_degree ))
161
- deseasonalizer = ConditionalDeseasonalizer (sp = ds_sp , model = decomp_type )
162
- transformer = STLTransformer (detrender = detrender , deseasonalizer = deseasonalizer , type = decomp_type )
163
- transformer .fit (tr_subset )
164
- residuals = transformer .transform (dev_subset )
165
-
166
- trial .set_user_attr ("transformer" , transformer )
167
- return np .power (residuals , 2 ).sum ()
168
-
169
- space = {"trend_degree" : [1 , 2 ], "ds_sp" : sp , "decomp_type" : ['additive' , 'multiplicative' ]}
170
- study = optuna .create_study (sampler = optuna .samplers .GridSampler (space ))
171
- study .optimize (_ST_objective , n_trials = 8 )
172
-
173
- return {
174
- "transformer" : study .best_trial .user_attrs ['transformer' ],
175
- "best_params" : study .best_params
176
- }
177
-
178
-
179
- class STLTransformer :
180
- def __init__ (self , detrender : Detrender , deseasonalizer : ConditionalDeseasonalizer , type : str = 'additive' ):
181
- """
182
- Class that handles STL transformation and inverse, given specific detrender and deseasonalizer instances.
183
- :param detrender: Already initialized.
184
- :param deseasonalizer: Already initialized.
185
- :param type: Either 'additive' or 'multiplicative'.
186
- """ # noqa
187
- self ._type = type
188
- self .detrender = detrender
189
- self .deseasonalizer = deseasonalizer
190
- self .op = {
191
- 'additive' : lambda x , y : x - y ,
192
- 'multiplicative' : lambda x , y : x / y
193
- }
194
- self .iop = {
195
- 'additive' : lambda x , y : x + y ,
196
- 'multiplicative' : lambda x , y : x * y
197
- }
198
-
199
- def fit (self , x : Union [pd .DataFrame , pd .Series ]):
200
- self .deseasonalizer .fit (x )
201
- self .detrender .fit (self .op [self ._type ](x , self .deseasonalizer .transform (x )))
202
-
203
- def transform (self , x : Union [pd .DataFrame , pd .Series ]):
204
- return self .detrender .transform (self .deseasonalizer .transform (x ))
205
-
206
- def inverse_transform (self , x : Union [pd .DataFrame , pd .Series ]):
207
- return self .deseasonalizer .inverse_transform (self .detrender .inverse_transform (x ))
0 commit comments