@@ -90,8 +90,11 @@ def analyze(self, info: Dict[str, object], **kwargs) -> Dict[str, object]:
90
90
icp = icp_class (nc , cal_size = self .validation_size )
91
91
92
92
output ['icp' ]['__default' ] = icp
93
+ icp_df = deepcopy (ns .data )
93
94
94
95
# setup prediction cache to avoid additional .predict() calls
96
+ pred_is_list = isinstance (ns .normal_predictions ['prediction' ], list ) and \
97
+ isinstance (ns .normal_predictions ['prediction' ][0 ], list )
95
98
if ns .is_classification :
96
99
if ns .predictor .supports_proba :
97
100
icp .nc_function .model .prediction_cache = ns .normal_predictions [all_cat_cols ].values
@@ -105,7 +108,7 @@ def analyze(self, info: Dict[str, object], **kwargs) -> Dict[str, object]:
105
108
predicted_classes = pd .get_dummies (preds ).values # inflate to one-hot enc
106
109
icp .nc_function .model .prediction_cache = predicted_classes
107
110
108
- elif ns .is_multi_ts :
111
+ elif ns .is_multi_ts or pred_is_list :
109
112
# we fit ICPs for time series confidence bounds only at t+1 forecast
110
113
icp .nc_function .model .prediction_cache = np .array ([p [0 ] for p in ns .normal_predictions ['prediction' ]])
111
114
else :
@@ -116,6 +119,9 @@ def analyze(self, info: Dict[str, object], **kwargs) -> Dict[str, object]:
116
119
117
120
# fit additional ICPs in time series tasks with grouped columns
118
121
if ns .tss .is_timeseries and ns .tss .group_by :
122
+ # generate a multiindex
123
+ midx = pd .MultiIndex .from_frame (icp_df [[* ns .tss .group_by , f'__mdb_original_{ ns .tss .order_by [0 ]} ' ]])
124
+ icp_df .index = midx
119
125
120
126
# create an ICP for each possible group
121
127
group_info = ns .data [ns .tss .group_by ].to_dict ('list' )
@@ -127,7 +133,6 @@ def analyze(self, info: Dict[str, object], **kwargs) -> Dict[str, object]:
127
133
output ['icp' ][tuple (combination )] = deepcopy (icp )
128
134
129
135
# calibrate ICP
130
- icp_df = deepcopy (ns .data )
131
136
icp_df , y = clean_df (icp_df , ns .target , ns .is_classification , output .get ('label_encoders' , None ))
132
137
output ['icp' ]['__default' ].index = icp_df .columns
133
138
output ['icp' ]['__default' ].calibrate (icp_df .values , y )
@@ -137,11 +142,11 @@ def analyze(self, info: Dict[str, object], **kwargs) -> Dict[str, object]:
137
142
icp_df , icp , ns .dtype_dict [ns .target ],
138
143
output , positive_domain = self .positive_domain , significance = self .fixed_significance )
139
144
if not ns .is_classification :
140
- result_df = pd .DataFrame (index = ns . data .index , columns = ['confidence' , 'lower' , 'upper' ], dtype = float )
145
+ result_df = pd .DataFrame (index = icp_df .index , columns = ['confidence' , 'lower' , 'upper' ], dtype = float )
141
146
result_df .loc [icp_df .index , 'lower' ] = ranges [:, 0 ]
142
147
result_df .loc [icp_df .index , 'upper' ] = ranges [:, 1 ]
143
148
else :
144
- result_df = pd .DataFrame (index = ns . data .index , columns = ['confidence' ], dtype = float )
149
+ result_df = pd .DataFrame (index = icp_df .index , columns = ['confidence' ], dtype = float )
145
150
146
151
result_df .loc [icp_df .index , 'confidence' ] = conf
147
152
@@ -152,10 +157,12 @@ def analyze(self, info: Dict[str, object], **kwargs) -> Dict[str, object]:
152
157
153
158
# add all predictions to DF
154
159
icps_df = deepcopy (ns .data )
155
- if ns .is_multi_ts :
156
- icps_df [f'__predicted_{ ns .target } ' ] = [p [0 ] for p in ns .normal_predictions ['prediction' ]]
160
+ midx = pd .MultiIndex .from_frame (icps_df [[* ns .tss .group_by , f'__mdb_original_{ ns .tss .order_by [0 ]} ' ]])
161
+ icps_df .index = midx
162
+ if ns .is_multi_ts or pred_is_list :
163
+ icps_df [f'__predicted_{ ns .target } ' ] = np .array ([p [0 ] for p in ns .normal_predictions ['prediction' ]])
157
164
else :
158
- icps_df [f'__predicted_{ ns .target } ' ] = ns .normal_predictions ['prediction' ]
165
+ icps_df [f'__predicted_{ ns .target } ' ] = np . array ( ns .normal_predictions ['prediction' ])
159
166
160
167
for group in icps ['__mdb_groups' ]:
161
168
icp_df = icps_df
@@ -207,6 +214,7 @@ def analyze(self, info: Dict[str, object], **kwargs) -> Dict[str, object]:
207
214
# consolidate all groups here
208
215
output ['icp' ]['__mdb_active' ] = True
209
216
217
+ result_df .index = ns .data .index
210
218
output ['result_df' ] = result_df
211
219
212
220
info = {** info , ** output }
@@ -216,12 +224,21 @@ def explain(self, row_insights: pd.DataFrame, global_insights: Dict[str, object]
216
224
** kwargs ) -> Tuple [pd .DataFrame , Dict [str , object ]]:
217
225
ns = SimpleNamespace (** kwargs )
218
226
227
+ if 'confidence' in ns .predictions .columns :
228
+ # bypass calibrator if model already outputs confidence
229
+ row_insights ['prediction' ] = ns .predictions ['prediction' ]
230
+ row_insights ['confidence' ] = ns .predictions ['confidence' ]
231
+ if 'upper' in ns .predictions .columns and 'lower' in ns .predictions .columns :
232
+ row_insights ['upper' ] = ns .predictions ['upper' ]
233
+ row_insights ['lower' ] = ns .predictions ['lower' ]
234
+ return row_insights , global_insights
235
+
219
236
if ns .analysis ['icp' ]['__mdb_active' ]:
220
237
icp_X = deepcopy (ns .data )
221
238
222
239
# replace observed data w/predictions
223
240
preds = ns .predictions ['prediction' ]
224
- if ns .tss .is_timeseries and ns .tss .horizon > 1 :
241
+ if ns .tss .is_timeseries and ( ns .tss .horizon > 1 or isinstance ( preds [ 0 ], list )) :
225
242
preds = [p [0 ] for p in preds ]
226
243
227
244
for col in [f'timestep_{ i } ' for i in range (1 , ns .tss .horizon )]:
0 commit comments