@@ -193,6 +193,67 @@ def preprocessing_fn(inputs):
         desired_batch_size=20,
         beam_pipeline=beam.Pipeline())

+  @tft_unit.parameters(
+      (tf.int16,),
+      (tf.int32,),
+      (tf.int64,),
+      (tf.float32,),
+      (tf.float64,),
+  )
+  def testGaussianizeRagged(self, input_dtype):
+    tft_unit.skip_if_not_tf2('RaggedFeature is not available in TF 1.x.')
+
+    def preprocessing_fn(inputs):
+      x_gaussianized = tft.scale_to_gaussian(tf.cast(inputs['x'], input_dtype))
+      self.assertEqual(x_gaussianized.dtype,
+                       impl_test._mean_output_dtype(input_dtype))
+      return {'x_gaussianized': tf.cast(x_gaussianized, tf.float32)}
+
+    input_data_values = [
+        516, -871, 737, 415, 584, 583, 152, 479, 576, 409, 591, 844, -16, 508,
+        669, 617, 502, 532, 517, 479
+    ]
+    input_data = []
+    for idx, v in enumerate(input_data_values):
+      input_data.append({
+          'val': [v, -input_data_values[-1 - idx]],
+          'row_lengths_1': [2, 1, 0],
+          'row_lengths_2': [1, 0, 1],
+      })
+    input_metadata = tft_unit.metadata_from_feature_spec({
+        'x':
+            tf.io.RaggedFeature(
+                tft_unit.canonical_numeric_dtype(input_dtype),
+                value_key='val',
+                partitions=[
+                    tf.io.RaggedFeature.RowLengths('row_lengths_1'),  # pytype: disable=attribute-error
+                    tf.io.RaggedFeature.RowLengths('row_lengths_2')  # pytype: disable=attribute-error
+                ]),
+    })
+    expected_data_values = [
+        0.91555131, -1.54543642, 1.30767697, 0.73634456, 1.03620536, 1.03443104,
+        0.26969729, 0.84990131, 1.02201077, 0.72569862, 1.04862563, 1.49752966,
+        -0.02838919, 0.90135672, 1.18702292, 1.09475806, 0.89071077, 0.9439405,
+        0.91732564, 0.84990131
+    ]
+    expected_data = []
+    for idx, v in enumerate(expected_data_values):
+      expected_data.append({
+          'x_gaussianized$ragged_values': ([v,
+                                            -expected_data_values[-1 - idx]]),
+          'x_gaussianized$row_lengths_1': [2, 1, 0],
+          'x_gaussianized$row_lengths_2': [1, 0, 1]
+      })
+
+    self.assertAnalyzeAndTransformResults(
+        input_data,
+        input_metadata,
+        preprocessing_fn,
+        expected_data,
+        desired_batch_size=20,
+        # Runs the test deterministically on the whole batch.
+        beam_pipeline=beam.Pipeline())
+
   @tft_unit.named_parameters(
       dict(
           testcase_name='tukey_int64in',
@@ -497,5 +558,74 @@ def assert_and_cast_dtype(tensor, out_dtype):
         # Runs the test deterministically on the whole batch.
         beam_pipeline=beam.Pipeline())

+  @tft_unit.parameters(
+      (tf.int16,),
+      (tf.int32,),
+      (tf.int64,),
+      (tf.float32,),
+      (tf.float64,),
+  )
+  def testTukeyHHAnalyzersWithRaggedInputs(self, input_dtype):
+    tft_unit.skip_if_not_tf2('RaggedFeature is not available in TF 1.x.')
+
+    output_dtype = impl_test._mean_output_dtype(input_dtype)
+    canonical_output_dtype = tft_unit.canonical_numeric_dtype(output_dtype)
+
+    def analyzer_fn(inputs):
+      a = tf.cast(inputs['a'], input_dtype)
+
+      def assert_and_cast_dtype(tensor):
+        self.assertEqual(tensor.dtype, output_dtype)
+        return tf.cast(tensor, canonical_output_dtype)
+
+      return {
+          'tukey_location': assert_and_cast_dtype(tft.tukey_location(a)),
+          'tukey_scale': assert_and_cast_dtype(tft.tukey_scale(a)),
+          'tukey_hl': assert_and_cast_dtype(tft.tukey_h_params(a)[0]),
+          'tukey_hr': assert_and_cast_dtype(tft.tukey_h_params(a)[1]),
+      }
+
+    input_data_values = [
+        516, -871, 737, 415, 584, 583, 152, 479, 576, 409, 591, 844, -16, 508,
+        669, 617, 502, 532, 517, 479
+    ]
+    input_data = []
+    for idx, v in enumerate(input_data_values):
+      input_data.append({
+          'val': [v, -input_data_values[-1 - idx]],
+          'row_lengths_1': [2, 0, 1],
+          'row_lengths_2': [0, 1, 1]
+      })
+    input_metadata = tft_unit.metadata_from_feature_spec({
+        'a':
+            tf.io.RaggedFeature(
+                tft_unit.canonical_numeric_dtype(input_dtype),
+                value_key='val',
+                partitions=[
+                    tf.io.RaggedFeature.RowLengths('row_lengths_1'),  # pytype: disable=attribute-error
+                    tf.io.RaggedFeature.RowLengths('row_lengths_2')  # pytype: disable=attribute-error
+                ]),
+    })
+
+    expected_outputs = {
+        'tukey_location':
+            np.array(0.0, canonical_output_dtype.as_numpy_dtype),
+        'tukey_scale':
+            np.array(572.2776, canonical_output_dtype.as_numpy_dtype),
+        'tukey_hl':
+            np.array(0.0, canonical_output_dtype.as_numpy_dtype),
+        'tukey_hr':
+            np.array(0.0, canonical_output_dtype.as_numpy_dtype),
+    }
+
+    self.assertAnalyzerOutputs(
+        input_data,
+        input_metadata,
+        analyzer_fn,
+        expected_outputs,
+        desired_batch_size=20,
+        # Runs the test deterministically on the whole batch.
+        beam_pipeline=beam.Pipeline())
+
 if __name__ == '__main__':
   tft_unit.main()
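As a rough illustration of the API these tests exercise, here is a minimal sketch of applying `tft.scale_to_gaussian` and the Tukey analyzers to a ragged feature inside a user `preprocessing_fn`. It is not code from this change: the feature spec, key names, and dtype choices below are assumptions for illustration only.

```python
import tensorflow as tf
import tensorflow_transform as tft

# Hypothetical raw feature spec: a ragged numeric feature whose flat values
# are parsed from 'val' and whose outer row lengths come from 'row_lengths_1'.
RAW_FEATURE_SPEC = {
    'x': tf.io.RaggedFeature(
        tf.int64,
        value_key='val',
        partitions=[tf.io.RaggedFeature.RowLengths('row_lengths_1')]),
}


def preprocessing_fn(inputs):
  """Gaussianizes a ragged feature and normalizes it with Tukey statistics."""
  x = tf.cast(inputs['x'], tf.float32)
  return {
      # Full-pass analysis over the flat values; the output keeps the same
      # ragged row partitions as the input.
      'x_gaussianized': tft.scale_to_gaussian(x),
      # tukey_location/tukey_scale are scalar analyzer outputs, so the
      # arithmetic broadcasts across the ragged values.
      'x_centered': (x - tft.tukey_location(x)) / tft.tukey_scale(x),
  }
```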