112
112
113
113
def _apply_cacheable_combiner (
114
114
combiner : analyzer_nodes .Combiner ,
115
- * tensor_inputs : common_types .TensorType ) -> Tuple [tf .Tensor , ...]:
115
+ * tensor_inputs : common_types .InputTensorType ) -> Tuple [tf .Tensor , ...]:
116
116
"""Applies the combiner over the whole dataset possibly utilizing cache."""
117
117
input_values_node = analyzer_nodes .get_input_tensors_value_nodes (
118
118
tensor_inputs )
@@ -137,7 +137,7 @@ def _apply_cacheable_combiner(
137
137
138
138
def _apply_cacheable_combiner_per_key (
139
139
combiner : analyzer_nodes .Combiner ,
140
- * tensor_inputs : common_types .TensorType ) -> Tuple [tf .Tensor , ...]:
140
+ * tensor_inputs : common_types .InputTensorType ) -> Tuple [tf .Tensor , ...]:
141
141
"""Similar to _apply_cacheable_combiner but this is computed per key."""
142
142
input_values_node = analyzer_nodes .get_input_tensors_value_nodes (
143
143
tensor_inputs )
@@ -162,7 +162,7 @@ def _apply_cacheable_combiner_per_key(
162
162
163
163
def _apply_cacheable_combiner_per_key_large (
164
164
combiner : analyzer_nodes .Combiner , key_vocabulary_filename : str ,
165
- * tensor_inputs : common_types .TensorType
165
+ * tensor_inputs : common_types .InputTensorType
166
166
) -> Union [tf .Tensor , common_types .Asset ]:
167
167
"""Similar to above but saves the combined result to a file."""
168
168
input_values_node = analyzer_nodes .get_input_tensors_value_nodes (
@@ -382,16 +382,16 @@ def _numeric_combine(inputs: List[tf.Tensor],
382
382
383
383
@common .log_api_use (common .ANALYZER_COLLECTION )
384
384
def min ( # pylint: disable=redefined-builtin
385
- x : common_types .TensorType ,
385
+ x : common_types .InputTensorType ,
386
386
reduce_instance_dims : bool = True ,
387
387
name : Optional [str ] = None ) -> tf .Tensor :
388
388
"""Computes the minimum of the values of a `Tensor` over the whole dataset.
389
389
390
- In the case of a `SparseTensor ` missing values will be used in return value:
391
- for float, NaN is used and for other dtypes the max is used.
390
+ In the case of a `CompositeTensor ` missing values will be used in return
391
+ value: for float, NaN is used and for other dtypes the max is used.
392
392
393
393
Args:
394
- x: A `Tensor` or `SparseTensor `.
394
+ x: A `Tensor` or `CompositeTensor `.
395
395
reduce_instance_dims: By default collapses the batch and instance dimensions
396
396
to arrive at a single scalar output. If False, only collapses the batch
397
397
dimension and outputs a `Tensor` of the same shape as the input.
@@ -409,16 +409,16 @@ def min( # pylint: disable=redefined-builtin
409
409
410
410
@common .log_api_use (common .ANALYZER_COLLECTION )
411
411
def max ( # pylint: disable=redefined-builtin
412
- x : common_types .TensorType ,
412
+ x : common_types .InputTensorType ,
413
413
reduce_instance_dims : bool = True ,
414
414
name : Optional [str ] = None ) -> tf .Tensor :
415
415
"""Computes the maximum of the values of a `Tensor` over the whole dataset.
416
416
417
- In the case of a `SparseTensor ` missing values will be used in return value:
418
- for float, NaN is used and for other dtypes the min is used.
417
+ In the case of a `CompositeTensor ` missing values will be used in return
418
+ value: for float, NaN is used and for other dtypes the min is used.
419
419
420
420
Args:
421
- x: A `Tensor` or `SparseTensor `.
421
+ x: A `Tensor` or `CompositeTensor `.
422
422
reduce_instance_dims: By default collapses the batch and instance dimensions
423
423
to arrive at a single scalar output. If False, only collapses the batch
424
424
dimension and outputs a vector of the same shape as the input.
@@ -433,19 +433,20 @@ def max( # pylint: disable=redefined-builtin
433
433
return _min_and_max (x , reduce_instance_dims , name )[1 ]
434
434
435
435
436
- def _min_and_max (x : common_types .TensorType ,
436
+ def _min_and_max (x : common_types .InputTensorType ,
437
437
reduce_instance_dims : bool = True ,
438
438
name : Optional [str ] = None ) -> Tuple [tf .Tensor , tf .Tensor ]:
439
- """Computes the min and max of the values of a `Tensor` or `SparseTensor `.
439
+ """Computes the min and max of the values of a `Tensor` or `CompositeTensor `.
440
440
441
- In the case of a `SparseTensor` missing values will be used in return value:
441
+ In the case of a `CompositeTensor` missing values will be used in return
442
+ value:
442
443
for float, NaN is used and for other dtypes the min is used.
443
444
444
445
Args:
445
- x: A `Tensor` or `SparseTensor `.
446
+ x: A `Tensor` or `CompositeTensor `.
446
447
reduce_instance_dims: By default collapses the batch and instance dimensions
447
- to arrive at a single scalar output. If False, only collapses the batch
448
- dimension and outputs a vector of the same shape as the input.
448
+ to arrive at a single scalar output. If False, only collapses the batch
449
+ dimension and outputs a vector of the same shape as the input.
449
450
name: (Optional) A name for this operation.
450
451
451
452
Returns:
@@ -461,6 +462,9 @@ def _min_and_max(x: common_types.TensorType,
461
462
combine_fn = np .nanmax
462
463
default_accumulator_value = (np .nan if x .dtype .is_floating else
463
464
- output_dtype .max )
465
+ elif not reduce_instance_dims and isinstance (x , tf .RaggedTensor ):
466
+ raise NotImplementedError (
467
+ 'Elementwise min_and_max does not support RaggedTensors.' )
464
468
else :
465
469
combine_fn = np .max
466
470
default_accumulator_value = (- np .inf if x .dtype .is_floating else
@@ -478,31 +482,31 @@ def _min_and_max(x: common_types.TensorType,
478
482
479
483
480
484
def _min_and_max_per_key (
481
- x : common_types .TensorType ,
482
- key : common_types .TensorType ,
485
+ x : common_types .InputTensorType ,
486
+ key : common_types .InputTensorType ,
483
487
reduce_instance_dims : bool = True ,
484
488
key_vocabulary_filename : Optional [str ] = None ,
485
489
name : Optional [str ] = None
486
490
) -> Union [Tuple [tf .Tensor , tf .Tensor , tf .Tensor ], tf .Tensor ]:
487
- """Computes the min and max of the values of a `Tensor` or `SparseTensor `.
491
+ """Computes the min and max of the values of a `Tensor` or `CompositeTensor `.
488
492
489
- In the case of a `SparseTensor ` missing values will be used in return value:
490
- for float, NaN is used and for other dtypes the min is used.
493
+ In the case of a `CompositeTensor ` missing values will be used in return
494
+ value: for float, NaN is used and for other dtypes the min is used.
491
495
492
496
This function operates under the assumption that the size of the key set
493
497
is small enough to fit in memory. Anything above a certain size is not
494
498
guaranteed to be handled properly, but support for larger key sets may be
495
499
available in a future version.
496
500
497
501
Args:
498
- x: A `Tensor` or `SparseTensor `.
499
- key: A Tensor or `SparseTensor ` of dtype tf.string. If `x` is
500
- a `SparseTensor `, `key` must exactly match `x` in everything except
502
+ x: A `Tensor` or `CompositeTensor `.
503
+ key: A Tensor or `CompositeTensor ` of dtype tf.string. If `x` is a
504
+ `CompositeTensor `, `key` must exactly match `x` in everything except
501
505
values.
502
506
reduce_instance_dims: By default collapses the batch and instance dimensions
503
- to arrive at a single scalar output. If False, only collapses the batch
504
- dimension and outputs a vector of the same shape as the input.
505
- The False case is not currently supported for _min_and_max_per_key.
507
+ to arrive at a single scalar output. If False, only collapses the batch
508
+ dimension and outputs a vector of the same shape as the input. The False
509
+ case is not currently supported for _min_and_max_per_key.
506
510
key_vocabulary_filename: (Optional) The file name for the key-output mapping
507
511
file. If None and key are provided, this combiner assumes the keys fit in
508
512
memory and will not store the result in a file. If empty string, a file
@@ -528,8 +532,9 @@ def _min_and_max_per_key(
528
532
529
533
with tf .compat .v1 .name_scope (name , 'min_and_max_per_key' ):
530
534
output_dtype = x .dtype
531
- if (not reduce_instance_dims and isinstance (x , tf .SparseTensor ) and
532
- x .dtype .is_floating ):
535
+ if (not reduce_instance_dims and
536
+ isinstance (x ,
537
+ (tf .SparseTensor , tf .RaggedTensor )) and x .dtype .is_floating ):
533
538
combine_fn = np .nanmax
534
539
default_accumulator_value = (np .nan if x .dtype .is_floating else
535
540
- output_dtype .max )
@@ -572,13 +577,13 @@ def _sum_combine_fn_and_dtype(
572
577
573
578
@common .log_api_use (common .ANALYZER_COLLECTION )
574
579
def sum ( # pylint: disable=redefined-builtin
575
- x : common_types .TensorType ,
580
+ x : common_types .InputTensorType ,
576
581
reduce_instance_dims : bool = True ,
577
582
name : Optional [str ] = None ) -> tf .Tensor :
578
583
"""Computes the sum of the values of a `Tensor` over the whole dataset.
579
584
580
585
Args:
581
- x: A `Tensor` or `SparseTensor `. Its type must be floating point
586
+ x: A `Tensor` or `CompositeTensor `. Its type must be floating point
582
587
(float{16|32|64}), integral (int{8|16|32|64}), or
583
588
unsigned integral (uint{8|16})
584
589
reduce_instance_dims: By default collapses the batch and instance dimensions
@@ -600,13 +605,18 @@ def sum( # pylint: disable=redefined-builtin
600
605
if reduce_instance_dims :
601
606
if isinstance (x , tf .SparseTensor ):
602
607
x = x .values
608
+ elif isinstance (x , tf .RaggedTensor ):
609
+ x = x .flat_values
603
610
x = tf .reduce_sum (input_tensor = x )
604
611
elif isinstance (x , tf .SparseTensor ):
605
612
if x .dtype == tf .uint8 or x .dtype == tf .uint16 :
606
613
x = tf .cast (x , tf .int64 )
607
614
elif x .dtype == tf .uint32 or x .dtype == tf .uint64 :
608
615
TypeError ('Data type %r is not supported' % x .dtype )
609
616
x = tf .sparse .reduce_sum (x , axis = 0 )
617
+ elif isinstance (x , tf .RaggedTensor ):
618
+ raise NotImplementedError (
619
+ 'Elementwise sum does not support RaggedTensors.' )
610
620
else :
611
621
x = tf .reduce_sum (input_tensor = x , axis = 0 )
612
622
output_dtype , sum_fn = _sum_combine_fn_and_dtype (x .dtype )
@@ -619,7 +629,7 @@ def sum( # pylint: disable=redefined-builtin
619
629
620
630
621
631
@common .log_api_use (common .ANALYZER_COLLECTION )
622
- def histogram (x : common_types .TensorType ,
632
+ def histogram (x : common_types .InputTensorType ,
623
633
boundaries : Optional [Union [tf .Tensor , int ]] = None ,
624
634
categorical : Optional [bool ] = False ,
625
635
name : Optional [str ] = None ) -> Tuple [tf .Tensor , tf .Tensor ]:
@@ -638,7 +648,7 @@ def histogram(x: common_types.TensorType,
638
648
zip(classes, probabilities)))
639
649
640
650
Args:
641
- x: A `Tensor` or `SparseTensor `.
651
+ x: A `Tensor` or `CompositeTensor `.
642
652
boundaries: (Optional) A `Tensor` or `int` used to build the histogram;
643
653
ignored if `categorical` is True. If possible, provide boundaries as
644
654
multiple sorted values. Default to 10 intervals over the 0-1 range, or
@@ -654,7 +664,12 @@ def histogram(x: common_types.TensorType,
654
664
655
665
with tf .compat .v1 .name_scope (name , 'histogram' ):
656
666
# We need to flatten because BoostedTreesBucketize expects a rank-1 input
657
- x = x .values if isinstance (x , tf .SparseTensor ) else tf .reshape (x , [- 1 ])
667
+ if isinstance (x , tf .SparseTensor ):
668
+ x = x .values
669
+ elif isinstance (x , tf .RaggedTensor ):
670
+ x = x .flat_values
671
+ else :
672
+ x = tf .reshape (x , [- 1 ])
658
673
if categorical :
659
674
x_dtype = x .dtype
660
675
x = x if x_dtype == tf .string else tf .strings .as_string (x )
@@ -687,13 +702,13 @@ def histogram(x: common_types.TensorType,
687
702
688
703
689
704
@common .log_api_use (common .ANALYZER_COLLECTION )
690
- def size (x : common_types .TensorType ,
705
+ def size (x : common_types .InputTensorType ,
691
706
reduce_instance_dims : bool = True ,
692
707
name : Optional [str ] = None ) -> tf .Tensor :
693
708
"""Computes the total size of instances in a `Tensor` over the whole dataset.
694
709
695
710
Args:
696
- x: A `Tensor` or `SparseTensor `.
711
+ x: A `Tensor` or `CompositeTensor `.
697
712
reduce_instance_dims: By default collapses the batch and instance dimensions
698
713
to arrive at a single scalar output. If False, only collapses the batch
699
714
dimension and outputs a vector of the same shape as the input.
@@ -715,13 +730,13 @@ def size(x: common_types.TensorType,
715
730
716
731
717
732
@common .log_api_use (common .ANALYZER_COLLECTION )
718
- def count_per_key (key : common_types .TensorType ,
733
+ def count_per_key (key : common_types .InputTensorType ,
719
734
key_vocabulary_filename : Optional [str ] = None ,
720
735
name : Optional [str ] = None ):
721
736
"""Computes the count of each element of a `Tensor`.
722
737
723
738
Args:
724
- key: A Tensor or `SparseTensor ` of dtype tf.string or tf.int.
739
+ key: A Tensor or `CompositeTensor ` of dtype tf.string or tf.int.
725
740
key_vocabulary_filename: (Optional) The file name for the key-output mapping
726
741
file. If None and key are provided, this combiner assumes the keys fit in
727
742
memory and will not store the result in a file. If empty string, a file
@@ -764,14 +779,14 @@ def count_per_key(key: common_types.TensorType,
764
779
765
780
766
781
@common .log_api_use (common .ANALYZER_COLLECTION )
767
- def mean (x : common_types .TensorType ,
782
+ def mean (x : common_types .InputTensorType ,
768
783
reduce_instance_dims : bool = True ,
769
784
name : Optional [str ] = None ,
770
785
output_dtype : Optional [tf .DType ] = None ) -> tf .Tensor :
771
786
"""Computes the mean of the values of a `Tensor` over the whole dataset.
772
787
773
788
Args:
774
- x: A `Tensor` or `SparseTensor `. Its type must be floating point
789
+ x: A `Tensor` or `CompositeTensor `. Its type must be floating point
775
790
(float{16|32|64}), or integral ([u]int{8|16|32|64}).
776
791
reduce_instance_dims: By default collapses the batch and instance dimensions
777
792
to arrive at a single scalar output. If False, only collapses the batch
@@ -792,7 +807,7 @@ def mean(x: common_types.TensorType,
792
807
793
808
794
809
@common .log_api_use (common .ANALYZER_COLLECTION )
795
- def var (x : common_types .TensorType ,
810
+ def var (x : common_types .InputTensorType ,
796
811
reduce_instance_dims : bool = True ,
797
812
name : Optional [str ] = None ,
798
813
output_dtype : Optional [tf .DType ] = None ) -> tf .Tensor :
@@ -802,7 +817,7 @@ def var(x: common_types.TensorType,
802
817
(x - mean(x))**2 / length(x).
803
818
804
819
Args:
805
- x: `Tensor` or `SparseTensor `. Its type must be floating point
820
+ x: `Tensor` or `CompositeTensor `. Its type must be floating point
806
821
(float{16|32|64}), or integral ([u]int{8|16|32|64}).
807
822
reduce_instance_dims: By default collapses the batch and instance dimensions
808
823
to arrive at a single scalar output. If False, only collapses the batch
@@ -822,12 +837,17 @@ def var(x: common_types.TensorType,
822
837
return _mean_and_var (x , reduce_instance_dims , output_dtype )[1 ]
823
838
824
839
825
- def _mean_and_var (x , reduce_instance_dims = True , output_dtype = None ):
840
+ def _mean_and_var (x : common_types .InputTensorType ,
841
+ reduce_instance_dims : bool = True ,
842
+ output_dtype : Optional [tf .DType ] = None ):
826
843
"""More efficient combined `mean` and `var`. See `var`."""
827
844
if output_dtype is None :
828
845
output_dtype = _FLOAT_OUTPUT_DTYPE_MAP .get (x .dtype )
829
846
if output_dtype is None :
830
847
raise TypeError ('Tensor type %r is not supported' % x .dtype )
848
+ if not reduce_instance_dims and isinstance (x , tf .RaggedTensor ):
849
+ raise NotImplementedError (
850
+ 'Elementwise mean_and_var does not support RaggedTensors.' )
831
851
832
852
with tf .compat .v1 .name_scope ('mean_and_var' ):
833
853
@@ -1007,8 +1027,8 @@ def _tukey_parameters(
1007
1027
1008
1028
1009
1029
def _mean_and_var_per_key (
1010
- x : common_types .TensorType ,
1011
- key : common_types .TensorType ,
1030
+ x : common_types .InputTensorType ,
1031
+ key : common_types .InputTensorType ,
1012
1032
reduce_instance_dims : bool = True ,
1013
1033
output_dtype : Optional [tf .DType ] = None ,
1014
1034
key_vocabulary_filename : Optional [str ] = None
@@ -1017,9 +1037,9 @@ def _mean_and_var_per_key(
1017
1037
"""`mean_and_var` by group, specified by key.
1018
1038
1019
1039
Args:
1020
- x: A `Tensor` or `SparseTensor `.
1021
- key: A Tensor or `SparseTensor ` of dtype tf.string. If `x` is
1022
- a `SparseTensor `, `key` must exactly match `x` in everything except
1040
+ x: A `Tensor` or `CompositeTensor `.
1041
+ key: A Tensor or `CompositeTensor ` of dtype tf.string. If `x` is
1042
+ a `CompositeTensor `, `key` must exactly match `x` in everything except
1023
1043
values.
1024
1044
reduce_instance_dims: (Optional) By default collapses the batch and instance
1025
1045
dimensions to arrive at a single scalar output. The False case is not
0 commit comments