Add a new model type "materialized_prediction" that explicitly selects the materialized predictions extractor.

genehwung · tfx-copybara · commit 5341e918c59c · 2023-08-02T22:57:23.000-07:00
PiperOrigin-RevId: 553366496
diff --git a/RELEASE.md b/RELEASE.md
@@ -24,6 +24,9 @@
     eval_saved_model, allowing signature='eval' to now be used with other model
     types.
 
+*   Add "materialized_prediction" model type to allow users to explicitly
+    bypass model inference.
+
 ## Breaking Changes
 
 *   Depend on PIL for image related metrics.
diff --git a/tensorflow_model_analysis/api/model_eval_lib.py b/tensorflow_model_analysis/api/model_eval_lib.py
@@ -573,6 +573,18 @@ def default_extractors(  # pylint: disable=invalid-name
           eval_config=eval_config, materialize=materialize)
   ])
 
+  extract_features = features_extractor.FeaturesExtractor(
+      eval_config=eval_config, tensor_representations=tensor_representations
+  )
+  extract_labels = labels_extractor.LabelsExtractor(eval_config=eval_config)
+  extract_example_weights = example_weights_extractor.ExampleWeightsExtractor(
+      eval_config=eval_config
+  )
+  extract_materialized_predictions = (
+      materialized_predictions_extractor.MaterializedPredictionsExtractor(
+          eval_config=eval_config
+      )
+  )
   if eval_shared_model:
     model_types = _model_types(eval_shared_models)
     logging.info('eval_shared_models have model_types: %s', model_types)
@@ -582,21 +594,29 @@ def default_extractors(  # pylint: disable=invalid-name
           'either a custom_predict_extractor must be used or model type must '
           'be one of: {}. evalconfig={}'.format(
               str(constants.VALID_TF_MODEL_TYPES), eval_config))
-    if model_types == {constants.TF_LITE}:
+    if model_types == {constants.MATERIALIZED_PREDICTION}:
+      return [
+          extract_features,
+          extract_labels,
+          extract_example_weights,
+          extract_materialized_predictions,
+      ] + slicing_extractors
+    elif model_types == {constants.TF_LITE}:
       # TODO(b/163889779): Convert TFLite extractor to operate on batched
       # extracts. Then we can remove the input extractor.
       return [
-          features_extractor.FeaturesExtractor(
-              eval_config=eval_config,
-              tensor_representations=tensor_representations),
+          extract_features,
           transformed_features_extractor.TransformedFeaturesExtractor(
-              eval_config=eval_config, eval_shared_model=eval_shared_model),
-          labels_extractor.LabelsExtractor(eval_config=eval_config),
-          example_weights_extractor.ExampleWeightsExtractor(
-              eval_config=eval_config),
-          (custom_predict_extractor or
-           tflite_predict_extractor.TFLitePredictExtractor(
-               eval_config=eval_config, eval_shared_model=eval_shared_model))
+              eval_config=eval_config, eval_shared_model=eval_shared_model
+          ),
+          extract_labels,
+          extract_example_weights,
+          (
+              custom_predict_extractor
+              or tflite_predict_extractor.TFLitePredictExtractor(
+                  eval_config=eval_config, eval_shared_model=eval_shared_model
+              )
+          ),
       ] + slicing_extractors
     elif constants.TF_LITE in model_types:
       raise NotImplementedError(
@@ -605,15 +625,15 @@ def default_extractors(  # pylint: disable=invalid-name
 
     if model_types == {constants.TF_JS}:
       return [
-          features_extractor.FeaturesExtractor(
-              eval_config=eval_config,
-              tensor_representations=tensor_representations),
-          labels_extractor.LabelsExtractor(eval_config=eval_config),
-          example_weights_extractor.ExampleWeightsExtractor(
-              eval_config=eval_config),
-          (custom_predict_extractor or
-           tfjs_predict_extractor.TFJSPredictExtractor(
-               eval_config=eval_config, eval_shared_model=eval_shared_model))
+          extract_features,
+          extract_labels,
+          extract_example_weights,
+          (
+              custom_predict_extractor
+              or tfjs_predict_extractor.TFJSPredictExtractor(
+                  eval_config=eval_config, eval_shared_model=eval_shared_model
+              )
+          ),
       ] + slicing_extractors
     elif constants.TF_JS in model_types:
       raise NotImplementedError(
@@ -646,35 +666,29 @@ def default_extractors(  # pylint: disable=invalid-name
           'implemented: eval_config={}'.format(eval_config)
       )
     else:
-      extractors = [
-          features_extractor.FeaturesExtractor(
-              eval_config=eval_config,
-              tensor_representations=tensor_representations)
-      ]
+      extractors = [extract_features]
       if not custom_predict_extractor:
         extractors.append(
             transformed_features_extractor.TransformedFeaturesExtractor(
                 eval_config=eval_config, eval_shared_model=eval_shared_model))
       extractors.extend([
-          labels_extractor.LabelsExtractor(eval_config=eval_config),
-          example_weights_extractor.ExampleWeightsExtractor(
-              eval_config=eval_config),
-          (custom_predict_extractor or
-           predictions_extractor.PredictionsExtractor(
-               eval_config=eval_config, eval_shared_model=eval_shared_model)),
+          extract_labels,
+          extract_example_weights,
+          (
+              custom_predict_extractor
+              or predictions_extractor.PredictionsExtractor(
+                  eval_config=eval_config, eval_shared_model=eval_shared_model
+              )
+          ),
       ])
       extractors.extend(slicing_extractors)
       return extractors
   else:
     return [
-        features_extractor.FeaturesExtractor(
-            eval_config=eval_config,
-            tensor_representations=tensor_representations),
-        labels_extractor.LabelsExtractor(eval_config=eval_config),
-        example_weights_extractor.ExampleWeightsExtractor(
-            eval_config=eval_config),
-        materialized_predictions_extractor.MaterializedPredictionsExtractor(
-            eval_config),
+        extract_features,
+        extract_labels,
+        extract_example_weights,
+        extract_materialized_predictions,
     ] + slicing_extractors
 
 
diff --git a/tensorflow_model_analysis/api/model_eval_lib_test.py b/tensorflow_model_analysis/api/model_eval_lib_test.py
@@ -714,7 +714,13 @@ def testRunModelAnalysisMultipleModels(self):
     self.assertMetricsAlmostEqual(eval_result_2.slicing_metrics,
                                   expected_result_2)
 
-  def testRunModelAnalysisWithModelAgnosticPredictions(self):
+  @parameterized.named_parameters(
+      ('no_model', False, None),
+      ('has_a_model', True, constants.MATERIALIZED_PREDICTION),
+  )
+  def testRunModelAnalysisWithExplicitModelAgnosticPredictions(
+      self, has_model, model_type
+  ):
     examples = [
         self._makeExample(
             age=3.0, language='english', label=1.0, prediction=0.9),
@@ -725,13 +731,6 @@ def testRunModelAnalysisWithModelAgnosticPredictions(self):
         self._makeExample(
             age=5.0, language='chinese', label=1.0, prediction=0.2)
     ]
-    data_location = self._writeTFExamplesToTFRecords(examples)
-    model_specs = [
-        config_pb2.ModelSpec(
-            prediction_key='prediction',
-            label_key='label',
-            example_weight_key='age')
-    ]
     metrics_specs = [
         config_pb2.MetricsSpec(
             metrics=[config_pb2.MetricConfig(class_name='ExampleCount')],
@@ -746,41 +745,56 @@ def testRunModelAnalysisWithModelAgnosticPredictions(self):
             example_weights=config_pb2.ExampleWeightOptions(weighted=True))
     ]
     slicing_specs = [config_pb2.SlicingSpec(feature_keys=['language'])]
+    model_spec = config_pb2.ModelSpec(
+        prediction_key='prediction',
+        label_key='label',
+        example_weight_key='age',
+    )
+    if model_type is not None:
+      model_spec.model_type = model_type
     eval_config = config_pb2.EvalConfig(
-        model_specs=model_specs,
+        model_specs=[model_spec],
         metrics_specs=metrics_specs,
-        slicing_specs=slicing_specs)
-    eval_result = model_eval_lib.run_model_analysis(
-        eval_config=eval_config,
-        data_location=data_location,
-        output_path=self._getTempDir())
+        slicing_specs=slicing_specs,
+    )
+    data_location = self._writeTFExamplesToTFRecords(examples)
+    if has_model:
+      model_location = self._exportEvalSavedModel(
+          linear_classifier.simple_linear_classifier
+      )
+      model = model_eval_lib.default_eval_shared_model(
+          eval_saved_model_path=model_location,
+          eval_config=eval_config,
+      )
+      eval_result = model_eval_lib.run_model_analysis(
+          eval_shared_model=model,
+          eval_config=eval_config,
+          data_location=data_location,
+          output_path=self._getTempDir(),
+      )
+    else:
+      eval_result = model_eval_lib.run_model_analysis(
+          eval_config=eval_config,
+          data_location=data_location,
+          output_path=self._getTempDir(),
+      )
     expected = {
         (('language', 'chinese'),): {
-            'binary_accuracy': {
-                'doubleValue': 0.375
-            },
-            'weighted_example_count': {
-                'doubleValue': 8.0
-            },
-            'example_count': {
-                'doubleValue': 2.0
-            },
+            'binary_accuracy': {'doubleValue': 0.375},
+            'weighted_example_count': {'doubleValue': 8.0},
+            'example_count': {'doubleValue': 2.0},
         },
         (('language', 'english'),): {
-            'binary_accuracy': {
-                'doubleValue': 1.0
-            },
-            'weighted_example_count': {
-                'doubleValue': 7.0
-            },
-            'example_count': {
-                'doubleValue': 2.0
-            },
-        }
+            'binary_accuracy': {'doubleValue': 1.0},
+            'weighted_example_count': {'doubleValue': 7.0},
+            'example_count': {'doubleValue': 2.0},
+        },
     }
     self.assertEqual(eval_result.data_location, data_location)
-    self.assertEqual(eval_result.config.slicing_specs[0],
-                     config_pb2.SlicingSpec(feature_keys=['language']))
+    self.assertEqual(
+        eval_result.config.slicing_specs[0],
+        config_pb2.SlicingSpec(feature_keys=['language']),
+    )
     self.assertMetricsAlmostEqual(eval_result.slicing_metrics, expected)
 
   @parameterized.named_parameters(
diff --git a/tensorflow_model_analysis/constants.py b/tensorflow_model_analysis/constants.py
@@ -32,8 +32,16 @@
 TF_GENERIC = 'tf_generic'
 TF_LITE = 'tf_lite'
 TF_JS = 'tf_js'
-VALID_TF_MODEL_TYPES = (TFMA_EVAL, TF_GENERIC, TF_ESTIMATOR, TF_KERAS, TF_LITE,
-                        TF_JS)
+MATERIALIZED_PREDICTION = 'materialized_prediction'
+VALID_TF_MODEL_TYPES = (
+    TFMA_EVAL,
+    TF_GENERIC,
+    TF_ESTIMATOR,
+    TF_KERAS,
+    TF_LITE,
+    TF_JS,
+    MATERIALIZED_PREDICTION,
+)
 
 # This constant is only used for telemetry
 MODEL_AGNOSTIC = 'model_agnostic'