Update tfma.default_eval_shared_model and tfma.default_extractors to better support custom model types.

mdreves · tf-model-analysis-team · commit ff5112840714 · 2020-06-20T10:34:47.000-07:00
PiperOrigin-RevId: 317473286
diff --git a/RELEASE.md b/RELEASE.md
@@ -40,6 +40,8 @@
 *   Add `tfma.slicer.stringify_slice_key()`.
 *   Deprecated external use of tfma.slicer.SingleSliceSpec (tfma.SlicingSpec
     should be used instead).
+*   Updated `tfma.default_eval_shared_model` and `tfma.default_extractors` to
+    better support custom model types.
 
 ## Breaking changes
 
@@ -48,6 +50,8 @@
 *   Refactored confidence interval methodology field. The old path under
     `Options.confidence_interval_methodology` is now at
     `Options.confidence_intervals.methodology`.
+*   Removed model_load_seconds_callback from ModelLoader construct_fn (timing
+    is now handled by load). Removed access to shared_handle from ModelLoader.
 
 ## Deprecations
 
diff --git a/tensorflow_model_analysis/__init__.py b/tensorflow_model_analysis/__init__.py
@@ -97,6 +97,8 @@
 
 from tensorflow_model_analysis.post_export_metrics import post_export_metrics
 
+from tensorflow_model_analysis.model_util import CombineFnWithModels
+from tensorflow_model_analysis.model_util import DoFnWithModels
 from tensorflow_model_analysis.model_util import get_model_type
 from tensorflow_model_analysis.model_util import model_construct_fn
 from tensorflow_model_analysis.model_util import verify_and_update_eval_shared_models
diff --git a/tensorflow_model_analysis/api/model_eval_lib.py b/tensorflow_model_analysis/api/model_eval_lib.py
@@ -291,7 +291,9 @@ def default_eval_shared_model(
     blacklist_feature_fetches: Optional[List[Text]] = None,
     tags: Optional[List[Text]] = None,
     model_name: Text = '',
-    eval_config: Optional[config.EvalConfig] = None) -> types.EvalSharedModel:
+    eval_config: Optional[config.EvalConfig] = None,
+    custom_model_loader: Optional[types.ModelLoader] = None
+) -> types.EvalSharedModel:
   """Returns default EvalSharedModel.
 
   Args:
@@ -318,6 +320,7 @@ def default_eval_shared_model(
       ModelSpecs.name). The name should only be provided if multiple models are
       being evaluated.
     eval_config: Eval config. Only used for setting default tags.
+    custom_model_loader: Optional custom model loader for non-TF models.
   """
   if not eval_config:
     model_type = constants.TF_ESTIMATOR
@@ -360,6 +363,19 @@ def default_eval_shared_model(
         add_metrics_callbacks.append(example_weight_callback)
     # pytype: enable=module-attr
 
+  model_loader = custom_model_loader
+  if not model_loader and model_type in constants.VALID_TF_MODEL_TYPES:
+    model_loader = types.ModelLoader(
+        construct_fn=model_util.model_construct_fn(
+            eval_saved_model_path=eval_saved_model_path,
+            add_metrics_callbacks=add_metrics_callbacks,
+            include_default_metrics=include_default_metrics,
+            additional_fetches=additional_fetches,
+            blacklist_feature_fetches=blacklist_feature_fetches,
+            model_type=model_type,
+            tags=tags),
+        tags=tags)
+
   return types.EvalSharedModel(
       model_name=model_name,
       model_type=model_type,
@@ -368,16 +384,7 @@ def default_eval_shared_model(
       include_default_metrics=include_default_metrics,
       example_weight_key=example_weight_key,
       additional_fetches=additional_fetches,
-      model_loader=types.ModelLoader(
-          tags=tags,
-          construct_fn=model_util.model_construct_fn(
-              eval_saved_model_path=eval_saved_model_path,
-              add_metrics_callbacks=add_metrics_callbacks,
-              include_default_metrics=include_default_metrics,
-              additional_fetches=additional_fetches,
-              blacklist_feature_fetches=blacklist_feature_fetches,
-              model_type=model_type,
-              tags=tags)))
+      model_loader=model_loader)
 
 
 def default_extractors(  # pylint: disable=invalid-name
@@ -387,6 +394,7 @@ def default_extractors(  # pylint: disable=invalid-name
     materialize: Optional[bool] = True,
     enable_batched_extractors: Optional[bool] = False,
     tensor_adapter_config: Optional[tensor_adapter.TensorAdapterConfig] = None,
+    custom_predict_extractor: Optional[extractor.Extractor] = None
 ) -> List[extractor.Extractor]:
   """Returns the default extractors for use in ExtractAndEvaluate.
 
@@ -401,6 +409,8 @@ def default_extractors(  # pylint: disable=invalid-name
     tensor_adapter_config: Tensor adapter config which specifies how to obtain
       tensors from the Arrow RecordBatch. If None, we feed the raw examples to
       the model.
+    custom_predict_extractor: Optional custom predict extractor for non-TF
+      models.
 
   Raises:
     NotImplementedError: If eval_config contains mixed serving and eval models.
@@ -417,7 +427,7 @@ def default_extractors(  # pylint: disable=invalid-name
       eval_config = config.EvalConfig(
           slicing_specs=[s.to_proto() for s in slice_spec])
     return [
-        predict_extractor.PredictExtractor(
+        custom_predict_extractor or predict_extractor.PredictExtractor(
             eval_shared_model, materialize=materialize),
         slice_key_extractor.SliceKeyExtractor(
             eval_config=eval_config, materialize=materialize)
@@ -427,15 +437,18 @@ def default_extractors(  # pylint: disable=invalid-name
     eval_shared_models = model_util.verify_and_update_eval_shared_models(
         eval_shared_model)
 
-    if not model_types.issubset(constants.VALID_MODEL_TYPES):
+    if (not model_types.issubset(constants.VALID_TF_MODEL_TYPES) and
+        not custom_predict_extractor):
       raise NotImplementedError(
-          'model type must be one of: {}. evalconfig={}'.format(
-              str(constants.VALID_MODEL_TYPES), eval_config))
+          'either a custom_predict_extractor must be used or model type must '
+          'be one of: {}. evalconfig={}'.format(
+              str(constants.VALID_TF_MODEL_TYPES), eval_config))
     if model_types == set([constants.TF_LITE]):
       return [
           input_extractor.InputExtractor(eval_config=eval_config),
-          tflite_predict_extractor.TFLitePredictExtractor(
-              eval_config=eval_config, eval_shared_model=eval_shared_model),
+          (custom_predict_extractor or
+           tflite_predict_extractor.TFLitePredictExtractor(
+               eval_config=eval_config, eval_shared_model=eval_shared_model)),
           slice_key_extractor.SliceKeyExtractor(
               eval_config=eval_config, materialize=materialize)
       ]
@@ -448,7 +461,7 @@ def default_extractors(  # pylint: disable=invalid-name
           all(eval_constants.EVAL_TAG in m.model_loader.tags
               for m in eval_shared_models)):
       return [
-          predict_extractor.PredictExtractor(
+          custom_predict_extractor or predict_extractor.PredictExtractor(
               eval_shared_model,
               materialize=materialize,
               eval_config=eval_config),
@@ -466,18 +479,19 @@ def default_extractors(  # pylint: disable=invalid-name
         return [
             batched_input_extractor.BatchedInputExtractor(
                 eval_config=eval_config),
-            batched_predict_extractor_v2.BatchedPredictExtractor(
-                eval_config=eval_config,
-                eval_shared_model=eval_shared_model,
-                tensor_adapter_config=tensor_adapter_config),
+            (custom_predict_extractor or
+             batched_predict_extractor_v2.BatchedPredictExtractor(
+                 eval_config=eval_config,
+                 eval_shared_model=eval_shared_model,
+                 tensor_adapter_config=tensor_adapter_config)),
             unbatch_extractor.UnbatchExtractor(),
             slice_key_extractor.SliceKeyExtractor(
                 eval_config=eval_config, materialize=materialize)
         ]
       else:
         return [
             input_extractor.InputExtractor(eval_config=eval_config),
-            predict_extractor_v2.PredictExtractor(
+            custom_predict_extractor or predict_extractor_v2.PredictExtractor(
                 eval_config=eval_config, eval_shared_model=eval_shared_model),
             slice_key_extractor.SliceKeyExtractor(
                 eval_config=eval_config, materialize=materialize)
diff --git a/tensorflow_model_analysis/api/model_eval_lib_test.py b/tensorflow_model_analysis/api/model_eval_lib_test.py
@@ -367,6 +367,89 @@ def testRunModelAnalysis(self):
     self.assertMetricsAlmostEqual(eval_result.slicing_metrics, expected)
     self.assertFalse(eval_result.plots)
 
+  def testRunModelAnalysisWithCustomizations(self):
+    model_location = self._exportEvalSavedModel(
+        linear_classifier.simple_linear_classifier)
+    examples = [
+        self._makeExample(age=3.0, language='english', label=1.0),
+        self._makeExample(age=3.0, language='chinese', label=0.0),
+        self._makeExample(age=4.0, language='english', label=1.0),
+        self._makeExample(age=5.0, language='chinese', label=1.0),
+        self._makeExample(age=5.0, language='hindi', label=1.0)
+    ]
+    data_location = self._writeTFExamplesToTFRecords(examples)
+    slicing_specs = [config.SlicingSpec(feature_keys=['language'])]
+    options = config.Options()
+    options.min_slice_size.value = 2
+    eval_config = config.EvalConfig(
+        model_specs=[config.ModelSpec(model_type='my_model_type')],
+        slicing_specs=slicing_specs,
+        options=options)
+    # Reuse the default model_loader to exercise the custom_model_loader arg.
+    model_loader = model_eval_lib.default_eval_shared_model(
+        eval_saved_model_path=model_location,
+        example_weight_key='age').model_loader
+    eval_shared_model = model_eval_lib.default_eval_shared_model(
+        eval_saved_model_path=model_location, custom_model_loader=model_loader)
+    # Reuse PredictExtractor to exercise the custom_predict_extractor arg.
+    extractors = model_eval_lib.default_extractors(
+        eval_shared_model=eval_shared_model,
+        eval_config=eval_config,
+        custom_predict_extractor=predict_extractor.PredictExtractor(
+            eval_shared_model=eval_shared_model, eval_config=eval_config))
+    eval_result = model_eval_lib.run_model_analysis(
+        eval_config=eval_config,
+        eval_shared_model=eval_shared_model,
+        data_location=data_location,
+        output_path=self._getTempDir(),
+        extractors=extractors)
+    # We only check some of the metrics to ensure that the end-to-end
+    # pipeline works.
+    expected = {
+        (('language', 'hindi'),): {
+            u'__ERROR__': {
+                'debugMessage':
+                    u'Example count for this slice key is lower than the '
+                    u'minimum required value: 2. No data is aggregated for '
+                    u'this slice.'
+            },
+        },
+        (('language', 'chinese'),): {
+            'accuracy': {
+                'doubleValue': 0.5
+            },
+            'my_mean_label': {
+                'doubleValue': 0.5
+            },
+            metric_keys.EXAMPLE_WEIGHT: {
+                'doubleValue': 8.0
+            },
+            metric_keys.EXAMPLE_COUNT: {
+                'doubleValue': 2.0
+            },
+        },
+        (('language', 'english'),): {
+            'accuracy': {
+                'doubleValue': 1.0
+            },
+            'my_mean_label': {
+                'doubleValue': 1.0
+            },
+            metric_keys.EXAMPLE_WEIGHT: {
+                'doubleValue': 7.0
+            },
+            metric_keys.EXAMPLE_COUNT: {
+                'doubleValue': 2.0
+            },
+        }
+    }
+    self.assertEqual(eval_result.model_location, model_location.decode())
+    self.assertEqual(eval_result.data_location, data_location)
+    self.assertEqual(eval_result.config.slicing_specs[0],
+                     config.SlicingSpec(feature_keys=['language']))
+    self.assertMetricsAlmostEqual(eval_result.slicing_metrics, expected)
+    self.assertFalse(eval_result.plots)
+
   def testRunModelAnalysisMultipleModels(self):
     examples = [
         self._makeExample(age=3.0, language='english', label=1.0),
diff --git a/tensorflow_model_analysis/constants.py b/tensorflow_model_analysis/constants.py
@@ -27,12 +27,12 @@
 PLACEHOLDER = 'placeholder'
 SPARSE_PLACEHOLDER = 'sparse_placeholder'
 
-# Types of models
+# Types of TF models
 TF_ESTIMATOR = 'tf_estimator'
 TF_KERAS = 'tf_keras'
 TF_GENERIC = 'tf_generic'
 TF_LITE = 'tf_lite'
-VALID_MODEL_TYPES = ('', TF_GENERIC, TF_ESTIMATOR, TF_KERAS, TF_LITE)
+VALID_TF_MODEL_TYPES = (TF_GENERIC, TF_ESTIMATOR, TF_KERAS, TF_LITE)
 
 # LINT.IfChange
 METRICS_NAMESPACE = 'tfx.ModelAnalysis'
diff --git a/tensorflow_model_analysis/evaluators/keras_util.py b/tensorflow_model_analysis/evaluators/keras_util.py
@@ -31,7 +31,7 @@ def metrics_specs_from_keras(
     model_loader: types.ModelLoader,
 ) -> List[config.MetricsSpec]:
   """Returns metrics specs for metrics and losses associated with the model."""
-  model = model_loader.construct_fn(lambda x: None)()
+  model = model_loader.construct_fn()
   if model is None:
     return []
 
diff --git a/tensorflow_model_analysis/evaluators/keras_util_test.py b/tensorflow_model_analysis/evaluators/keras_util_test.py
@@ -69,7 +69,7 @@ def testMetricSpecsFromKeras(self):
 
     # TODO(b/149995449): Keras does not support re-loading metrics with the new
     #   API. Re-enable after this is fixed.
-    model = eval_shared_model.model_loader.construct_fn(lambda x: None)()
+    model = eval_shared_model.model_loader.construct_fn()
     if not hasattr(model, 'loss_functions'):
       return
 
@@ -122,7 +122,7 @@ def testMetricSpecsFromKerasSequential(self):
 
     # TODO(b/149995449): Keras does not support re-loading metrics with the new
     #   API. Re-enable after this is fixed.
-    model = eval_shared_model.model_loader.construct_fn(lambda x: None)()
+    model = eval_shared_model.model_loader.construct_fn()
     if not hasattr(model, 'loss_functions'):
       return
 
@@ -184,7 +184,7 @@ def testMetricSpecsFromKerasWithMultipleOutputs(self):
 
     # TODO(b/149995449): Keras does not support re-loading metrics with the new
     #   API. Re-enable after this is fixed.
-    model = eval_shared_model.model_loader.construct_fn(lambda x: None)()
+    model = eval_shared_model.model_loader.construct_fn()
     if not hasattr(model, 'loss_functions'):
       return
 
diff --git a/tensorflow_model_analysis/evaluators/metrics_and_plots_evaluator_v2_test.py b/tensorflow_model_analysis/evaluators/metrics_and_plots_evaluator_v2_test.py
@@ -307,9 +307,8 @@ def check_validations(got):
           # TODO(b/149995449): Keras does not support re-loading metrics with
           # its new API so the loss added at compile time will be missing.
           # Re-enable after this is fixed.
-          if hasattr(
-              eval_shared_model.model_loader.construct_fn(lambda x: None)(),
-              'compiled_metrics'):
+          if hasattr(eval_shared_model.model_loader.construct_fn(),
+                     'compiled_metrics'):
             expected_metric_validations_per_slice = (
                 expected_metric_validations_per_slice[:3])
           self.assertLen(got.metric_validations_per_slice[0].failures,
diff --git a/tensorflow_model_analysis/model_agnostic_eval/model_agnostic_evaluate_graph.py b/tensorflow_model_analysis/model_agnostic_eval/model_agnostic_evaluate_graph.py
@@ -25,35 +25,24 @@
 # Standard __future__ imports
 from __future__ import print_function
 
-import datetime
+from typing import List, Optional  # pytype: disable=not-supported-yet
+
 # Standard Imports
 import tensorflow as tf
 
 from tensorflow_model_analysis import types
 from tensorflow_model_analysis.eval_metrics_graph import eval_metrics_graph
 from tensorflow_model_analysis.model_agnostic_eval import model_agnostic_predict
 
-from typing import Callable, List, Optional  # pytype: disable=not-supported-yet
-
 
 def make_construct_fn(  # pylint: disable=invalid-name
     add_metrics_callbacks: Optional[List[types.AddMetricsCallbackType]],
     config: model_agnostic_predict.ModelAgnosticConfig):
   """Returns a construct fn for constructing the model agnostic eval graph."""
 
-  def construct_fn(model_load_seconds_callback: Callable[[int], None]):
-    """Thin wrapper for the actual construct to allow for metrics."""
-
-    def construct():  # pylint: disable=invalid-name
-      """Function for constructing a model agnostic eval graph."""
-      start_time = datetime.datetime.now()
-      model_agnostic_eval = ModelAgnosticEvaluateGraph(add_metrics_callbacks,
-                                                       config)
-      end_time = datetime.datetime.now()
-      model_load_seconds_callback(int((end_time - start_time).total_seconds()))
-      return model_agnostic_eval
-
-    return construct
+  def construct_fn():  # pylint: disable=invalid-name
+    """Function for constructing a model agnostic eval graph."""
+    return ModelAgnosticEvaluateGraph(add_metrics_callbacks, config)
 
   return construct_fn
 
diff --git a/tensorflow_model_analysis/model_util.py b/tensorflow_model_analysis/model_util.py
diff --git a/tensorflow_model_analysis/types.py b/tensorflow_model_analysis/types.py