Add a TextToImagePreprocessor base class (#2181)

mattdangerw · web-flow · commit 09759829c5fa · 2025-03-31T17:28:26.000-07:00
Minor bit of bookkeeping, adds a `TextToImagePreprocessor` base class.
Not moving any common functionality here yet until we have at least two
image generation models, but for now this will allow the auto class
functionality to work.
diff --git a/keras_hub/api/models/__init__.py b/keras_hub/api/models/__init__.py
@@ -369,6 +369,9 @@
     TextClassifierPreprocessor,
 )
 from keras_hub.src.models.text_to_image import TextToImage
+from keras_hub.src.models.text_to_image_preprocessor import (
+    TextToImagePreprocessor,
+)
 from keras_hub.src.models.vgg.vgg_backbone import VGGBackbone
 from keras_hub.src.models.vgg.vgg_image_classifier import VGGImageClassifier
 from keras_hub.src.models.vgg.vgg_image_classifier_preprocessor import (
diff --git a/keras_hub/src/models/stable_diffusion_3/stable_diffusion_3_text_to_image_preprocessor.py b/keras_hub/src/models/stable_diffusion_3/stable_diffusion_3_text_to_image_preprocessor.py
@@ -2,14 +2,16 @@
 from keras import layers
 
 from keras_hub.src.api_export import keras_hub_export
-from keras_hub.src.models.preprocessor import Preprocessor
 from keras_hub.src.models.stable_diffusion_3.stable_diffusion_3_backbone import (  # noqa: E501
     StableDiffusion3Backbone,
 )
+from keras_hub.src.models.text_to_image_preprocessor import (
+    TextToImagePreprocessor,
+)
 
 
 @keras_hub_export("keras_hub.models.StableDiffusion3TextToImagePreprocessor")
-class StableDiffusion3TextToImagePreprocessor(Preprocessor):
+class StableDiffusion3TextToImagePreprocessor(TextToImagePreprocessor):
     """Stable Diffusion 3 text-to-image model preprocessor.
 
     This preprocessing layer is meant for use with
diff --git a/keras_hub/src/models/text_to_image_preprocessor.py b/keras_hub/src/models/text_to_image_preprocessor.py
@@ -0,0 +1,35 @@
+from keras_hub.src.api_export import keras_hub_export
+from keras_hub.src.models.preprocessor import Preprocessor
+
+
+@keras_hub_export("keras_hub.models.TextToImagePreprocessor")
+class TextToImagePreprocessor(Preprocessor):
+    """Base class for text to image preprocessing layers.
+
+    A `TextToImagePreprocessor` layer wraps a `keras_hub.tokenizers.Tokenizer`
+    to create a preprocessing layer for text to image tasks. It is intended to
+    be paired with a `keras_hub.models.TextToImage` task.
+
+    The exact specifics of this layer will vary depending on the subclass
+    implementation per model architecture. Generally, it will take text input,
+    tokenize it, then pad/truncate it so it is ready to be fed to an image
+    generation model (e.g. a diffusion model).
+
+    Examples:
+    ```python
+    preprocessor = keras_hub.models.TextToImagePreprocessor.from_preset(
+        "stable_diffusion_3_medium",
+        sequence_length=256,  # Optional.
+    )
+
+    # Tokenize and pad/truncate a single sentence.
+    x = "The quick brown fox jumped."
+    x = preprocessor(x)
+
+    # Tokenize and pad/truncate a batch of sentences.
+    x = ["The quick brown fox jumped."]
+    x = preprocessor(x)
+    ```
+    """
+
+    pass
diff --git a/keras_hub/src/models/text_to_image_preprocessor_test.py b/keras_hub/src/models/text_to_image_preprocessor_test.py
@@ -0,0 +1,35 @@
+import pytest
+
+from keras_hub.src.models.stable_diffusion_3.stable_diffusion_3_text_to_image_preprocessor import (  # noqa: E501
+    StableDiffusion3TextToImagePreprocessor,
+)
+from keras_hub.src.models.text_to_image_preprocessor import (
+    TextToImagePreprocessor,
+)
+from keras_hub.src.tests.test_case import TestCase
+
+
+class TestTextToImagePreprocessor(TestCase):  # `from_preset` checks.
+    @pytest.mark.large
+    def test_from_preset(self):
+        self.assertIsInstance(
+            TextToImagePreprocessor.from_preset("stable_diffusion_3_medium"),
+            StableDiffusion3TextToImagePreprocessor,
+        )
+        self.assertIsInstance(
+            StableDiffusion3TextToImagePreprocessor.from_preset(
+                "stable_diffusion_3_medium"
+            ),
+            StableDiffusion3TextToImagePreprocessor,
+        )
+
+    @pytest.mark.large
+    def test_from_preset_errors(self):
+        with self.assertRaises(ValueError):
+            # Loading a preset that belongs to a different class must fail.
+            StableDiffusion3TextToImagePreprocessor.from_preset("gpt2_base_en")
+        with self.assertRaises(ValueError):
+            # Loading a handle that is not a Keras model must fail.
+            StableDiffusion3TextToImagePreprocessor.from_preset(
+                "hf://spacy/en_core_web_sm"
+            )