Added support for adjusting text prompt strengths (useful in Revision mode)

MoonRide303 · MoonRide303 · commit e66e8c1ad55b · 2023-09-10T19:36:56.000+02:00
diff --git a/modules/async_worker.py b/modules/async_worker.py
@@ -60,7 +60,7 @@ def handler(task):
         base_model_name, refiner_model_name, base_clip_skip, refiner_clip_skip, \
         l1, w1, l2, w2, l3, w3, l4, w4, l5, w5, save_metadata_json, save_metadata_image, \
         img2img_mode, img2img_start_step, img2img_denoise, \
-        revision_mode, zero_out_positive, zero_out_negative, revision_strength_1, revision_strength_2, \
+        revision_mode, positive_prompt_strength, negative_prompt_strength, revision_strength_1, revision_strength_2, \
         revision_strength_3, revision_strength_4, same_seed_for_all, output_format, \
         control_lora_canny, canny_edge_low, canny_edge_high, canny_start, canny_stop, canny_strength, canny_model, \
         control_lora_depth, depth_start, depth_stop, depth_strength, depth_model, prompt_expansion, \
@@ -127,11 +127,11 @@ def handler(task):
         if not prompt_expansion:
             outputs.append(['preview', (5, 'Encoding negative text ...', None)])
             n_txt = apply_style_negative(style, negative_prompt)
-            n_cond = pipeline.process_prompt(n_txt, base_clip_skip, refiner_clip_skip, zero_out_negative)
+            n_cond = pipeline.process_prompt(n_txt, base_clip_skip, refiner_clip_skip, negative_prompt_strength)
 
             outputs.append(['preview', (9, 'Encoding positive text ...', None)])
             p_txt = apply_style_positive(style, prompt)
-            p_cond = pipeline.process_prompt(p_txt, base_clip_skip, refiner_clip_skip, zero_out_positive, revision_mode, revision_strengths, clip_vision_outputs)
+            p_cond = pipeline.process_prompt(p_txt, base_clip_skip, refiner_clip_skip, positive_prompt_strength, revision_mode, revision_strengths, clip_vision_outputs)
 
             for i in range(image_number):
                 current_seed = seed if same_seed_for_all else seed + i
@@ -162,12 +162,12 @@ def handler(task):
 
             outputs.append(['preview', (9, 'Encoding negative text ...', None)])
             n_txt = apply_style_negative(style, negative_prompt)
-            n_cond = pipeline.process_prompt(n_txt, base_clip_skip, refiner_clip_skip, zero_out_negative)
+            n_cond = pipeline.process_prompt(n_txt, base_clip_skip, refiner_clip_skip, negative_prompt_strength)
 
             for i, t in enumerate(tasks):
                 outputs.append(['preview', (12, f'Encoding positive text #{i + 1} ...', None)])
                 t['p_cond'] = pipeline.process_prompt(t['real_positive_prompt'], base_clip_skip, refiner_clip_skip,
-                    zero_out_positive, revision_mode, revision_strengths, clip_vision_outputs)
+                    positive_prompt_strength, revision_mode, revision_strengths, clip_vision_outputs)
                 t['real_negative_prompt'] = n_txt
                 t['n_cond'] = n_cond
 
@@ -249,7 +249,7 @@ def callback(step, x0, x, total_steps, y):
                 'base_model': base_model_name, 'refiner_model': refiner_model_name,
                 'l1': l1, 'w1': w1, 'l2': l2, 'w2': w2, 'l3': l3, 'w3': w3,
                 'l4': l4, 'w4': w4, 'l5': l5, 'w5': w5, 'img2img': img2img_mode, 'revision': revision_mode,
-                'zero_out_positive': zero_out_positive, 'zero_out_negative': zero_out_negative,
+                'positive_prompt_strength': positive_prompt_strength, 'negative_prompt_strength': negative_prompt_strength,
                 'control_lora_canny': control_lora_canny, 'control_lora_depth': control_lora_depth,
                 'prompt_expansion': prompt_expansion
             }
@@ -296,7 +296,7 @@ def callback(step, x0, x, total_steps, y):
                     ('Image-2-Image', (img2img_mode, start_step, denoise, input_image_filename) if img2img_mode else (img2img_mode)),
                     ('Revision', (revision_mode, revision_strength_1, revision_strength_2, revision_strength_3,
                         revision_strength_4, revision_images_filenames) if revision_mode else (revision_mode)),
-                    ('Zero Out Prompts', (zero_out_positive, zero_out_negative)),
+                    ('Prompt Strengths', (positive_prompt_strength, negative_prompt_strength)),
                     ('Canny', (control_lora_canny, canny_edge_low, canny_edge_high, canny_start, canny_stop,
                         canny_strength, canny_model, input_image_filename) if control_lora_canny else (control_lora_canny)),
                     ('Depth', (control_lora_depth, depth_start, depth_stop, depth_strength, depth_model, input_image_filename) if control_lora_depth else (control_lora_depth))
diff --git a/modules/core.py b/modules/core.py
@@ -9,7 +9,7 @@
 
 from comfy.sd import load_checkpoint_guess_config
 from nodes import VAEDecode, EmptyLatentImage, CLIPTextEncode, VAEEncode, \
-    ConditioningZeroOut, CLIPVisionEncode, unCLIPConditioning, ControlNetApplyAdvanced
+    ConditioningZeroOut, ConditioningAverage, CLIPVisionEncode, unCLIPConditioning, ControlNetApplyAdvanced
 from comfy.sample import prepare_mask, broadcast_cond, get_additional_models, cleanup_additional_models
 from comfy_extras.nodes_post_processing import ImageScaleToTotalPixels
 from comfy_extras.nodes_canny import Canny
@@ -25,6 +25,7 @@
 opVAEEncode = VAEEncode()
 opImageScaleToTotalPixels = ImageScaleToTotalPixels()
 opConditioningZeroOut = ConditioningZeroOut()
+opConditioningAverage = ConditioningAverage()
 opCLIPVisionEncode = CLIPVisionEncode()
 opUnCLIPConditioning = unCLIPConditioning()
 opCanny = Canny()
@@ -102,6 +103,16 @@ def zero_out(conditioning):
     return opConditioningZeroOut.zero_out(conditioning=conditioning)[0]
 
 
+@torch.no_grad()
+def average(conditioning_to, conditioning_from, conditioning_to_strength):
+    return opConditioningAverage.addWeighted(conditioning_to=conditioning_to, conditioning_from=conditioning_from, conditioning_to_strength=conditioning_to_strength)[0]
+
+
+@torch.no_grad()
+def set_conditioning_strength(conditioning, strength):
+    return average(conditioning, zero_out(conditioning), strength)
+
+
 @torch.no_grad()
 def encode_clip_vision(clip_vision, image):
     return opCLIPVisionEncode.encode(clip_vision=clip_vision, image=image)[0]
diff --git a/modules/default_pipeline.py b/modules/default_pipeline.py
@@ -166,11 +166,11 @@ def expand_txt(*args, **kwargs):
     return expansion_model(*args, **kwargs)
 
 
-def process_prompt(text, base_clip_skip, refiner_clip_skip, zero_out=False, revision=False, revision_strengths=[], clip_vision_outputs=[]):
+def process_prompt(text, base_clip_skip, refiner_clip_skip, prompt_strength=1.0, revision=False, revision_strengths=[], clip_vision_outputs=[]):
     xl_base_patched.clip.clip_layer(base_clip_skip)
     base_cond = core.encode_prompt_condition(clip=xl_base_patched.clip, prompt=text)
-    if zero_out:
-        base_cond = core.zero_out(base_cond)
+    if prompt_strength >= 0 and prompt_strength < 1.0:
+        base_cond = core.set_conditioning_strength(base_cond, prompt_strength)
 
     if revision:
         set_comfy_adm_encoding()
@@ -183,8 +183,8 @@ def process_prompt(text, base_clip_skip, refiner_clip_skip, zero_out=False, revi
     if xl_refiner is not None:
         xl_refiner.clip.clip_layer(refiner_clip_skip)
         refiner_cond = core.encode_prompt_condition(clip=xl_refiner.clip, prompt=text)
-        if zero_out:
-            refiner_cond = core.zero_out(refiner_cond)
+        if prompt_strength >= 0 and prompt_strength < 1.0:
+            refiner_cond = core.set_conditioning_strength(refiner_cond, prompt_strength)
     else:
         refiner_cond = None
     return base_cond, refiner_cond
diff --git a/modules/settings.py b/modules/settings.py
@@ -39,8 +39,8 @@ def load_settings():
     settings['depth_model'] = modules.path.default_controlnet_depth_name
     settings['keep_input_names'] = False
     settings['revision_mode'] = False
-    settings['zero_out_positive'] = False
-    settings['zero_out_negative'] = False
+    settings['positive_prompt_strength'] = 1.0
+    settings['negative_prompt_strength'] = 1.0
     settings['revision_strength_1'] = 1.0
     settings['revision_strength_2'] = 1.0
     settings['revision_strength_3'] = 1.0
diff --git a/readme.md b/readme.md
@@ -121,25 +121,26 @@ Below things are already inside the software, and **users do not need to do anyt
 2. Support for Control-LoRA: Canny Edge (guiding diffusion using edge detection on input, see [Canny Edge description from SAI](https://huggingface.co/stabilityai/control-lora#canny-edge)).
 3. Support for Control-LoRA: Depth (guiding diffusion using depth information from input, see [Depth description from SAI](https://huggingface.co/stabilityai/control-lora#midas-and-clipdrop-depth)).
 4. Support for Control-LoRA: Revision (prompting with images, see [Revision description from SAI](https://huggingface.co/stabilityai/control-lora#revision)).
-5. Support for embeddings (use "embedding:embedding_name" syntax, ComfyUI style).
-6. Customizable sampling parameters (sampler, scheduler, steps, base / refiner switch point, CFG, CLIP Skip).
-7. Displaying full metadata for generated images in the UI.
-8. Support for JPEG format.
-9. Ability to save full metadata for generated images (as JSON or embedded in image, disabled by default).
-10. Ability to load prompt information from JSON and image files (if saved with metadata).
-11. Ability to change default values of UI settings (loaded from settings.json file - use settings-example.json as a template).
-12. Ability to change default paths (loaded from paths.json file - use paths-example.json as a template).
-13. Ability to retain input files names (when using Image-2-Image mode).
-14. Ability to generate multiple images using same seed (useful in Image-2-Image mode).
-15. Ability to generate images forever (ported from SD web UI - right-click on Generate button to start or stop this mode).
-16. Ability to stop image generation.
-17. Official list of SDXL resolutions (as defined in [SDXL paper](https://arxiv.org/abs/2307.01952)).
-18. Compact resolution and style selection (thx to [runew0lf](https://github.com/runew0lf) for hints).
-19. Support for custom resolutions list (loaded from resolutions.json - use resolutions-example.json as a template).
-20. Support for custom resolutions - you can just type it now in Resolution field, like "1280x640".
-21. Support for custom styles (loaded from sdxl_styles folder on start).
-22. Support for playing audio when generation is finished (ported from SD web UI - use notification.ogg or notification.mp3).
-23. Starting generation via Ctrl-ENTER hotkey (ported from SD web UI).
+5. Adjustable text prompt strengths (useful in Revision mode).
+6. Support for embeddings (use "embedding:embedding_name" syntax, ComfyUI style).
+7. Customizable sampling parameters (sampler, scheduler, steps, base / refiner switch point, CFG, CLIP Skip).
+8. Displaying full metadata for generated images in the UI.
+9. Support for JPEG format.
+10. Ability to save full metadata for generated images (as JSON or embedded in image, disabled by default).
+11. Ability to load prompt information from JSON and image files (if saved with metadata).
+12. Ability to change default values of UI settings (loaded from settings.json file - use settings-example.json as a template).
+13. Ability to change default paths (loaded from paths.json file - use paths-example.json as a template).
+14. Ability to retain input file names (when using Image-2-Image mode).
+15. Ability to generate multiple images using same seed (useful in Image-2-Image mode).
+16. Ability to generate images forever (ported from SD web UI - right-click on Generate button to start or stop this mode).
+17. Ability to stop image generation.
+18. Official list of SDXL resolutions (as defined in [SDXL paper](https://arxiv.org/abs/2307.01952)).
+19. Compact resolution and style selection (thx to [runew0lf](https://github.com/runew0lf) for hints).
+20. Support for custom resolutions list (loaded from resolutions.json - use resolutions-example.json as a template).
+21. Support for custom resolutions - you can now just type one in the Resolution field, like "1280x640".
+22. Support for custom styles (loaded from sdxl_styles folder on start).
+23. Support for playing audio when generation is finished (ported from SD web UI - use notification.ogg or notification.mp3).
+24. Starting generation via Ctrl-ENTER hotkey (ported from SD web UI).
 
 ## Thanks
 
diff --git a/settings-example.json b/settings-example.json
@@ -31,8 +31,8 @@
     "depth_model": "control-lora-depth-rank128.safetensors",
     "keep_input_names": false,
     "revision": false,
-    "zero_out_positive": false,
-    "zero_out_negative": false,
+    "positive_prompt_strength": 1.0,
+    "negative_prompt_strength": 1.0,
     "revision_strength_1": 1.0,
     "revision_strength_2": 1.0,
     "revision_strength_3": 1.0,
diff --git a/update_log_mre.md b/update_log_mre.md
@@ -1,5 +1,6 @@
 ### 1.0.51 MRE
 
+* Added support for adjusting text prompt strengths (useful in Revision mode).
 * Updated Comfy.
 
 ### 1.0.50 MRE
diff --git a/webui.py b/webui.py
@@ -155,10 +155,14 @@ def metadata_to_ctrls(metadata, ctrls):
             ctrls[31] = metadata['denoise']
     if 'revision' in metadata:
         ctrls[32] = metadata['revision']
-    if 'zero_out' in metadata:
-        ctrls[33] = metadata['zero_out_positive']
-    if 'zero_out' in metadata:
-        ctrls[34] = metadata['zero_out_negative']
+    if 'positive_prompt_strength' in metadata:
+        ctrls[33] = metadata['positive_prompt_strength']
+    elif 'zero_out_positive' in metadata:
+        ctrls[33] = 0.0 if metadata['zero_out_positive'] else 1.0
+    if 'negative_prompt_strength' in metadata:
+        ctrls[34] = metadata['negative_prompt_strength']
+    elif 'zero_out_negative' in metadata:
+        ctrls[34] = 0.0 if metadata['zero_out_negative'] else 1.0
     if 'revision_strength_1' in metadata:
         ctrls[35] = metadata['revision_strength_1']
     if 'revision_strength_2' in metadata:
@@ -329,9 +333,9 @@ def performance_changed(value):
                 revision_strength_2 = gr.Slider(label='Revision Strength for Image 2', minimum=-2, maximum=2, step=0.01, value=settings['revision_strength_2'])
                 revision_strength_3 = gr.Slider(label='Revision Strength for Image 3', minimum=-2, maximum=2, step=0.01, value=settings['revision_strength_3'])
                 revision_strength_4 = gr.Slider(label='Revision Strength for Image 4', minimum=-2, maximum=2, step=0.01, value=settings['revision_strength_4'])
-                with gr.Row():
-                    zero_out_positive = gr.Checkbox(label='Zero Out Positive Prompt', value=settings['zero_out_positive'])
-                    zero_out_negative = gr.Checkbox(label='Zero Out Negative Prompt', value=settings['zero_out_negative'])
+
+                positive_prompt_strength = gr.Slider(label='Positive Prompt Strength', minimum=0, maximum=1, step=0.01, value=settings['positive_prompt_strength'])
+                negative_prompt_strength = gr.Slider(label='Negative Prompt Strength', minimum=0, maximum=1, step=0.01, value=settings['negative_prompt_strength'])
 
                 img2img_start_step = gr.Slider(label='Image-2-Image Start Step', minimum=0.0, maximum=0.8, step=0.01, value=settings['img2img_start_step'])
                 img2img_denoise = gr.Slider(label='Image-2-Image Denoise', minimum=0.2, maximum=1.0, step=0.01, value=settings['img2img_denoise'])
@@ -349,7 +353,7 @@ def performance_changed(value):
                 output_to_input_button.click(output_to_input_handler, inputs=output_gallery, outputs=[input_gallery, gallery_tabs])
                 output_to_revision_button.click(output_to_revision_handler, inputs=output_gallery, outputs=[revision_mode, revision_gallery, gallery_tabs])
 
-                img2img_ctrls = [img2img_mode, img2img_start_step, img2img_denoise, revision_mode, zero_out_positive, zero_out_negative,
+                img2img_ctrls = [img2img_mode, img2img_start_step, img2img_denoise, revision_mode, positive_prompt_strength, negative_prompt_strength,
                     revision_strength_1, revision_strength_2, revision_strength_3, revision_strength_4]
 
                 def verify_revision(rev, gallery_in, gallery_rev, gallery_out):