Commit d9869be
Merge pull request #252 from mrhan1993:patch

After running for a long time, GPU memory and RAM usage keep growing because models are loaded repeatedly, eventually causing OOM. This adds manual memory-release logic while avoiding reloading the model as much as possible. Thanks to @PeakLee and his code: #245 (comment)

2 parents b2c2377 + 858eed6 commit d9869be

File tree: 1 file changed, +12 −0 lines changed

fooocusapi/worker.py (+12)
@@ -12,6 +12,7 @@
 from fooocusapi.task_queue import QueueTask, TaskQueue, TaskOutputs
 
 worker_queue: TaskQueue = None
+last_model_name = None
 
 def process_top():
     import ldm_patched.modules.model_management
@@ -118,6 +119,17 @@ def yield_result(_, imgs, tasks, extension='png'):
 
     try:
         print(f"[Task Queue] Task queue start task, job_id={async_task.job_id}")
+        # clear memory
+        global last_model_name
+
+        if last_model_name is None:
+            last_model_name = async_task.req_param.base_model_name
+        if last_model_name != async_task.req_param.base_model_name:
+            model_management.cleanup_models()  # key1
+            model_management.unload_all_models()
+            model_management.soft_empty_cache()  # key2
+            last_model_name = async_task.req_param.base_model_name
+
         worker_queue.start_task(async_task.job_id)
 
         execution_start_time = time.perf_counter()
