Skip to content

Commit

Permalink
image: Use deepcache to optimise sd model
Browse files Browse the repository at this point in the history
Which brings a 2x speed-up
  • Loading branch information
frostyplanet committed Sep 27, 2024
1 parent 3369106 commit 7ff4245
Show file tree
Hide file tree
Showing 3 changed files with 10 additions and 0 deletions.
1 change: 1 addition & 0 deletions setup.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -173,6 +173,7 @@ rerank =
image =
diffusers>=0.30.0 # fix conflict with matcha-tts
controlnet_aux
deepcache
video =
diffusers>=0.30.0
imageio-ffmpeg
Expand Down
1 change: 1 addition & 0 deletions xinference/deploy/docker/requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,7 @@ ormsgpack # For Fish Speech
qwen-vl-utils # For qwen2-vl
datamodel_code_generator # for minicpm-4B
jsonschema # for minicpm-4B
deepcache # for sd

# sglang
outlines>=0.0.44
Expand Down
8 changes: 8 additions & 0 deletions xinference/model/image/stable_diffusion/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -199,6 +199,14 @@ def load(self):
elif not self._kwargs.get("device_map"):
logger.debug("Loading model to available device")
self._model = move_model_to_available_device(self._model)
try:
from DeepCache import DeepCacheSDHelper

helper = DeepCacheSDHelper(pipe=self._model)
helper.set_params(cache_interval=3, cache_branch_id=0)
helper.enable()
except:
pass
# Recommended if your computer has < 64 GB of RAM
self._model.enable_attention_slicing()
self._apply_lora()
Expand Down

0 comments on commit 7ff4245

Please sign in to comment.