FLUX.MF-Lightning-Fast-Upscaler

Running on Zero

App Files Files Community

LPX55 committed on Mar 7

Commit

2310622

verified ·

1 Parent(s): 4af365d

Update optimized.py

Browse files

Files changed (1) hide show

optimized.py +37 -2

optimized.py CHANGED Viewed

@@ -6,6 +6,31 @@ from diffusers import FluxControlNetModel, FluxControlNetPipeline, AutoencoderKL
 import gradio as gr
 from accelerate import init_empty_weights
 huggingface_token = os.getenv("HUGGINFACE_TOKEN")
 good_vae = AutoencoderKL.from_pretrained(
@@ -35,8 +60,18 @@ pipe = FluxControlNetPipeline.from_pretrained(
 print(f"VRAM used: {torch.cuda.memory_allocated()/1e9:.2f}GB")
 # Proper CPU offloading sequence
 pipe.enable_model_cpu_offload(device="cuda")  # First enable offloading
-pipe.enable_vae_slicing()  # Then enable memory optimizations
-pipe.enable_attention_slicing(1)
 # Handle xformers/SDP attention after offloading
 try:

 import gradio as gr
 from accelerate import init_empty_weights
def self_attention_slicing(module, slice_size=3):
    """Wrap ``module`` so it is evaluated on slices of its tensor inputs.

    The returned callable splits the inputs into chunks of ``slice_size``
    along one axis, calls ``module`` once per chunk and concatenates the
    per-chunk results along that same axis.

    Args:
        module: Callable to wrap. It must return a tensor, or a tuple/list
            of tensors, whose size along the slicing axis follows the input.
        slice_size: Positive chunk length along the slicing axis, or the
            string ``"auto"`` to disable slicing and call ``module`` as is.

    Returns:
        A function ``sliced_attention(*args, **kwargs)``. The slicing axis
        is taken from the ``dim`` keyword argument (default ``1``); ``dim``
        is still forwarded to ``module`` together with the other keywords.

    Raises:
        ValueError: If ``slice_size`` is a non-positive number.
    """
    def sliced_attention(*args, **kwargs):
        dim = kwargs.get("dim", 1)

        # Nothing to slice: slicing disabled, or no positional reference input.
        if slice_size == "auto" or not args:
            return module(*args, **kwargs)
        if slice_size <= 0:
            raise ValueError(
                f"slice_size must be a positive integer or 'auto', got {slice_size!r}"
            )

        reference = args[0]
        total = reference.shape[dim]
        # A single (or empty) chunk needs no splitting or concatenation.
        if total <= slice_size:
            return module(*args, **kwargs)

        def take(value, start):
            """Return the chunk of ``value`` starting at ``start`` along ``dim``."""
            # Only tensors laid out like the reference input are sliced;
            # flags, scalars and differently shaped tensors pass through whole.
            if not isinstance(value, torch.Tensor):
                return value
            if value.dim() != reference.dim() or value.shape[dim] != total:
                return value
            index = [slice(None)] * value.dim()
            index[dim] = slice(start, start + slice_size)
            return value[tuple(index)]

        chunks = [
            module(
                *[take(arg, start) for arg in args],
                **{key: take(val, start) for key, val in kwargs.items()},
            )
            for start in range(0, total, slice_size)
        ]

        # Callables returning a tuple/list of tensors are merged element-wise
        # and handed back in the same container type.
        first = chunks[0]
        if isinstance(first, (tuple, list)):
            return type(first)(
                torch.cat(parts, dim=dim) for parts in zip(*chunks)
            )
        return torch.cat(chunks, dim=dim)

    return sliced_attention
 huggingface_token = os.getenv("HUGGINFACE_TOKEN")
 good_vae = AutoencoderKL.from_pretrained(
 print(f"VRAM used: {torch.cuda.memory_allocated()/1e9:.2f}GB")
 # Proper CPU offloading sequence
 pipe.enable_model_cpu_offload(device="cuda")  # First enable offloading
+# 2. Then enable VAE slicing (built-in method first, manual wrapper as fallback)
+if getattr(pipe, "vae", None) is not None:
+    # Method 1: use the VAE's own enable_slicing() when it exists
+    try:
+        pipe.vae.enable_slicing()
+    except AttributeError:
+        # Method 2: reached when enable_slicing() raises AttributeError; wrap decode with self_attention_slicing (slice size 2). Author's reference: pipeline_flux_controlnet.py
+        pipe.vae.decode = self_attention_slicing(pipe.vae.decode, 2)
+# 3. Attention optimizations
+pipe.enable_attention_slicing(1)  # called with argument 1; the author marks this as mandatory for Flux -- TODO confirm
 # Handle xformers/SDP attention after offloading
 try: