Spaces: Running on Zero

Commit 0dcb062 · Parent(s): 866d9b3
Add support for pruned model and update generation modes in app.py

Files changed:
- .gitignore (+8, -0)
- app.py (+30, -13)
.gitignore (ADDED)

@@ -0,0 +1,8 @@
+# Virtual Environments
+venv/
+env/
+.venv/
+
+# Python cache
+__pycache__/
+*.pyc
app.py (CHANGED)
@@ -89,6 +89,12 @@ pipe = DiffusionPipeline.from_pretrained(
     base_model, scheduler=scheduler, torch_dtype=dtype
 ).to(device)
 
+# Pruned model
+pruned_model = "OPPOer/Qwen-Image-Pruning"
+pruned_pipe = DiffusionPipeline.from_pretrained(
+    pruned_model, scheduler=scheduler, torch_dtype=dtype
+).to(device)
+
 # Lightning LoRA info (no global state)
 LIGHTNING_LORA_REPO = "lightx2v/Qwen-Image-Lightning"
 LIGHTNING_LORA_WEIGHT = "Qwen-Image-Lightning-8steps-V1.0.safetensors"
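This hunk materializes a second pipeline for the pruned checkpoint alongside the base one, sharing the same scheduler and dtype, so a later mode switch is a variable assignment rather than a model reload. A minimal sketch of the pattern, assuming base_model, scheduler, dtype, and device are defined earlier in app.py as the context lines suggest:

from diffusers import DiffusionPipeline

# Base pipeline (already present before this commit).
pipe = DiffusionPipeline.from_pretrained(
    base_model, scheduler=scheduler, torch_dtype=dtype
).to(device)

# New: a second, smaller checkpoint kept resident next to the base one.
pruned_pipe = DiffusionPipeline.from_pretrained(
    "OPPOer/Qwen-Image-Pruning", scheduler=scheduler, torch_dtype=dtype
).to(device)

The trade-off is that both models occupy memory for the lifetime of the Space; the win is that switching modes never pays a model-load penalty mid-request.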
@@ -162,17 +168,19 @@ def handle_speed_mode(speed_mode):
     """Update UI based on speed/quality toggle."""
     if speed_mode == "Speed (8 steps)":
         return gr.update(value="Speed mode selected - 8 steps with Lightning LoRA"), 8, 1.0
+    elif speed_mode == "Prune (8 steps)":
+        return gr.update(value="Prune mode selected - 8 steps with Pruned Model"), 8, 1.0
     else:
         return gr.update(value="Quality mode selected - 45 steps for best quality"), 45, 3.5
 
 @spaces.GPU(duration=70)
-def generate_image(prompt_mash, steps, seed, cfg_scale, width, height, lora_scale, negative_prompt=""):
-
+def generate_image(current_pipe, prompt_mash, steps, seed, cfg_scale, width, height, lora_scale, negative_prompt=""):
+    current_pipe.to("cuda")
     generator = torch.Generator(device="cuda").manual_seed(seed)
 
     with calculateDuration("Generating image"):
         # Generate image
-        image = pipe(
+        image = current_pipe(
             prompt=prompt_mash,
             negative_prompt=negative_prompt,
             num_inference_steps=steps,
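Two things change in generate_image: the pipeline becomes an explicit parameter instead of a module-level global, and it is moved to CUDA inside the @spaces.GPU-decorated body, which is the only place ZeroGPU actually provides a GPU. A hedged sketch of the seeding pattern; the call's remaining keyword arguments are truncated in the diff, so only the visible ones appear here:

import torch

def generate_image_sketch(current_pipe, prompt_mash, steps, seed, negative_prompt=""):
    current_pipe.to("cuda")  # ZeroGPU: the device exists only inside the decorated call
    # A seeded generator makes a given (pipeline, seed) pair reproducible.
    generator = torch.Generator(device="cuda").manual_seed(seed)
    result = current_pipe(
        prompt=prompt_mash,
        negative_prompt=negative_prompt,
        num_inference_steps=steps,
        generator=generator,
        # width, height, and the cfg kwarg are cut off in the diff and omitted here
    )
    return result.images[0]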
@@ -205,15 +213,21 @@ def run_lora(prompt, cfg_scale, steps, selected_index, randomize_seed, seed, asp
     else:
         prompt_mash = prompt
 
+    # Select the pipeline based on the mode
+    if speed_mode == "Prune (8 steps)":
+        current_pipe = pruned_pipe
+    else:
+        current_pipe = pipe
+
     # Always unload any existing LoRAs first to avoid conflicts
     with calculateDuration("Unloading existing LoRAs"):
-        pipe.unload_lora_weights()
+        current_pipe.unload_lora_weights()
 
     # Load LoRAs based on speed mode
     if speed_mode == "Speed (8 steps)":
         with calculateDuration("Loading Lightning LoRA and style LoRA"):
             # Load Lightning LoRA first
-            pipe.load_lora_weights(
+            current_pipe.load_lora_weights(
                 LIGHTNING_LORA_REPO,
                 weight_name=LIGHTNING_LORA_WEIGHT,
                 adapter_name="lightning"
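The unload-before-load step matters because run_lora serves successive requests against shared pipeline objects: whatever adapters the previous request attached are still present. A sketch of the lifecycle this hunk sets up, using the diffusers LoRA API (lora_path and weight_name come from the selected style LoRA elsewhere in run_lora):

# Route the request to the right pipeline, then reset its adapter state.
current_pipe = pruned_pipe if speed_mode == "Prune (8 steps)" else pipe
current_pipe.unload_lora_weights()  # drop adapters left by the previous request

# Named adapters can then be loaded fresh without colliding.
current_pipe.load_lora_weights(
    LIGHTNING_LORA_REPO,
    weight_name=LIGHTNING_LORA_WEIGHT,
    adapter_name="lightning",
)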
@@ -221,7 +235,7 @@ def run_lora(prompt, cfg_scale, steps, selected_index, randomize_seed, seed, asp
 
             # Load the selected style LoRA
             weight_name = selected_lora.get("weights", None)
-            pipe.load_lora_weights(
+            current_pipe.load_lora_weights(
                 lora_path,
                 weight_name=weight_name,
                 low_cpu_mem_usage=True,
@@ -229,18 +243,21 @@ def run_lora(prompt, cfg_scale, steps, selected_index, randomize_seed, seed, asp
             )
 
             # Set both adapters active with their weights
-            pipe.set_adapters(["lightning", "style"], adapter_weights=[1.0, lora_scale])
+            current_pipe.set_adapters(["lightning", "style"], adapter_weights=[1.0, lora_scale])
     else:
-        # Quality mode - only load the style LoRA
-        with calculateDuration(f"Loading LoRA weights for {selected_lora['title']}"):
+        # Quality or Prune mode - only load the style LoRA
+        log_message = f"Loading LoRA weights for {selected_lora['title']}"
+        if speed_mode == "Prune (8 steps)":
+            log_message += " on Pruned Model"
+        with calculateDuration(log_message):
             weight_name = selected_lora.get("weights", None)
-            pipe.load_lora_weights(
+            current_pipe.load_lora_weights(
                 lora_path,
                 weight_name=weight_name,
                 low_cpu_mem_usage=True,
                 adapter_name="style"
             )
-            pipe.set_adapters(["style"], adapter_weights=[lora_scale])
+            current_pipe.set_adapters(["style"], adapter_weights=[lora_scale])
 
     # Set random seed for reproducibility
     with calculateDuration("Randomizing seed"):
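The net effect of this branch is that each mode ends with a different active-adapter set, applied through diffusers' set_adapters. Condensed, with names and weights taken straight from the hunk:

if speed_mode == "Speed (8 steps)":
    # Lightning at full strength, the style LoRA at the UI's lora_scale.
    current_pipe.set_adapters(["lightning", "style"], adapter_weights=[1.0, lora_scale])
else:
    # Quality and Prune modes run the style LoRA alone.
    current_pipe.set_adapters(["style"], adapter_weights=[lora_scale])

Prune mode reaches its 8-step schedule through the smaller checkpoint rather than the Lightning LoRA, so the two fast modes never share adapters.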
@@ -251,7 +268,7 @@ def run_lora(prompt, cfg_scale, steps, selected_index, randomize_seed, seed, asp
     width, height = get_image_size(aspect_ratio)
 
     # Generate the image
-    final_image = generate_image(prompt_mash, steps, seed, cfg_scale, width, height, lora_scale)
+    final_image = generate_image(current_pipe, prompt_mash, steps, seed, cfg_scale, width, height, lora_scale)
 
     return final_image, seed
 
@@ -433,7 +450,7 @@ with gr.Blocks(theme=gr.themes.Soft(), css=css, delete_cache=(60, 60)) as app:
         with gr.Row():
             speed_mode = gr.Radio(
                 label="Generation Mode",
-                choices=["Speed (8 steps)", "Quality (45 steps)"],
+                choices=["Speed (8 steps)", "Quality (45 steps)", "Prune (8 steps)"],
                 value="Speed (8 steps)",
                 info="Speed mode uses Lightning LoRA for faster generation"
             )
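handle_speed_mode returns three values (a status update, a step count, and a CFG scale), so the Radio presumably drives three components; that wiring is outside this diff. A hypothetical sketch with invented component names:

# Hypothetical wiring, not shown in this diff: the Radio's change event feeds
# handle_speed_mode, whose returns update a status box and two sliders.
speed_mode.change(
    handle_speed_mode,
    inputs=[speed_mode],
    outputs=[status_markdown, steps_slider, cfg_slider],
)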