Spaces:

prithivMLmods
/

Qwen-Image-LoRA-DLC

Running on Zero

App Files Files Community

prithivMLmods committed on Aug 25

Commit

79b053d

verified ·

1 Parent(s): 35e8372

update app (#16)

Browse files

- update app (7b6451c39f783b66a81ae62d4ad045a88c57c210)

Files changed (1) hide show

app.py +130 -140

app.py CHANGED Viewed

@@ -13,17 +13,12 @@ import gradio as gr
 import spaces
 from diffusers import (
     DiffusionPipeline,
-    AutoencoderKL,
-    AutoencoderTiny,
-    AutoPipelineForImage2Image,
-    FlowMatchEulerDiscreteScheduler
-)
 from huggingface_hub import (
     hf_hub_download,
     HfFileSystem,
     ModelCard,
-    snapshot_download
-)
 from diffusers.utils import load_image
 import requests
 from urllib.parse import urlparse
@@ -120,14 +115,10 @@ loras = [
     },
 ]
-# Initialize the base model and autoencoders
 dtype = torch.bfloat16
 base_model = "Qwen/Qwen-Image"
-# Initialize TAEF1 for fast previews and the standard VAE for high-quality final images
-taef1 = AutoencoderTiny.from_pretrained("madebyollin/taef1", torch_dtype=dtype).to(device)
-good_vae = AutoencoderKL.from_pretrained(base_model, subfolder="vae", torch_dtype=dtype).to(device)
 # Scheduler configuration from the Qwen-Image-Lightning repository
 scheduler_config = {
     "base_image_seq_len": 256,
@@ -147,21 +138,10 @@ scheduler_config = {
 }
 scheduler = FlowMatchEulerDiscreteScheduler.from_config(scheduler_config)
-# Main pipeline for text-to-image, using taef1 for fast decoding during generation
 pipe = DiffusionPipeline.from_pretrained(
-    base_model, scheduler=scheduler, torch_dtype=dtype, vae=taef1
-).to(device)
-# Image-to-image pipeline, using the high-quality VAE
-pipe_i2i = AutoPipelineForImage2Image.from_pretrained(
-    base_model,
-    vae=good_vae,
-    scheduler=scheduler,
-    torch_dtype=dtype
 ).to(device)
 # Lightning LoRA info (no global state)
 LIGHTNING_LORA_REPO = "lightx2v/Qwen-Image-Lightning"
 LIGHTNING_LORA_WEIGHT = "Qwen-Image-Lightning-8steps-V1.0.safetensors"
@@ -232,32 +212,29 @@ def adjust_generation_mode(speed_mode):
     else:
         return gr.update(value="Base mode selected - 48 steps for best quality"), 48, 4.0
-def image_to_image_generation(prompt_mash, image_input, strength, steps, cfg_scale, width, height, lora_scale, seed):
-    """Handles the image-to-image generation process."""
     generator = torch.Generator(device="cuda").manual_seed(seed)
-    pipe_i2i.to("cuda")
-    # Resize and convert input image
-    image_input_pil = load_image(image_input).resize((width, height), Image.Resampling.LANCZOS)
-    final_image = pipe_i2i(
-        prompt=prompt_mash,
-        image=image_input_pil,
-        strength=strength,
-        num_inference_steps=steps,
-        guidance_scale=cfg_scale,
-        generator=generator,
-        # Note: image-to-image with Qwen doesn't use `true_cfg_scale`
-    ).images[0]
-    return final_image
 @spaces.GPU(duration=100)
-def process_generation_request(
-    prompt, image_input, image_strength, cfg_scale, steps, selected_index,
-    randomize_seed, seed, aspect_ratio, lora_scale, speed_mode, progress=gr.Progress(track_tqdm=True)
-):
     if selected_index is None:
-        raise gr.Error("You must select a LoRA before proceeding.🧨")
     selected_lora = loras[selected_index]
     lora_path = selected_lora["repo"]
@@ -265,85 +242,63 @@ def process_generation_request(
     # Prepare prompt with trigger word
     if trigger_word:
-        prompt_mash = f"{trigger_word}, {prompt}" if prompt else trigger_word
     else:
         prompt_mash = prompt
-    # Set random seed if requested
-    if randomize_seed:
-        seed = random.randint(0, MAX_SEED)
-    # Determine which pipeline to use
-    pipe_to_use = pipe_i2i if image_input is not None else pipe
     # Always unload any existing LoRAs first to avoid conflicts
     with Timer("Unloading existing LoRAs"):
-        pipe_to_use.unload_lora_weights()
     # Load LoRAs based on speed mode
     if speed_mode == "Fast (8 steps)":
         with Timer("Loading Lightning LoRA and style LoRA"):
-            pipe_to_use.load_lora_weights(
                 LIGHTNING_LORA_REPO,
                 weight_name=LIGHTNING_LORA_WEIGHT,
                 adapter_name="lightning"
             )
-            weight_name = selected_lora.get("weights")
-            pipe_to_use.load_lora_weights(
                 lora_path,
                 weight_name=weight_name,
                 adapter_name="style"
             )
-            pipe_to_use.set_adapters(["lightning", "style"], adapter_weights=[1.0, lora_scale])
-    else: # Quality mode
         with Timer(f"Loading LoRA weights for {selected_lora['title']}"):
-            weight_name = selected_lora.get("weights")
-            pipe_to_use.load_lora_weights(lora_path, weight_name=weight_name)
     width, height = compute_image_dimensions(aspect_ratio)
-    # --- Generation ---
-    if image_input is not None:
-        # Image-to-Image Generation
-        final_image = image_to_image_generation(prompt_mash, image_input, image_strength, steps, cfg_scale, width, height, lora_scale, seed)
-        yield final_image, seed, gr.update(visible=False)
-    else:
-        # Text-to-Image Generation with Previews
-        pipe.to("cuda")
-        generator = torch.Generator(device="cuda").manual_seed(seed)
-        # Callback for generating previews
-        def callback_on_step_end(pipe, step_index, timestep, callback_kwargs):
-            latents = callback_kwargs["latents"]
-            # Use the fast taef1 decoder for previews
-            with torch.no_grad():
-                image = pipe.decode_latents(latents.to(dtype))[0]
-            progress_bar = f'<div class="progress-container"><div class="progress-bar" style="--current: {step_index + 1}; --total: {steps};"></div></div>'
-            yield {"image": image, "seed": seed, "progress": gr.update(value=progress_bar, visible=True)}
-            return callback_kwargs
-        # Generate image with step-by-step previews
-        with Timer("Generating image with previews"):
-            generation_output = pipe(
-                prompt=prompt_mash,
-                num_inference_steps=steps,
-                true_cfg_scale=cfg_scale,
-                width=width,
-                height=height,
-                generator=generator,
-                output_type="latent", # Get latents to decode with the good VAE later
-                callback_on_step_end=callback_on_step_end
-            )
-        # Decode the final image with the high-quality VAE
-        with Timer("Final decoding with good VAE"):
-            final_latents = generation_output.images
-            pipe.vae = good_vae # Temporarily swap to the good VAE
-            final_image = pipe.decode_latents(final_latents.to(dtype))[0]
-            pipe.vae = taef1 # Swap back to taef1 for the next run
-        yield final_image, seed, gr.update(visible=False)
 def fetch_hf_adapter_files(link):
     split_link = link.split("/")
@@ -352,37 +307,79 @@ def fetch_hf_adapter_files(link):
     print(f"Repository attempted: {split_link}")
     model_card = ModelCard.load(link)
     base_model = model_card.data.get("base_model")
     print(f"Base model: {base_model}")
     acceptable_models = {"Qwen/Qwen-Image"}
     models_to_check = base_model if isinstance(base_model, list) else [base_model]
     if not any(model in acceptable_models for model in models_to_check):
         raise Exception("Not a Qwen-Image LoRA!")
-    image_path = model_card.data.get("widget", [{}])[0].get("output", {}).get("url")
     trigger_word = model_card.data.get("instance_prompt", "")
     image_url = f"https://huggingface.co/{link}/resolve/main/{image_path}" if image_path else None
     fs = HfFileSystem()
     try:
         list_of_files = fs.ls(link, detail=False)
-        safetensors_name = next((f.split('/')[-1] for f in list_of_files if f.endswith(".safetensors")), None)
         if not safetensors_name:
             raise Exception("No valid *.safetensors file found in the repository.")
     except Exception as e:
         print(e)
-        raise Exception("Could not find a valid *.safetensors file in the Hugging Face repository.")
     return split_link[1], link, safetensors_name, trigger_word, image_url
 def validate_custom_adapter(link):
     print(f"Checking a custom model on: {link}")
-    if link.startswith("https://huggingface.co"):
-        link = urlparse(link).path.strip("/")
-    return fetch_hf_adapter_files(link)
 def incorporate_custom_adapter(custom_lora):
     global loras
@@ -402,21 +399,30 @@ def incorporate_custom_adapter(custom_lora):
               </div>
             </div>
             '''
-            existing_item_index = next((i for i, item in enumerate(loras) if item['repo'] == repo), None)
             if existing_item_index is None:
-                new_item = {"image": image, "title": title, "repo": repo, "weights": path, "trigger_word": trigger_word}
                 loras.append(new_item)
-                existing_item_index = len(loras) - 1
             return gr.update(visible=True, value=card), gr.update(visible=True), gr.Gallery(selected_index=None), f"Custom: {path}", existing_item_index, trigger_word
         except Exception as e:
-            gr.Warning(f"Invalid LoRA: {e}")
-            return gr.update(visible=True, value=f"Invalid LoRA: {e}"), gr.update(visible=True), gr.update(), "", None, ""
-    return gr.update(visible=False), gr.update(visible=False), gr.update(), "", None, ""
 def discard_custom_adapter():
     return gr.update(visible=False), gr.update(visible=False), gr.update(), "", None, ""
 css = '''
 #gen_btn{height: 100%}
@@ -430,10 +436,6 @@ css = '''
 .card_internal img{margin-right: 1em}
 .styler{--form-gap-width: 0px !important}
 #speed_status{padding: .5em; border-radius: 5px; margin: 1em 0}
-#progress{height:30px}
-#progress .generating{display:none}
-.progress-container {width: 100%;height: 30px;background-color: #f0f0f0;border-radius: 15px;overflow: hidden;margin-bottom: 20px}
-.progress-bar {height: 100%;background-color: #4f46e5;width: calc(var(--current) / var(--total) * 100%);transition: width 0.1s ease-in-out}
 '''
 with gr.Blocks(theme="bethecloud/storj_theme", css=css, delete_cache=(120, 120)) as app:
@@ -457,10 +459,6 @@ with gr.Blocks(theme="bethecloud/storj_theme", css=css, delete_cache=(120, 120))
                 elem_id="gallery",
                 show_share_button=False
             )
-            with gr.Accordion("Image-to-Image (Optional)", open=False):
-                image_input = gr.Image(type="filepath", label="Input Image")
-                image_strength = gr.Slider(label="Image Strength", minimum=0.1, maximum=1.0, step=0.05, value=0.6)
             with gr.Group():
                 custom_lora = gr.Textbox(label="Custom LoRA", info="LoRA Hugging Face path", placeholder="username/lora-model-name")
                 gr.Markdown("[Check Qwen-Image LoRAs](https://huggingface.co/models?other=base_model:adapter:Qwen/Qwen-Image)", elem_id="lora_list")
@@ -469,14 +467,13 @@ with gr.Blocks(theme="bethecloud/storj_theme", css=css, delete_cache=(120, 120))
         with gr.Column():
             result = gr.Image(label="Generated Image")
-            progress_bar = gr.HTML(visible=False, elem_id="progress")
             with gr.Row():
                 aspect_ratio = gr.Dropdown(
                     label="Aspect Ratio",
                     choices=["1:1", "16:9", "9:16", "4:3", "3:4", "3:2", "2:3"],
                     value="1:1"
-                )
             with gr.Row():
                 speed_mode = gr.Dropdown(
                     label="Output Mode",
@@ -491,12 +488,12 @@ with gr.Blocks(theme="bethecloud/storj_theme", css=css, delete_cache=(120, 120))
             with gr.Column():
                 with gr.Row():
                     cfg_scale = gr.Slider(
-                        label="Guidance Scale",
                         minimum=1.0,
                         maximum=5.0,
                         step=0.1,
                         value=4.0,
-                        info="Lower for speed mode, higher for quality. Also called 'True CFG'."
                     )
                     steps = gr.Slider(
                         label="Steps",
@@ -536,18 +533,11 @@ with gr.Blocks(theme="bethecloud/storj_theme", css=css, delete_cache=(120, 120))
         outputs=[custom_lora_info, custom_lora_button, gallery, selected_info, selected_index, custom_lora]
     )
-    gen_inputs = [prompt, image_input, image_strength, cfg_scale, steps, selected_index, randomize_seed, seed, aspect_ratio, lora_scale, speed_mode]
-    gen_outputs = [result, seed, progress_bar]
-    generate_button.click(
-        fn=process_generation_request,
-        inputs=gen_inputs,
-        outputs=gen_outputs
-    )
-    prompt.submit(
-        fn=process_generation_request,
-        inputs=gen_inputs,
-        outputs=gen_outputs
     )
 app.queue()

 import spaces
 from diffusers import (
     DiffusionPipeline,
+    FlowMatchEulerDiscreteScheduler)
 from huggingface_hub import (
     hf_hub_download,
     HfFileSystem,
     ModelCard,
+    snapshot_download)
 from diffusers.utils import load_image
 import requests
 from urllib.parse import urlparse
     },
 ]
+# Initialize the base model
 dtype = torch.bfloat16
 base_model = "Qwen/Qwen-Image"
 # Scheduler configuration from the Qwen-Image-Lightning repository
 scheduler_config = {
     "base_image_seq_len": 256,
 }
 scheduler = FlowMatchEulerDiscreteScheduler.from_config(scheduler_config)
 pipe = DiffusionPipeline.from_pretrained(
+    base_model, scheduler=scheduler, torch_dtype=dtype
 ).to(device)
 # Lightning LoRA info (no global state)
 LIGHTNING_LORA_REPO = "lightx2v/Qwen-Image-Lightning"
 LIGHTNING_LORA_WEIGHT = "Qwen-Image-Lightning-8steps-V1.0.safetensors"
     else:
         return gr.update(value="Base mode selected - 48 steps for best quality"), 48, 4.0
@spaces.GPU(duration=100)
def create_image(prompt_mash, steps, seed, cfg_scale, width, height, lora_scale, negative_prompt=""):
    """Run a single text-to-image pass on the shared pipeline.

    The pipeline is moved to CUDA, a CUDA generator is seeded with ``seed``,
    and ``cfg_scale`` is forwarded to the pipeline as ``true_cfg_scale``.

    Note: ``lora_scale`` is accepted but never read inside this function.

    Returns:
        The first entry of the pipeline output's ``images``.
    """
    pipe.to("cuda")
    rng = torch.Generator(device="cuda").manual_seed(seed)

    # Collect the call arguments up front so the timed section is just the call.
    call_kwargs = {
        "prompt": prompt_mash,
        "negative_prompt": negative_prompt,
        "num_inference_steps": steps,
        "true_cfg_scale": cfg_scale,
        "width": width,
        "height": height,
        "generator": rng,
    }

    with Timer("Generating image"):
        output = pipe(**call_kwargs)
        first_image = output.images[0]
    return first_image
 @spaces.GPU(duration=100)
+def process_adapter_generation(prompt, cfg_scale, steps, selected_index, randomize_seed, seed, aspect_ratio, lora_scale, speed_mode, progress=gr.Progress(track_tqdm=True)):
     if selected_index is None:
+        raise gr.Error("You must select a LoRA before proceeding.")
     selected_lora = loras[selected_index]
     lora_path = selected_lora["repo"]
     # Prepare prompt with trigger word
     if trigger_word:
+        if "trigger_position" in selected_lora:
+            if selected_lora["trigger_position"] == "prepend":
+                prompt_mash = f"{trigger_word} {prompt}"
+            else:
+                prompt_mash = f"{prompt} {trigger_word}"
+        else:
+            prompt_mash = f"{trigger_word} {prompt}"
     else:
         prompt_mash = prompt
     # Always unload any existing LoRAs first to avoid conflicts
     with Timer("Unloading existing LoRAs"):
+        pipe.unload_lora_weights()
     # Load LoRAs based on speed mode
     if speed_mode == "Fast (8 steps)":
         with Timer("Loading Lightning LoRA and style LoRA"):
+            # Load Lightning LoRA first
+            pipe.load_lora_weights(
                 LIGHTNING_LORA_REPO,
                 weight_name=LIGHTNING_LORA_WEIGHT,
                 adapter_name="lightning"
             )
+            # Load the selected style LoRA
+            weight_name = selected_lora.get("weights", None)
+            pipe.load_lora_weights(
                 lora_path,
                 weight_name=weight_name,
+                low_cpu_mem_usage=True,
                 adapter_name="style"
             )
+            # Set both adapters active with their weights
+            pipe.set_adapters(["lightning", "style"], adapter_weights=[1.0, lora_scale])
+    else:
+        # Quality mode - only load the style LoRA
         with Timer(f"Loading LoRA weights for {selected_lora['title']}"):
+            weight_name = selected_lora.get("weights", None)
+            pipe.load_lora_weights(
+                lora_path,
+                weight_name=weight_name,
+                low_cpu_mem_usage=True
+            )
+    # Set random seed for reproducibility
+    with Timer("Randomizing seed"):
+        if randomize_seed:
+            seed = random.randint(0, MAX_SEED)
+    # Get image dimensions from aspect ratio
     width, height = compute_image_dimensions(aspect_ratio)
+    # Generate the image
+    final_image = create_image(prompt_mash, steps, seed, cfg_scale, width, height, lora_scale)
+    return final_image, seed
 def fetch_hf_adapter_files(link):
     split_link = link.split("/")
     print(f"Repository attempted: {split_link}")
+    # Load model card
     model_card = ModelCard.load(link)
     base_model = model_card.data.get("base_model")
     print(f"Base model: {base_model}")
+    # Validate model type (for Qwen-Image)
     acceptable_models = {"Qwen/Qwen-Image"}
     models_to_check = base_model if isinstance(base_model, list) else [base_model]
     if not any(model in acceptable_models for model in models_to_check):
         raise Exception("Not a Qwen-Image LoRA!")
+    # Extract image and trigger word
+    image_path = model_card.data.get("widget", [{}])[0].get("output", {}).get("url", None)
     trigger_word = model_card.data.get("instance_prompt", "")
     image_url = f"https://huggingface.co/{link}/resolve/main/{image_path}" if image_path else None
+    # Initialize Hugging Face file system
     fs = HfFileSystem()
     try:
         list_of_files = fs.ls(link, detail=False)
+        # Find safetensors file
+        safetensors_name = None
+        for file in list_of_files:
+            filename = file.split("/")[-1]
+            if filename.endswith(".safetensors"):
+                safetensors_name = filename
+                break
         if not safetensors_name:
             raise Exception("No valid *.safetensors file found in the repository.")
     except Exception as e:
         print(e)
+        raise Exception("You didn't include a valid Hugging Face repository with a *.safetensors LoRA")
     return split_link[1], link, safetensors_name, trigger_word, image_url
 def validate_custom_adapter(link):
     """Resolve a user-supplied LoRA reference into adapter metadata.

     Accepted forms of ``link``:
       * a bare repository id such as ``username/repo``;
       * a huggingface.co repository URL (``http`` or ``https``, with or
         without ``www.``); trailing segments such as ``/tree/main`` are
         ignored;
       * a huggingface.co URL pointing at a ``*.safetensors`` file, in which
         case that file name is used directly and the model card is read on
         a best-effort basis only.

     Args:
         link: Text entered by the user.

     Returns:
         A ``(repo_name, repo, safetensors_name, trigger_word, image_url)``
         tuple.

     Raises:
         ValueError: If ``link`` is a URL that is not hosted on huggingface.co
             or does not contain ``<username>/<repo>``.
         Exception: Whatever ``fetch_hf_adapter_files`` raises for repository
             links.
     """
     print(f"Checking a custom model on: {link}")
     link = link.strip()
     hf_hosts = ("huggingface.co", "www.huggingface.co")

     # Tolerate URLs pasted without a scheme, e.g. "huggingface.co/user/repo/...".
     if "://" not in link and link.split("/", 1)[0].lower() in hf_hosts:
         link = f"https://{link}"

     # Anything that is still not a URL is treated as a plain repository id.
     if "://" not in link:
         return fetch_hf_adapter_files(link)

     parsed = urlparse(link)
     if parsed.netloc.lower() not in hf_hosts:
         # Fail loudly instead of silently returning None to the caller.
         raise ValueError("Only huggingface.co links are supported")

     # urlparse already dropped any query string or fragment from the path.
     path_parts = [segment for segment in parsed.path.split("/") if segment]
     if len(path_parts) < 2:
         raise ValueError("Invalid Hugging Face URL: expected huggingface.co/<username>/<repo>")

     username, repo_name = path_parts[0], path_parts[1]
     repo = f"{username}/{repo_name}"

     if len(path_parts) > 2 and path_parts[-1].endswith(".safetensors"):
         # Direct link to a weight file: keep the file name the user chose.
         safetensors_name = path_parts[-1]
         try:
             model_card = ModelCard.load(repo)
             trigger_word = model_card.data.get("instance_prompt", "")
             image_path = model_card.data.get("widget", [{}])[0].get("output", {}).get("url", None)
             image_url = f"https://huggingface.co/{repo}/resolve/main/{image_path}" if image_path else None
         except Exception as card_error:
             # The card is optional metadata; a missing or malformed card must
             # not block loading the weights.
             print(f"Could not read model card for {repo}: {card_error}")
             trigger_word = ""
             image_url = None
         return repo_name, repo, safetensors_name, trigger_word, image_url

     return fetch_hf_adapter_files(repo)
 def incorporate_custom_adapter(custom_lora):
     global loras
               </div>
             </div>
             '''
+            existing_item_index = next((index for (index, item) in enumerate(loras) if item['repo'] == repo), None)
             if existing_item_index is None:
+                new_item = {
+                    "image": image,
+                    "title": title,
+                    "repo": repo,
+                    "weights": path,
+                    "trigger_word": trigger_word
+                }
+                print(new_item)
                 loras.append(new_item)
+                existing_item_index = len(loras) - 1  # Get the actual index after adding
             return gr.update(visible=True, value=card), gr.update(visible=True), gr.Gallery(selected_index=None), f"Custom: {path}", existing_item_index, trigger_word
         except Exception as e:
+            gr.Warning(f"Invalid LoRA: either you entered an invalid link, or a non-Qwen-Image LoRA, this was the issue: {e}")
+            return gr.update(visible=True, value=f"Invalid LoRA: either you entered an invalid link, a non-Qwen-Image LoRA"), gr.update(visible=True), gr.update(), "", None, ""
+    else:
+        return gr.update(visible=False), gr.update(visible=False), gr.update(), "", None, ""
 def discard_custom_adapter():
     """Return the six values that clear the custom LoRA selection.

     In order: two hidden-component updates, one no-op update, an empty
     string, ``None``, and an empty string.
     NOTE(review): presumably these map to the same output components as
     ``incorporate_custom_adapter`` - confirm against the event wiring.
     """
     hide_first = gr.update(visible=False)
     hide_second = gr.update(visible=False)
     leave_unchanged = gr.update()
     return hide_first, hide_second, leave_unchanged, "", None, ""
+process_adapter_generation.zerogpu = True
 css = '''
 #gen_btn{height: 100%}
 .card_internal img{margin-right: 1em}
 .styler{--form-gap-width: 0px !important}
 #speed_status{padding: .5em; border-radius: 5px; margin: 1em 0}
 '''
 with gr.Blocks(theme="bethecloud/storj_theme", css=css, delete_cache=(120, 120)) as app:
                 elem_id="gallery",
                 show_share_button=False
             )
             with gr.Group():
                 custom_lora = gr.Textbox(label="Custom LoRA", info="LoRA Hugging Face path", placeholder="username/lora-model-name")
                 gr.Markdown("[Check Qwen-Image LoRAs](https://huggingface.co/models?other=base_model:adapter:Qwen/Qwen-Image)", elem_id="lora_list")
         with gr.Column():
             result = gr.Image(label="Generated Image")
             with gr.Row():
                 aspect_ratio = gr.Dropdown(
                     label="Aspect Ratio",
                     choices=["1:1", "16:9", "9:16", "4:3", "3:4", "3:2", "2:3"],
                     value="1:1"
+                    )
             with gr.Row():
                 speed_mode = gr.Dropdown(
                     label="Output Mode",
             with gr.Column():
                 with gr.Row():
                     cfg_scale = gr.Slider(
+                        label="Guidance Scale (True CFG)",
                         minimum=1.0,
                         maximum=5.0,
                         step=0.1,
                         value=4.0,
+                        info="Lower for speed mode, higher for quality"
                     )
                     steps = gr.Slider(
                         label="Steps",
         outputs=[custom_lora_info, custom_lora_button, gallery, selected_info, selected_index, custom_lora]
     )
+    gr.on(
+        triggers=[generate_button.click, prompt.submit],
+        fn=process_adapter_generation,
+        inputs=[prompt, cfg_scale, steps, selected_index, randomize_seed, seed, aspect_ratio, lora_scale, speed_mode],
+        outputs=[result, seed]
     )
 app.queue()