Spaces:
Running
on
Zero
Running
on
Zero
| import gradio as gr | |
| import numpy as np | |
| import random | |
| import spaces | |
| import torch | |
| from diffusers import SanaSprintPipeline | |
| import peft | |
| from peft.tuners.lora.layer import Linear as LoraLinear | |
| import types | |
# Runtime configuration shared by model loading and inference.
dtype = torch.bfloat16
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
# Name under which the LoRA adapter is registered; the custom LoRA forward
# below indexes the adapter's weights with this key.
adapter_name = "hypernoise_adapter"

# Load the base Sana Sprint pipeline, then move it to the target device/dtype.
pipe = SanaSprintPipeline.from_pretrained(
    "Efficient-Large-Model/Sana_Sprint_0.6B_1024px_diffusers",
    torch_dtype=dtype,
)
pipe = pipe.to(device, dtype)

# Replace the pipeline's transformer with a PEFT wrapper around it that
# carries the HyperNoise adapter weights.
pipe.transformer = peft.PeftModel.from_pretrained(
    pipe.transformer,
    "lucaeyring/HyperNoise_Sana_Sprint_0.6B",
    adapter_name=adapter_name,
    dtype=dtype,
).to(device, dtype)
def scaled_base_lora_forward(self, x, *args, **kwargs):
    """Replacement ``forward`` for a LoRA-wrapped linear layer.

    While adapters are enabled, the result is only the low-rank branch for the
    module-level ``adapter_name``: ``lora_B(lora_A(x)) * scaling``. The base
    layer is not evaluated on that path, and ``*args`` / ``**kwargs`` are not
    used. While adapters are disabled, the call is delegated unchanged to the
    wrapped base layer.
    """
    if not self.disable_adapters:
        low_rank = self.lora_A[adapter_name](x)
        projected = self.lora_B[adapter_name](low_rank)
        return projected * self.scaling[adapter_name]
    return self.base_layer(x, *args, **kwargs)
# Bind the custom forward onto the first module named exactly "proj_out" that
# is a LoRA linear layer; every other module keeps its stock forward. If no
# such module exists, nothing is patched.
for layer_name, layer in pipe.transformer.base_model.model.named_modules():
    if layer_name != "proj_out" or not isinstance(layer, LoraLinear):
        continue
    layer.forward = types.MethodType(scaled_base_lora_forward, layer)
    break
# Largest seed offered/drawn by the app: the maximum signed 32-bit integer.
MAX_SEED = np.iinfo("int32").max
# Upper bound of the width/height sliders.
MAX_IMAGE_SIZE = 1024  # Sana Sprint is optimized for 1024px
def infer(prompt, seed=42, randomize_seed=False, width=1024, height=1024,
          num_inference_steps=4, guidance_scale=4.5, progress=gr.Progress(track_tqdm=True)):
    """Generate one image for ``prompt`` from HyperNoise-modulated initial noise.

    The function seeds torch's global RNGs, encodes the prompt, draws Gaussian
    latents, adds the adapter-enabled transformer's output to those latents,
    and finally runs the pipeline on the modulated latents with the adapter
    disabled.

    Args:
        prompt: Text prompt to render.
        seed: RNG seed. Replaced by a random integer in ``[0, MAX_SEED]`` when
            ``randomize_seed`` is true.
        randomize_seed: Whether to ignore ``seed`` and draw a fresh one.
        width: Requested image width in pixels.
        height: Requested image height in pixels.
        num_inference_steps: Forwarded to the pipeline call.
        guidance_scale: Guidance value used for the modulation pass and
            forwarded to the pipeline call.
        progress: Gradio progress tracker. Not referenced in the body.

    Returns:
        A tuple ``(image, seed)``: the first image returned by the pipeline
        and the seed that was actually used.
    """
    # NOTE(review): `spaces` is imported at the top of the file but no
    # `@spaces.GPU` decorator is visible on this function - confirm whether
    # the ZeroGPU deployment needs one.
    if randomize_seed:
        seed = random.randint(0, MAX_SEED)

    # Set random seed for reproducibility (global RNG state, CPU and CUDA).
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)

    # Latents handed to the pipeline via `latents=` decide the output size, so
    # the noise grid has to follow the requested height/width. A fixed
    # [1, 32, 32, 32] tensor would make the size controls a no-op.
    # At the defaults this is expected to be the original [1, 32, 32, 32].
    downscale = pipe.vae_scale_factor
    latent_shape = [
        1,
        pipe.transformer.config.in_channels,
        int(height) // downscale,
        int(width) // downscale,
    ]

    with torch.inference_mode():
        # Encode the prompt once; the embeddings are reused by both passes.
        prompt_embeds, prompt_attention_mask = pipe.encode_prompt(
            [prompt],
            device=device,
        )

        # Generate initial random latents.
        init_latents = torch.randn(latent_shape, device=device, dtype=dtype)

        # Apply HyperNoise modulation with the adapter enabled: the
        # transformer output is added to the initial noise as a residual.
        pipe.transformer.enable_adapter_layers()
        modulated_latents = pipe.transformer(
            hidden_states=init_latents,
            encoder_hidden_states=prompt_embeds,
            encoder_attention_mask=prompt_attention_mask,
            # The 0.1 factor is kept from the original call; presumably it
            # mirrors the transformer's guidance embedding scale - confirm.
            guidance=torch.tensor([guidance_scale], device=device, dtype=dtype) * 0.1,
            timestep=torch.tensor([1.0], device=device, dtype=dtype),
        ).sample + init_latents

        # Generate the final image with the adapter disabled.
        pipe.transformer.disable_adapter_layers()
        image = pipe(
            latents=modulated_latents,
            prompt_embeds=prompt_embeds,
            prompt_attention_mask=prompt_attention_mask,
            intermediate_timesteps=None,
            num_inference_steps=num_inference_steps,
            # Forwarded so the UI's guidance control also reaches the
            # generation pass instead of the pipeline's built-in default.
            guidance_scale=guidance_scale,
            height=height,
            width=width,
        ).images[0]

    return image, seed
# Prompts passed to the UI's examples widget.
examples = [
    "A smiling slice of pizza doing yoga on a mountain top",
    "A fluffy cat wearing a wizard hat casting spells",
    "A robot painting a self-portrait in Van Gogh style",
    "A tiny dragon sleeping in a teacup",
    "An astronaut riding a unicorn through a rainbow",
]

# Stylesheet handed to gr.Blocks: centers the element with id
# "col-container" and caps its width.
css = """
#col-container {
    margin: 0 auto;
    max-width: 520px;
}
"""
# NOTE(review): the original indentation of this layout was not available, so
# the nesting below is reconstructed - in particular the steps slider, the
# examples widget and the gr.on() call are placed at column / column / Blocks
# level respectively. Confirm against the deployed layout.
with gr.Blocks(css=css) as demo:
    with gr.Column(elem_id="col-container"):
        gr.Markdown("""# HyperNoise Sana Sprint 0.6B
Fast text-to-image generation with HyperNoise adapter for Sana Sprint model.
[[Sana Sprint Model](https://huggingface.co/Efficient-Large-Model/Sana_Sprint_0.6B_1024px_diffusers)]
[[HyperNoise Adapter](https://huggingface.co/lucaeyring/HyperNoise_Sana_Sprint_0.6B)]
""")

        # Prompt box and run button share one row.
        with gr.Row():
            prompt = gr.Text(
                label="Prompt",
                show_label=False,
                max_lines=1,
                placeholder="Enter your prompt",
                container=False,
            )
            run_button = gr.Button("Run", scale=0)

        num_inference_steps = gr.Slider(
            label="Inference Steps",
            info="Higher gets more quality and a bit slower (but even 20 is still very fast!)",
            minimum=1,
            maximum=20,
            step=1,
            value=4,
        )

        result = gr.Image(label="Result", show_label=False)

        with gr.Accordion("Advanced Settings", open=False):
            seed = gr.Slider(
                label="Seed",
                minimum=0,
                maximum=MAX_SEED,
                step=1,
                value=42,
            )
            randomize_seed = gr.Checkbox(label="Randomize seed", value=True)

            # Width and height sliders share the same range, step and default.
            size_slider_options = dict(
                minimum=256,
                maximum=MAX_IMAGE_SIZE,
                step=64,
                value=1024,
            )
            with gr.Row():
                width = gr.Slider(label="Width", **size_slider_options)
                height = gr.Slider(label="Height", **size_slider_options)

            with gr.Row():
                guidance_scale = gr.Slider(
                    label="Guidance Scale",
                    minimum=1.0,
                    maximum=10.0,
                    step=0.5,
                    value=4.5,
                )

        # Outputs shared by the examples widget and the run triggers: the
        # image goes to `result`, the seed used is written back to `seed`.
        infer_outputs = [result, seed]

        # Examples supply only the prompt, so infer() runs with its own
        # defaults for every other argument.
        gr.Examples(
            examples=examples,
            fn=infer,
            inputs=[prompt],
            outputs=infer_outputs,
            cache_examples="lazy",
        )

    # Clicking "Run" or submitting the prompt box both call infer(); the
    # input order matches infer()'s positional parameters.
    infer_inputs = [
        prompt,
        seed,
        randomize_seed,
        width,
        height,
        num_inference_steps,
        guidance_scale,
    ]
    gr.on(
        triggers=[run_button.click, prompt.submit],
        fn=infer,
        inputs=infer_inputs,
        outputs=infer_outputs,
    )

demo.launch()