Spaces:

multimodalart
/

hypernoise-sana-sprint

Running on Zero

App Files Files Community

hypernoise-sana-sprint / app.py

multimodalart HF Staff

Update app.py

1308281 verified 3 months ago

raw

history blame contribute delete

6.08 kB

	import gradio as gr
	import numpy as np
	import random
	import spaces
	import torch
	from diffusers import SanaSprintPipeline
	import peft
	from peft.tuners.lora.layer import Linear as LoraLinear
	import types

	# Runtime configuration: bfloat16 tensors, CUDA when it is available, and
	# the name under which the HyperNoise adapter is registered with PEFT.
	dtype = torch.bfloat16
	if torch.cuda.is_available():
	    device = "cuda"
	else:
	    device = "cpu"
	adapter_name = "hypernoise_adapter"

	# Base Sana Sprint pipeline, moved onto the selected device and dtype.
	pipe = SanaSprintPipeline.from_pretrained(
	    "Efficient-Large-Model/Sana_Sprint_0.6B_1024px_diffusers",
	    torch_dtype=dtype,
	)
	pipe = pipe.to(device, dtype)

	# Wrap the pipeline's transformer in a PEFT model that carries the
	# HyperNoise adapter weights, then move the wrapped model as well.
	pipe.transformer = peft.PeftModel.from_pretrained(
	    pipe.transformer,
	    "lucaeyring/HyperNoise_Sana_Sprint_0.6B",
	    adapter_name=adapter_name,
	    dtype=dtype,
	)
	pipe.transformer = pipe.transformer.to(device, dtype)

	# Define the custom forward function for LoRA
	def scaled_base_lora_forward(self, x, *args, **kwargs):
	    """Replacement ``forward`` for a LoRA-wrapped linear layer.

	    When adapters are disabled the call is delegated unchanged to the
	    wrapped base layer. Otherwise only the LoRA branch is evaluated
	    (``lora_B(lora_A(x))`` multiplied by the adapter's scaling factor); the
	    base layer's own output is not added to the result.

	    Args:
	        self: The layer this function is bound to. It must expose
	            ``disable_adapters``, ``base_layer``, ``lora_A``, ``lora_B``
	            and ``scaling``.
	        x: Input passed to the base layer or to the LoRA branch.
	        *args: Extra positional arguments, forwarded to the base layer only.
	        **kwargs: Extra keyword arguments, forwarded to the base layer only.

	    Returns:
	        The base layer output when adapters are disabled, otherwise the
	        scaled LoRA branch output for the module-level ``adapter_name``.
	    """
	    if self.disable_adapters:
	        # Variadic pass-through: a plain ``layer(x)`` call must work, and
	        # any extras the caller supplies must reach the base layer intact.
	        return self.base_layer(x, *args, **kwargs)
	    lora_out = self.lora_B[adapter_name](self.lora_A[adapter_name](x))
	    return lora_out * self.scaling[adapter_name]

	# Bind the custom forward onto the LoRA-wrapped module named "proj_out".
	# Only the first matching module is patched; the scan stops there.
	for module_name, candidate in pipe.transformer.base_model.model.named_modules():
	    if module_name != "proj_out":
	        continue
	    if not isinstance(candidate, LoraLinear):
	        continue
	    candidate.forward = types.MethodType(scaled_base_lora_forward, candidate)
	    break

	# Largest width/height offered by the UI sliders (the loaded checkpoint is
	# the 1024px Sana Sprint variant).
	MAX_IMAGE_SIZE = 1024
	# Upper bound for seeds: the maximum value of a signed 32-bit integer.
	MAX_SEED = np.iinfo(np.int32).max

	@spaces.GPU()
	def infer(prompt, seed=42, randomize_seed=False, width=1024, height=1024,
	          num_inference_steps=4, guidance_scale=4.5, progress=gr.Progress(track_tqdm=True)):
	    """Generate one image for ``prompt`` using HyperNoise-modulated latents.

	    The function first runs the adapter-enabled transformer once on random
	    latents and adds its output back onto those latents, then disables the
	    adapter and runs the regular pipeline starting from the modulated
	    latents.

	    Args:
	        prompt: Text prompt to encode.
	        seed: Seed for the torch RNGs; replaced when ``randomize_seed`` is set.
	        randomize_seed: If true, draw a new seed in ``[0, MAX_SEED]``.
	        width: Requested output width, forwarded to the pipeline.
	        height: Requested output height, forwarded to the pipeline.
	        num_inference_steps: Number of pipeline sampling steps.
	        guidance_scale: Guidance value used for both the noise-modulation
	            pass and the final pipeline call.
	        progress: Gradio progress object; not referenced in the body
	            (presumably it lets Gradio mirror tqdm progress - confirm).

	    Returns:
	        A ``(image, seed)`` tuple: the first generated image and the seed
	        that was actually used.
	    """
	    if randomize_seed:
	        seed = random.randint(0, MAX_SEED)

	    # Seed the global torch generators so the initial latents are reproducible.
	    torch.manual_seed(seed)
	    torch.cuda.manual_seed_all(seed)

	    with torch.inference_mode():
	        # Encode the prompt once; the embeddings are reused by both passes.
	        prompt_embeds, prompt_attention_mask = pipe.encode_prompt(
	            [prompt],
	            device=device,
	        )

	        # NOTE(review): the latent shape is fixed and does not follow
	        # width/height; presumably it matches the 1024px checkpoint.
	        # Confirm how the pipeline treats other requested sizes.
	        init_latents = torch.randn(
	            [1, 32, 32, 32],
	            device=device,
	            dtype=dtype,
	        )

	        # Noise-modulation pass with the adapter enabled: the transformer
	        # output is added onto the initial latents.
	        pipe.transformer.enable_adapter_layers()
	        modulated_latents = pipe.transformer(
	            hidden_states=init_latents,
	            encoder_hidden_states=prompt_embeds,
	            encoder_attention_mask=prompt_attention_mask,
	            guidance=torch.tensor([guidance_scale], device=device, dtype=dtype) * 0.1,
	            timestep=torch.tensor([1.0], device=device, dtype=dtype),
	        ).sample + init_latents

	        # Final generation with the adapter disabled. guidance_scale is
	        # forwarded explicitly so the user's setting is not silently
	        # replaced by the pipeline's default in this stage.
	        pipe.transformer.disable_adapter_layers()
	        image = pipe(
	            latents=modulated_latents,
	            prompt_embeds=prompt_embeds,
	            prompt_attention_mask=prompt_attention_mask,
	            intermediate_timesteps=None,
	            num_inference_steps=num_inference_steps,
	            guidance_scale=guidance_scale,
	            height=height,
	            width=width,
	        ).images[0]

	    return image, seed

	# Example prompts offered in the UI through gr.Examples.
	examples = [
	"A smiling slice of pizza doing yoga on a mountain top",
	"A fluffy cat wearing a wizard hat casting spells",
	"A robot painting a self-portrait in Van Gogh style",
	"A tiny dragon sleeping in a teacup",
	"An astronaut riding a unicorn through a rainbow",
	]

	# Custom CSS handed to gr.Blocks: centers the element with id
	# "col-container" and caps its width at 520px.
	css = """
	#col-container {
	margin: 0 auto;
	max-width: 520px;
	}
	"""

	# Build the Gradio interface; the custom CSS above targets "col-container".
	with gr.Blocks(css=css) as demo:

	with gr.Column(elem_id="col-container"):
	gr.Markdown("""# HyperNoise Sana Sprint 0.6B

	Fast text-to-image generation with HyperNoise adapter for Sana Sprint model.

	[[Sana Sprint Model](https://huggingface.co/Efficient-Large-Model/Sana_Sprint_0.6B_1024px_diffusers)]
	[[HyperNoise Adapter](https://huggingface.co/lucaeyring/HyperNoise_Sana_Sprint_0.6B)]
	""")

	with gr.Row():
	# Single-line prompt box; submitting it triggers generation via gr.on below.
	prompt = gr.Text(
	label="Prompt",
	show_label=False,
	max_lines=1,
	placeholder="Enter your prompt",
	container=False,
	)

	run_button = gr.Button("Run", scale=0)

	# Number of sampling steps passed to infer(); defaults to 4, range 1-20.
	num_inference_steps = gr.Slider(
	label="Inference Steps",
	info="Higher gets more quality and a bit slower (but even 20 is still very fast!)",
	minimum=1,
	maximum=20,
	step=1,
	value=4,
	)
	# Output image component, filled from the first value returned by infer().
	result = gr.Image(label="Result", show_label=False)

	# Less frequently used controls, collapsed by default (open=False).
	with gr.Accordion("Advanced Settings", open=False):

	seed = gr.Slider(
	label="Seed",
	minimum=0,
	maximum=MAX_SEED,
	step=1,
	value=42,
	)

	# Checked by default, so infer() draws a fresh seed unless it is unticked.
	randomize_seed = gr.Checkbox(label="Randomize seed", value=True)

	with gr.Row():
	# Requested output size, in steps of 64 between 256 and MAX_IMAGE_SIZE.
	width = gr.Slider(
	label="Width",
	minimum=256,
	maximum=MAX_IMAGE_SIZE,
	step=64,
	value=1024,
	)

	height = gr.Slider(
	label="Height",
	minimum=256,
	maximum=MAX_IMAGE_SIZE,
	step=64,
	value=1024,
	)

	with gr.Row():

	# Guidance value passed to infer().
	guidance_scale = gr.Slider(
	label="Guidance Scale",
	minimum=1.0,
	maximum=10.0,
	step=0.5,
	value=4.5,
	)

	# Examples supply only the prompt, so infer() runs with its default values
	# for every other argument; cache_examples is set to "lazy".
	gr.Examples(
	examples=examples,
	fn=infer,
	inputs=[prompt],
	outputs=[result, seed],
	cache_examples="lazy"
	)

	# Clicking Run and submitting the prompt both call infer(); the seed it
	# returns is written back into the seed slider.
	gr.on(
	triggers=[run_button.click, prompt.submit],
	fn=infer,
	inputs=[prompt, seed, randomize_seed, width, height, num_inference_steps, guidance_scale],
	outputs=[result, seed]
	)

	# Start the Gradio app.
	demo.launch()