# Wan2.2 / app.py
import gradio as gr
import torch
from diffusers import WanPipeline, UniPCMultistepScheduler
from diffusers.utils import export_to_video
from PIL import Image

# Run on the GPU when available; a 5B-parameter model on CPU will be extremely slow.
device = "cuda" if torch.cuda.is_available() else "cpu"

MODEL_ID = "Wan-AI/Wan2.2-TI2V-5B-Diffusers"

# Text-to-video pipeline for the Wan 2.2 TI2V-5B checkpoint.
pipe = WanPipeline.from_pretrained(
    MODEL_ID,
    torch_dtype=torch.float16 if device == "cuda" else torch.float32,
)
pipe.scheduler = UniPCMultistepScheduler.from_config(pipe.scheduler.config)
pipe = pipe.to(device)
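
# Image-to-video path. Assumption (not in the original script): the same TI2V-5B
# checkpoint can also be loaded with WanImageToVideoPipeline so the optional init
# image can condition the clip. If this checkpoint layout does not support it,
# the try/except falls back to text-to-video only.
try:
    from diffusers import WanImageToVideoPipeline

    i2v_pipe = WanImageToVideoPipeline.from_pretrained(
        MODEL_ID,
        torch_dtype=torch.float16 if device == "cuda" else torch.float32,
    )
    i2v_pipe.scheduler = UniPCMultistepScheduler.from_config(i2v_pipe.scheduler.config)
    i2v_pipe = i2v_pipe.to(device)
except Exception:
    i2v_pipe = None  # Image conditioning unavailable; prompts-only generation still works.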
FIXED_FPS = 24                     # output frame rate
MIN_DURATION, MAX_DURATION = 1, 8  # allowed clip length in seconds
def generate_video(prompt, duration, init_image=None):
    """Generate a short clip from a text prompt and an optional initial image."""
    duration = max(MIN_DURATION, min(MAX_DURATION, duration))

    # Wan's causal VAE compresses time by a factor of 4, so frame counts of the
    # form 4k + 1 are expected (assumption based on the official examples).
    num_frames = (int(duration * FIXED_FPS) // 4) * 4 + 1

    input_image = None
    if init_image is not None:
        # Gradio passes the image as a NumPy array; convert and match the output size.
        input_image = Image.fromarray(init_image).convert("RGB").resize((512, 512))

    # Shared call arguments. The official examples typically use a higher
    # guidance_scale (around 5.0); 1.0 is kept from the original script.
    kwargs = dict(prompt=prompt, height=512, width=512,
                  num_frames=num_frames, guidance_scale=1.0)

    if input_image is not None and i2v_pipe is not None:
        output = i2v_pipe(image=input_image, **kwargs)
    else:
        output = pipe(**kwargs)
    frames = output.frames[0]

    # Write the frames to an MP4 with diffusers' export helper.
    video_path = "output.mp4"
    export_to_video(frames, video_path, fps=FIXED_FPS)
    return video_path
with gr.Blocks() as demo:
    gr.Markdown("## Wan 2.2 TI2V-5B Video Generator")
    with gr.Row():
        prompt_input = gr.Textbox(label="Prompt", placeholder="Describe your scene")
        duration_input = gr.Slider(
            label="Duration (seconds)",
            minimum=MIN_DURATION, maximum=MAX_DURATION, step=1, value=4,
        )
    init_image_input = gr.Image(label="Optional Initial Image", type="numpy")
    generate_btn = gr.Button("Generate Video")
    output_video = gr.Video(label="Generated Video")

    generate_btn.click(
        generate_video,
        inputs=[prompt_input, duration_input, init_image_input],
        outputs=output_video,
    )

demo.launch()