# Wan2.2 / app.py
import gradio as gr
import torch
from diffusers import WanPipeline, UniPCMultistepScheduler
from diffusers.utils import export_to_video
from PIL import Image
device = "cuda" if torch.cuda.is_available() else "cpu"

# Load the Wan 2.2 TI2V-5B pipeline (half precision on GPU, full precision on CPU)
pipe = WanPipeline.from_pretrained(
    "Wan-AI/Wan2.2-TI2V-5B-Diffusers",
    torch_dtype=torch.float16 if device == "cuda" else torch.float32,
)
pipe.scheduler = UniPCMultistepScheduler.from_config(pipe.scheduler.config)
pipe = pipe.to(device)
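# Optional memory saver (a sketch, not part of the original app): for a pipeline this
# large, diffusers' model CPU offload can replace the full `.to(device)` move above,
# keeping only the sub-model that is currently running on the GPU:
#   pipe.enable_model_cpu_offload()
# If offload is enabled, the `pipe = pipe.to(device)` call should be skipped.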
FIXED_FPS = 24
MIN_DURATION, MAX_DURATION = 1, 8
def generate_video(prompt, duration, init_image=None):
    duration = int(max(MIN_DURATION, min(MAX_DURATION, duration)))
    # The diffusers Wan pipeline takes a frame count rather than a duration in seconds;
    # 24 fps * whole seconds + 1 keeps the count in the (4k + 1) form the Wan VAE expects.
    num_frames = duration * FIXED_FPS + 1

    # WanPipeline is text-to-video, so the optional init image is converted here but not
    # yet passed to the pipeline; see the image-to-video sketch after this function.
    input_image = None
    if init_image is not None:
        input_image = Image.fromarray(init_image).convert("RGB")

    frames = pipe(
        prompt=prompt,
        height=512,
        width=512,
        num_frames=num_frames,
        guidance_scale=1.0,
    ).frames[0]

    video_path = "output.mp4"
    export_to_video(frames, video_path, fps=FIXED_FPS)
    return video_path
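# --- Sketch: driving generation from the uploaded image ---
# A minimal sketch (not part of the original app) of an image-to-video path, assuming this
# checkpoint also works with diffusers' WanImageToVideoPipeline; that assumption is not
# verified here, and the function is defined but never wired into the UI below.
def generate_video_from_image(prompt, duration, init_image):
    from diffusers import WanImageToVideoPipeline  # lazy import: only needed for this path

    image = Image.fromarray(init_image).convert("RGB").resize((512, 512))
    num_frames = int(max(MIN_DURATION, min(MAX_DURATION, duration))) * FIXED_FPS + 1

    # Loading a second pipeline on every call is wasteful; a real app would cache it.
    i2v_pipe = WanImageToVideoPipeline.from_pretrained(
        "Wan-AI/Wan2.2-TI2V-5B-Diffusers",
        torch_dtype=torch.float16 if device == "cuda" else torch.float32,
    ).to(device)

    frames = i2v_pipe(
        image=image,
        prompt=prompt,
        height=512,
        width=512,
        num_frames=num_frames,
        guidance_scale=1.0,
    ).frames[0]

    video_path = "output_i2v.mp4"
    export_to_video(frames, video_path, fps=FIXED_FPS)
    return video_path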
with gr.Blocks() as demo:
    gr.Markdown("## Wan 2.2 TI2V-5B Video Generator")
    with gr.Row():
        prompt_input = gr.Textbox(label="Prompt", placeholder="Describe your scene")
        duration_input = gr.Slider(label="Duration (seconds)", minimum=1, maximum=8, step=1, value=4)
        init_image_input = gr.Image(label="Optional Initial Image", type="numpy")
    generate_btn = gr.Button("Generate Video")
    output_video = gr.Video(label="Generated Video")

    generate_btn.click(
        generate_video,
        inputs=[prompt_input, duration_input, init_image_input],
        outputs=output_video,
    )
demo.launch()
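# Usage note (assumption: hosting on a shared GPU such as a Hugging Face Space): enabling
# Gradio's request queue, e.g. `demo.queue(max_size=8).launch()`, lets concurrent requests
# wait in line instead of timing out during the long video generation.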