|
|
import argparse |
|
|
import os |
|
|
from datetime import datetime |
|
|
from pathlib import Path |
|
|
from typing import List |
|
|
import numpy as np |
|
|
import torch |
|
|
from PIL import Image |
|
|
import gradio as gr |
|
|
import json |
|
|
import imageio |
|
|
|
|
|
|
|
|
# Optional heavy dependencies. The demo still starts without them; the
# HAS_MODELS flag records whether every one of these imports succeeded.
try:
    from huggingface_hub import snapshot_download
    from diffusers import AutoencoderKL, DDIMScheduler
    from transformers import CLIPVisionModelWithProjection
    from omegaconf import OmegaConf
    import spaces
except ImportError as import_error:
    print(f"Warning: Some dependencies not available: {import_error}")
    HAS_MODELS = False
else:
    HAS_MODELS = True
|
|
|
|
|
# Maps each motion-template identifier (the value offered in the UI dropdown)
# to the human-readable name used in status messages.
MOTION_TRIGGER_WORD = {
    # Sports
    "sports_basketball_gym": "Basketball in Gym",
    "sports_nba_pass": "NBA Pass",
    "sports_nba_dunk": "NBA Dunk",
    # Martial arts
    "movie_BruceLee1": "Bruce Lee Style",
    "shorts_kungfu_match1": "Kung Fu Match",
    "shorts_kungfu_desert1": "Desert Kung Fu",
    # Other
    "parkour_climbing": "Parkour Climbing",
    "dance_indoor_1": "Indoor Dance",
}

# Module-level CSS rule fixing the height of elements with id "fixed_size_img".
css_style = "#fixed_size_img {height: 500px;}"
|
|
|
|
|
def download_models():
    """Simulate the model download step of the demo.

    Nothing is fetched. The function prints a notice, makes sure the
    working directories used by the demo exist (creating them relative to
    the current working directory when missing), and reports when the
    optional model dependencies could not be imported.
    """
    print("Model downloading simulation...")

    required_dirs = (
        './pretrained_weights',
        './assets/masks',
        './assets/test_image',
        './assets/video_template',
    )
    for directory in required_dirs:
        os.makedirs(directory, exist_ok=True)

    # With the dependencies present there is currently nothing further to do.
    if not HAS_MODELS:
        print("Skipping model download - dependencies not available")
|
|
|
|
|
class MIMODemo():
    """Placeholder backend for the MIMO Gradio demo.

    No model is run here: ``generate_video`` builds a short clip by sliding
    the uploaded image left and right and writes it out as an MP4.
    """

    # Parameters of the placeholder animation.
    NUM_FRAMES = 30        # frames per generated clip
    FPS = 15               # frame rate handed to imageio
    SHIFT_AMPLITUDE = 10   # largest horizontal shift, in pixels
    SHIFT_FREQUENCY = 0.2  # sine phase advance per frame, in radians
    OUTPUT_DIR = 'output'  # directory (relative to the CWD) for rendered clips

    def __init__(self):
        """Select the compute device and run the (simulated) model setup."""
        self.device = "cuda" if torch.cuda.is_available() else "cpu"
        print(f"Using device: {self.device}")

        # Setup is best-effort: a failure is reported but must not prevent
        # the UI from starting.
        try:
            download_models()
            print("MIMO demo initialized")
        except Exception as e:
            print(f"Initialization warning: {e}")

    def generate_video(self, image, motion_template):
        """Render the placeholder animation for *image*.

        Args:
            image: The uploaded picture (a PIL image when called from the
                UI; any array-like accepted by ``np.array`` also works), or
                ``None`` when nothing was uploaded.
            motion_template: A key of ``MOTION_TRIGGER_WORD``.

        Returns:
            A ``(video_path, status_message)`` tuple. ``video_path`` is
            ``None`` whenever no video was produced; the message then
            explains why. Errors are reported through the message rather
            than raised, so the UI always receives a displayable status.
        """
        try:
            if image is None:
                return None, "⚠️ Please upload an image first."

            # Reject unknown templates before doing any rendering work, so a
            # bad value cannot leave an orphaned video file behind.
            if motion_template not in MOTION_TRIGGER_WORD:
                return None, f"❌ Error: unknown motion template {motion_template!r}"

            print(f"Processing with template: {motion_template}")

            # Palette, CMYK or alpha-carrying images do not map cleanly onto
            # video frames; normalise PIL input to plain RGB first.
            if isinstance(image, Image.Image):
                image = image.convert("RGB")

            # The pixel data is identical for every frame: convert once.
            img_array = np.array(image)

            # Each frame is the image rolled along its width (axis 1) by a
            # sinusoidally varying number of pixels.
            frames = [
                np.roll(
                    img_array,
                    int(self.SHIFT_AMPLITUDE * np.sin(i * self.SHIFT_FREQUENCY)),
                    axis=1,
                )
                for i in range(self.NUM_FRAMES)
            ]

            save_dir = self.OUTPUT_DIR
            os.makedirs(save_dir, exist_ok=True)
            # Microsecond resolution keeps requests that arrive within the
            # same second from overwriting each other's output.
            case = datetime.now().strftime("%Y%m%d%H%M%S%f")
            outpath = f"{save_dir}/{case}.mp4"

            imageio.mimsave(outpath, frames, fps=self.FPS, quality=8)
            print(f'Demo video saved to: {outpath}')

            return outpath, f"✅ Generated demo animation for {MOTION_TRIGGER_WORD[motion_template]}!"

        except Exception as e:
            # UI boundary: report the failure as a status message.
            print(f"Error in video generation: {e}")
            return None, f"❌ Error: {str(e)}"
|
|
|
|
|
def _list_example_images(example_dir, limit=5):
    """Return up to *limit* example rows for the images inside *example_dir*.

    File names are matched case-insensitively against .jpg/.jpeg/.png and
    sorted so the selection is stable across runs. Each row is a
    one-element list, the shape ``gr.Examples`` expects for a single input.
    A missing directory (or a non-directory path) yields an empty list.
    """
    if not os.path.isdir(example_dir):
        return []
    image_suffixes = ('.jpg', '.png', '.jpeg')
    names = sorted(
        name for name in os.listdir(example_dir)
        if name.lower().endswith(image_suffixes)
    )
    return [[os.path.join(example_dir, name)] for name in names[:limit]]


def create_interface():
    """Create Gradio interface compatible with v3.41.2.

    Builds a two-column Blocks layout: image upload, motion-template
    dropdown, generate button and status box on the left; the resulting
    video on the right. Example images found in ``./assets/test_image``
    are offered below when present.

    Returns:
        The assembled ``gr.Blocks`` application (not yet launched).
    """
    mimo = MIMODemo()

    # The image and the video each get their own element id (DOM ids must be
    # unique); both are pinned to the same height.
    css = """
    #fixed_size_img, #fixed_size_video {
        height: 500px !important;
        max-height: 500px !important;
    }
    .gradio-container {
        max-width: 1200px !important;
        margin: auto !important;
    }
    """

    template_names = list(MOTION_TRIGGER_WORD.keys())

    with gr.Blocks(css=css, title="MIMO Demo") as demo:

        # NOTE(review): the emoji in the heading was restored from a
        # mis-decoded byte sequence - confirm the intended glyph.
        gr.HTML("""
        <div style="text-align: center; margin-bottom: 20px;">
            <h1>🎭 MIMO Demo - Controllable Character Video Synthesis</h1>
            <p>Transform character images into animated videos with controllable motion and scenes</p>
            <p>
                <a href="https://menyifang.github.io/projects/MIMO/index.html" target="_blank">Project Page</a> |
                <a href="https://arxiv.org/abs/2409.16160" target="_blank">Paper</a> |
                <a href="https://github.com/menyifang/MIMO" target="_blank">GitHub</a>
            </p>
        </div>
        """)

        with gr.Accordion("🔧 Instructions", open=True):
            gr.Markdown("""
            ### How to use:
            1. **Upload a character image**: Use a full-body, front-facing image with clear visibility
            2. **Select motion template**: Choose from the available motion templates
            3. **Generate**: Click "Generate Animation" to create your character animation

            ### Tips:
            - Best results with clear, well-lit character images
            - Processing may take 1-2 minutes depending on video length
            - This is a demo version - full functionality requires GPU resources
            """)

        with gr.Row():
            with gr.Column():
                img_input = gr.Image(
                    label='Upload Character Image',
                    type="pil",
                    elem_id="fixed_size_img"
                )

                motion_dropdown = gr.Dropdown(
                    choices=template_names,
                    value=template_names[0],
                    label="Select Motion Template",
                )

                submit_btn = gr.Button("🎬 Generate Animation", variant='primary')

                status_text = gr.Textbox(
                    label="Status",
                    interactive=False,
                    value="Ready to generate... (Demo mode)"
                )

            with gr.Column():
                output_video = gr.Video(
                    label="Generated Animation",
                    elem_id="fixed_size_video"
                )

        submit_btn.click(
            fn=mimo.generate_video,
            inputs=[img_input, motion_dropdown],
            outputs=[output_video, status_text],
        )

        # Offer bundled example images only when some actually exist.
        example_paths = _list_example_images('./assets/test_image')
        if example_paths:
            gr.Examples(
                examples=example_paths,
                inputs=[img_input],
                label="Example Images"
            )

    return demo
|
|
|
|
|
if __name__ == "__main__":
    # Script entry point: build the UI and serve it.
    # NOTE(review): the banner emoji was restored from a mis-decoded byte
    # sequence - confirm the intended glyph.
    print("🚀 Starting MIMO Demo...")

    demo = create_interface()

    demo.launch(
        server_name="0.0.0.0",  # listen on every network interface
        server_port=7860,
        share=False,            # no public share link
        show_error=True,        # show errors in the browser UI
        quiet=False,
    )