# Spaces status: Runtime error
"""Render a short animation with AnimateDiff SparseCtrl and save it as a GIF.

The script loads a motion adapter, a sparse (scribble) ControlNet and a VAE,
assembles an ``AnimateDiffSparseControlNetPipeline`` around the base model,
fuses the motion LoRA, conditions generation on three scribble frames placed
at frame indices 0, 8 and 15, and writes the result to ``output.gif``.
"""

import torch
from diffusers import AnimateDiffSparseControlNetPipeline
from diffusers.models import AutoencoderKL, MotionAdapter, SparseControlNetModel
from diffusers.schedulers import DPMSolverMultistepScheduler
from diffusers.utils import export_to_gif, load_image

torch.backends.cuda.matmul.allow_tf32 = True  # Enable TF32 for speed

# Pick the device at runtime instead of assuming a GPU exists; half precision
# is only used on CUDA, the CPU fallback runs in float32.
use_cuda = torch.cuda.is_available()
device = "cuda" if use_cuda else "cpu"
dtype = torch.float16 if use_cuda else torch.float32

# Model IDs
model_id = "SG161222/Realistic_Vision_V5.1_noVAE"
motion_adapter_id = "guoyww/animatediff-motion-adapter-v1-5-3"
controlnet_id = "guoyww/animatediff-sparsectrl-scribble"
lora_adapter_id = "guoyww/animatediff-motion-lora-v1-5-3"
vae_id = "stabilityai/sd-vae-ft-mse"

# Load the sub-models once. No device_map is passed here: the single
# `.to(device)` on the assembled pipeline below decides where every component
# lives, so there is exactly one placement step.
motion_adapter = MotionAdapter.from_pretrained(motion_adapter_id, torch_dtype=dtype)
controlnet = SparseControlNetModel.from_pretrained(controlnet_id, torch_dtype=dtype)
vae = AutoencoderKL.from_pretrained(vae_id, torch_dtype=dtype)

# DPM-Solver++ multistep scheduler with Karras sigmas.
scheduler = DPMSolverMultistepScheduler.from_pretrained(
    model_id,
    subfolder="scheduler",
    beta_schedule="linear",
    algorithm_type="dpmsolver++",
    use_karras_sigmas=True,
)

pipe = AnimateDiffSparseControlNetPipeline.from_pretrained(
    model_id,
    motion_adapter=motion_adapter,
    controlnet=controlnet,
    vae=vae,
    scheduler=scheduler,
    torch_dtype=dtype,
).to(device)

# xformers attention is an optional optimisation that needs CUDA and the
# xformers package. Skip it rather than abort the run when it is missing.
if use_cuda:
    try:
        pipe.enable_xformers_memory_efficient_attention()
    except ModuleNotFoundError as exc:
        print(f"xformers not available, using default attention: {exc}")

pipe.load_lora_weights(lora_adapter_id, adapter_name="motion_lora")
pipe.fuse_lora(lora_scale=1.0)

# Preload conditioning frames
image_files = [
    "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/animatediff-scribble-1.png",
    "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/animatediff-scribble-2.png",
    "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/animatediff-scribble-3.png",
]
# One target frame index per conditioning image, in the same order.
condition_frame_indices = [0, 8, 15]
conditioning_frames = [load_image(img) for img in image_files]

# Generator for reproducibility
generator = torch.Generator(device).manual_seed(1337)

# Run the pipeline without autograd bookkeeping.
with torch.inference_mode():
    video = pipe(
        prompt="an aerial view of a cyberpunk city, night time, neon lights, masterpiece, high quality",
        negative_prompt="low quality, worst quality, letterboxed",
        num_inference_steps=25,
        conditioning_frames=conditioning_frames,
        controlnet_conditioning_scale=1.0,
        controlnet_frame_indices=condition_frame_indices,
        generator=generator,
    ).frames[0]

export_to_gif(video, "output.gif")

# Free memory
del pipe, motion_adapter, controlnet, vae
if use_cuda:
    torch.cuda.empty_cache()