Spaces:
Running
on
Zero
Running
on
Zero
File size: 10,952 Bytes
c8a8fcf bfc6564 c8a8fcf bfc6564 c8a8fcf bfc6564 c8a8fcf bfc6564 c8a8fcf bfc6564 c8a8fcf bfc6564 c8a8fcf bfc6564 c8a8fcf bfc6564 c8a8fcf bfc6564 c8a8fcf bfc6564 c8a8fcf bfc6564 c8a8fcf bfc6564 c8a8fcf bfc6564 c8a8fcf bfc6564 c8a8fcf bfc6564 c8a8fcf bfc6564 c8a8fcf bfc6564 c8a8fcf bfc6564 c8a8fcf bfc6564 c8a8fcf bfc6564 c8a8fcf bfc6564 c8a8fcf bfc6564 c8a8fcf bfc6564 c8a8fcf bfc6564 c8a8fcf |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 |
import os
if os.getcwd() != '/home/user/app':
os.chdir('/home/user/app')
import sys
import subprocess
import asyncio
from typing import Sequence, Mapping, Any, Union
print("Importing ComfyUI's main.py for setup...")
import main
print("ComfyUI main imported.")
import torch
import gradio as gr
from huggingface_hub import hf_hub_download
from comfy import model_management
import spaces
from PIL import Image
import random
import nodes # Import nodes after main has set everything up
# --- Manually trigger the node initialization ---
# This step is normally done inside main.start_comfyui(), but we do it here.
# It loads all built-in, extra, and custom nodes into the NODE_CLASS_MAPPINGS.
# nodes.init_extra_nodes() is driven to completion synchronously on a freshly
# created event loop, because this runs at import time with no loop running.
# The loop is also installed as the current event loop and is left open
# afterwards (it is never closed in this file).
print("Initializing ComfyUI nodes...")
loop = asyncio.new_event_loop()
asyncio.set_event_loop(loop)
# Blocks until node registration has finished; the lookups into
# nodes.NODE_CLASS_MAPPINGS further down depend on this having completed.
loop.run_until_complete(nodes.init_extra_nodes())
print("Nodes initialized.")
# --- Helper function from the original script ---
def get_value_at_index(obj: Union[Sequence, Mapping], index: int) -> Any:
    """Return element ``index`` of a node result.

    ``obj`` is first indexed directly. If that lookup raises ``KeyError``
    (i.e. ``obj`` is a mapping without ``index`` as a key), the element is
    taken from ``obj["result"]`` instead.

    Args:
        obj: A sequence, or a mapping that either contains ``index`` as a key
            or carries its outputs under the ``"result"`` key.
        index: Position (or key) of the wanted output.

    Returns:
        The selected element.

    Raises:
        KeyError: If the mapping has neither ``index`` nor ``"result"``.
        IndexError: If ``index`` is out of range for the indexed sequence.
    """
    try:
        value = obj[index]
    except KeyError:
        # Mapping-style result: the outputs live under the "result" key.
        value = obj["result"][index]
    return value
# --- Model Downloads ---
# Each entry is (repo_id, path of the file inside the repo, local target dir).
# The extension must be spelled ".safetensors" exactly: the repo path is looked
# up verbatim, and the loaders further down refer to these files by their
# ".safetensors" names.
# NOTE(review): hf_hub_download appears to keep the repo-relative sub-path
# (e.g. "split_files/...") underneath local_dir -- confirm that the bare file
# names passed to the ComfyUI loaders resolve to these locations.
_MODEL_FILES = (
    (
        "Comfy-Org/Wan_2.1_ComfyUI_repackaged",
        "split_files/text_encoders/umt5_xxl_fp8_e4m3fn_scaled.safetensors",
        "models/text_encoders",
    ),
    (
        "Comfy-Org/Wan_2.2_ComfyUI_Repackaged",
        "split_files/diffusion_models/wan2.2_i2v_low_noise_14B_fp8_scaled.safetensors",
        "models/diffusion_models",
    ),
    (
        "Comfy-Org/Wan_2.2_ComfyUI_Repackaged",
        "split_files/diffusion_models/wan2.2_i2v_high_noise_14B_fp8_scaled.safetensors",
        "models/diffusion_models",
    ),
    (
        "Comfy-Org/Wan_2.1_ComfyUI_repackaged",
        "split_files/vae/wan_2.1_vae.safetensors",
        "models/vae",
    ),
    (
        "Comfy-Org/Wan_2.1_ComfyUI_repackaged",
        "split_files/clip_vision/clip_vision_h.safetensors",
        "models/clip_vision",
    ),
    (
        "Kijai/WanVideo_comfy",
        "Wan22-Lightning/Wan2.2-Lightning_I2V-A14B-4steps-lora_HIGH_fp16.safetensors",
        "models/loras",
    ),
    (
        "Kijai/WanVideo_comfy",
        "Wan22-Lightning/Wan2.2-Lightning_I2V-A14B-4steps-lora_LOW_fp16.safetensors",
        "models/loras",
    ),
)

print("Downloading models from Hugging Face Hub...")
for _repo, _path, _target_dir in _MODEL_FILES:
    hf_hub_download(repo_id=_repo, filename=_path, local_dir=_target_dir)
print("Downloads complete.")
# --- ZeroGPU: Pre-load models and instantiate nodes globally ---
# This part will now work because NODE_CLASS_MAPPINGS is correctly populated.
# Everything below runs once at import time. The resulting module-level names
# (node instances and loaded/patched models) are read by generate_video().

# One instance of every node class used by the workflow, looked up by its
# registered name in nodes.NODE_CLASS_MAPPINGS.
cliploader = nodes.NODE_CLASS_MAPPINGS["CLIPLoader"]()
cliptextencode = nodes.NODE_CLASS_MAPPINGS["CLIPTextEncode"]()
unetloader = nodes.NODE_CLASS_MAPPINGS["UNETLoader"]()
vaeloader = nodes.NODE_CLASS_MAPPINGS["VAELoader"]()
clipvisionloader = nodes.NODE_CLASS_MAPPINGS["CLIPVisionLoader"]()
loadimage = nodes.NODE_CLASS_MAPPINGS["LoadImage"]()
clipvisionencode = nodes.NODE_CLASS_MAPPINGS["CLIPVisionEncode"]()
loraloadermodelonly = nodes.NODE_CLASS_MAPPINGS["LoraLoaderModelOnly"]()
modelsamplingsd3 = nodes.NODE_CLASS_MAPPINGS["ModelSamplingSD3"]()
# NOTE(review): "Pathch" is the spelling of the registered key used here;
# presumably it matches the custom node's own name -- do not "correct" it
# without checking the node pack.
pathchsageattentionkj = nodes.NODE_CLASS_MAPPINGS["PathchSageAttentionKJ"]()
wanfirstlastframetovideo = nodes.NODE_CLASS_MAPPINGS["WanFirstLastFrameToVideo"]()
ksampleradvanced = nodes.NODE_CLASS_MAPPINGS["KSamplerAdvanced"]()
vaedecode = nodes.NODE_CLASS_MAPPINGS["VAEDecode"]()
createvideo = nodes.NODE_CLASS_MAPPINGS["CreateVideo"]()
savevideo = nodes.NODE_CLASS_MAPPINGS["SaveVideo"]()
imageresize = nodes.NODE_CLASS_MAPPINGS["ImageResize+"]()

# Load the base models by file name: text encoder (requested on "cpu"), the
# low-noise and high-noise diffusion models, the VAE and the CLIP vision model.
cliploader_38 = cliploader.load_clip(clip_name="umt5_xxl_fp8_e4m3fn_scaled.safetensors", type="wan", device="cpu")
unetloader_37_low_noise = unetloader.load_unet(unet_name="wan2.2_i2v_low_noise_14B_fp8_scaled.safetensors", weight_dtype="default")
unetloader_91_high_noise = unetloader.load_unet(unet_name="wan2.2_i2v_high_noise_14B_fp8_scaled.safetensors", weight_dtype="default")
vaeloader_39 = vaeloader.load_vae(vae_name="wan_2.1_vae.safetensors")
clipvisionloader_49 = clipvisionloader.load_clip(clip_name="clip_vision_h.safetensors")

# Apply the matching HIGH/LOW LoRA (strength 0.8) to each diffusion model.
loraloadermodelonly_94_high = loraloadermodelonly.load_lora_model_only(lora_name="Wan2.2-Lightning_I2V-A14B-4steps-lora_HIGH_fp16.safetensors", strength_model=0.8, model=get_value_at_index(unetloader_91_high_noise, 0))
loraloadermodelonly_95_low = loraloadermodelonly.load_lora_model_only(lora_name="Wan2.2-Lightning_I2V-A14B-4steps-lora_LOW_fp16.safetensors", strength_model=0.8, model=get_value_at_index(unetloader_37_low_noise, 0))

# Patch chain per model: LoRA output -> ModelSamplingSD3 (shift=8) ->
# sage-attention patch ("auto"). The *_98_low / *_96_high results are the
# models the samplers in generate_video() use.
modelsamplingsd3_93_low = modelsamplingsd3.patch(shift=8, model=get_value_at_index(loraloadermodelonly_95_low, 0))
pathchsageattentionkj_98_low = pathchsageattentionkj.patch(sage_attention="auto", model=get_value_at_index(modelsamplingsd3_93_low, 0))
modelsamplingsd3_79_high = modelsamplingsd3.patch(shift=8, model=get_value_at_index(loraloadermodelonly_94_high, 0))
pathchsageattentionkj_96_high = pathchsageattentionkj.patch(sage_attention="auto", model=get_value_at_index(modelsamplingsd3_79_high, 0))

# Collect the loader outputs whose first element should be handed to
# model_management.load_models_gpu(). The fully patched models above are not
# part of this list.
model_loaders = [cliploader_38, unetloader_37_low_noise, unetloader_91_high_noise, vaeloader_39, clipvisionloader_49, loraloadermodelonly_94_high, loraloadermodelonly_95_low]
# For each loader output take element 0 and, if it has a `patcher` attribute,
# use that attribute instead. Entries are skipped when element 0 is a dict or
# when its `patcher` attribute is a dict.
valid_models = [getattr(loader[0], 'patcher', loader[0]) for loader in model_loaders if not isinstance(loader[0], dict) and not isinstance(getattr(loader[0], 'patcher', None), dict)]
model_management.load_models_gpu(valid_models)
# --- App Logic ---
def calculate_dimensions(image_path):
    """Compute the output (width, height) for a given input image.

    Square images map to 480x480. Otherwise the longer side is set to 832,
    the shorter side is scaled to keep the aspect ratio, and both are rounded
    down to a multiple of 16. Each side is then clamped to at least 16 so that
    extremely elongated images cannot produce a zero-sized dimension.

    Args:
        image_path: Path of the image file to inspect.

    Returns:
        Tuple ``(width, height)`` of ints, each a positive multiple of 16.
    """
    long_side = 832
    multiple = 16

    # Only the size is needed; the file is closed as soon as it has been read.
    with Image.open(image_path) as img:
        width, height = img.size

    if width == height:
        return 480, 480

    if width > height:
        new_width = long_side
        new_height = int(height * (long_side / width))
    else:
        new_height = long_side
        new_width = int(width * (long_side / height))

    def snap(value):
        # Round down to the grid, but never below one grid cell: without the
        # clamp a short side under 16 px would floor to 0.
        return max(multiple, (value // multiple) * multiple)

    return snap(new_width), snap(new_height)
@spaces.GPU(duration=120)
def generate_video(prompt, first_image_path, last_image_path, duration_seconds):
    """Generate a video that transitions from a first frame to a last frame.

    Both images are stretched to dimensions derived from the first image, the
    prompt and images are encoded, and sampling runs in two passes (high-noise
    model for steps 0-4, low-noise model for the remaining steps) before the
    result is decoded and saved as an mp4.

    Args:
        prompt: Positive text prompt.
        first_image_path: File path of the starting frame; it also determines
            the output dimensions.
        last_image_path: File path of the ending frame.
        duration_seconds: Requested duration in seconds; converted to frames
            at 16 fps and clamped to the range 1..81.

    Returns:
        Path of the saved video as ``"output/<filename>"``.

    Raises:
        gr.Error: If either image path is missing.
    """
    # Gradio passes None for an image component that was left empty; fail with
    # a readable message instead of an error from deep inside image loading.
    if not first_image_path or not last_image_path:
        raise gr.Error("Please provide both a first frame and a last frame image.")

    # random.randint is inclusive on both ends, so the upper bound must be
    # 2**64 - 1 for the seed to fit in an unsigned 64-bit integer.
    max_seed = 2**64 - 1

    with torch.inference_mode():
        FPS, MAX_FRAMES = 16, 81
        length_in_frames = max(1, min(int(duration_seconds * FPS), MAX_FRAMES))
        print(f"Requested duration: {duration_seconds}s. Calculated frames: {length_in_frames}")

        # Both frames are resized to the same target size, taken from the
        # first image only.
        target_width, target_height = calculate_dimensions(first_image_path)
        loaded_first_image = loadimage.load_image(image=first_image_path)
        resized_first_image = imageresize.execute(
            width=target_width,
            height=target_height,
            interpolation="bicubic",
            method="stretch",
            image=get_value_at_index(loaded_first_image, 0),
        )
        loaded_last_image = loadimage.load_image(image=last_image_path)
        resized_last_image = imageresize.execute(
            width=target_width,
            height=target_height,
            interpolation="bicubic",
            method="stretch",
            image=get_value_at_index(loaded_last_image, 0),
        )

        # Text conditioning: user prompt plus a fixed negative prompt.
        cliptextencode_6 = cliptextencode.encode(
            text=prompt,
            clip=get_value_at_index(cliploader_38, 0),
        )
        cliptextencode_7_negative = cliptextencode.encode(
            text="low quality, worst quality, jpeg artifacts, ugly, deformed, blurry",
            clip=get_value_at_index(cliploader_38, 0),
        )

        # CLIP vision embeddings of the two resized frames.
        clipvisionencode_51 = clipvisionencode.encode(
            crop="none",
            clip_vision=get_value_at_index(clipvisionloader_49, 0),
            image=get_value_at_index(resized_first_image, 0),
        )
        clipvisionencode_87 = clipvisionencode.encode(
            crop="none",
            clip_vision=get_value_at_index(clipvisionloader_49, 0),
            image=get_value_at_index(resized_last_image, 0),
        )

        wanfirstlastframetovideo_83 = wanfirstlastframetovideo.EXECUTE_NORMALIZED(
            width=target_width,
            height=target_height,
            length=length_in_frames,
            batch_size=1,
            positive=get_value_at_index(cliptextencode_6, 0),
            negative=get_value_at_index(cliptextencode_7_negative, 0),
            vae=get_value_at_index(vaeloader_39, 0),
            clip_vision_start_image=get_value_at_index(clipvisionencode_51, 0),
            clip_vision_end_image=get_value_at_index(clipvisionencode_87, 0),
            start_image=get_value_at_index(resized_first_image, 0),
            end_image=get_value_at_index(resized_last_image, 0),
        )
        # Outputs 0, 1, 2 are used as positive conditioning, negative
        # conditioning and the latent for the samplers.
        ksampler_positive = get_value_at_index(wanfirstlastframetovideo_83, 0)
        ksampler_negative = get_value_at_index(wanfirstlastframetovideo_83, 1)
        ksampler_latent = get_value_at_index(wanfirstlastframetovideo_83, 2)

        # Pass 1: high-noise model, steps 0-4 of 8, noise added and leftover
        # noise returned for the second pass.
        ksampleradvanced_101 = ksampleradvanced.sample(
            add_noise="enable",
            noise_seed=random.randint(1, max_seed),
            steps=8,
            cfg=1,
            sampler_name="euler",
            scheduler="simple",
            start_at_step=0,
            end_at_step=4,
            return_with_leftover_noise="enable",
            model=get_value_at_index(pathchsageattentionkj_96_high, 0),
            positive=ksampler_positive,
            negative=ksampler_negative,
            latent_image=ksampler_latent,
        )
        # Pass 2: low-noise model continues from step 4 on the latent from
        # pass 1, without adding new noise.
        ksampleradvanced_102 = ksampleradvanced.sample(
            add_noise="disable",
            noise_seed=random.randint(1, max_seed),
            steps=8,
            cfg=1,
            sampler_name="euler",
            scheduler="simple",
            start_at_step=4,
            end_at_step=10000,
            return_with_leftover_noise="disable",
            model=get_value_at_index(pathchsageattentionkj_98_low, 0),
            positive=ksampler_positive,
            negative=ksampler_negative,
            latent_image=get_value_at_index(ksampleradvanced_101, 0),
        )

        vaedecode_8 = vaedecode.decode(
            samples=get_value_at_index(ksampleradvanced_102, 0),
            vae=get_value_at_index(vaeloader_39, 0),
        )
        # Same frame rate as the one used to convert the duration to frames.
        createvideo_104 = createvideo.create_video(
            fps=FPS,
            images=get_value_at_index(vaedecode_8, 0),
        )
        savevideo_103 = savevideo.save_video(
            filename_prefix="ComfyUI_Video",
            format="mp4",
            codec="libx264",
            video=get_value_at_index(createvideo_104, 0),
        )
        return f"output/{savevideo_103['ui']['videos'][0]['filename']}"
# --- Gradio Interface (no changes needed) ---
with gr.Blocks() as app:
    # Header text.
    gr.Markdown("# Wan 2.2 First/Last Frame to Video")
    gr.Markdown(
        "Provide a starting image, an ending image, a text prompt, and a desired duration to generate a video transitioning between them."
    )

    with gr.Row():
        # Left column: all inputs and the trigger button.
        with gr.Column(scale=1):
            prompt_input = gr.Textbox(
                label="Prompt",
                value="a man dancing in the street, cinematic",
            )
            duration_slider = gr.Slider(
                minimum=1.0,
                maximum=5.0,
                value=2.0,
                step=0.1,
                label="Video Duration (seconds)",
            )
            with gr.Row():
                first_image = gr.Image(label="First Frame", type="filepath")
                last_image = gr.Image(label="Last Frame", type="filepath")
            generate_btn = gr.Button("Generate Video")
        # Right column: the resulting video.
        with gr.Column(scale=2):
            output_video = gr.Video(label="Generated Video")

    # The button and the example row feed the same four components, in the
    # order generate_video() expects its arguments.
    generation_inputs = [prompt_input, first_image, last_image, duration_slider]
    generate_btn.click(
        fn=generate_video,
        inputs=generation_inputs,
        outputs=[output_video],
    )
    gr.Examples(
        examples=[["a beautiful woman, cinematic", "examples/start.png", "examples/end.png", 2.5]],
        inputs=generation_inputs,
    )
if __name__ == "__main__":
    # Make sure the files referenced by the example row exist: create the
    # directory and solid-colour 512x512 placeholders for anything missing.
    # NOTE(review): this runs after gr.Examples(...) has already been built
    # above -- confirm Gradio does not need the files at construction time.
    if not os.path.exists("examples"):
        os.makedirs("examples")
    for placeholder_path, fill_color in (
        ("examples/start.png", "red"),
        ("examples/end.png", "blue"),
    ):
        if not os.path.exists(placeholder_path):
            Image.new('RGB', (512, 512), color=fill_color).save(placeholder_path)
    app.launch()