# app.py (Corrected Version)
import gradio as gr
from PIL import Image
import os
import imageio
from api.ltx_server import video_generation_service
from huggingface_hub import logging
logging.set_verbosity_debug()  # a single call is enough; each set_verbosity call overrides the previous level
# --- UI HELPER FUNCTIONS ---
# ... (calculate_new_dimensions and handle_media_upload_for_dims are unchanged) ...
TARGET_FIXED_SIDE = 768
MIN_DIM_SLIDER = 256
MAX_IMAGE_SIZE = 1280
def calculate_new_dimensions(orig_w, orig_h):
    if orig_w == 0 or orig_h == 0:
        return int(TARGET_FIXED_SIDE), int(TARGET_FIXED_SIDE)
    if orig_w >= orig_h:  # landscape or square: fix the height
        new_h, aspect_ratio = TARGET_FIXED_SIDE, orig_w / orig_h
        new_w = round((new_h * aspect_ratio) / 32) * 32  # snap width to a multiple of 32
        new_w = max(MIN_DIM_SLIDER, min(new_w, MAX_IMAGE_SIZE))
        new_h = max(MIN_DIM_SLIDER, min(new_h, MAX_IMAGE_SIZE))
    else:  # portrait: fix the width
        new_w, aspect_ratio = TARGET_FIXED_SIDE, orig_h / orig_w
        new_h = round((new_w * aspect_ratio) / 32) * 32  # snap height to a multiple of 32
        new_h = max(MIN_DIM_SLIDER, min(new_h, MAX_IMAGE_SIZE))
        new_w = max(MIN_DIM_SLIDER, min(new_w, MAX_IMAGE_SIZE))
    return int(new_h), int(new_w)
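# Example: a 1920x1080 landscape upload gives new_h = 768 and
# new_w = round((768 * 1920/1080) / 32) * 32 = 1376, which is then
# clamped to MAX_IMAGE_SIZE, so the function returns (768, 1280).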
def handle_media_upload_for_dims(filepath, current_h, current_w):
    if not filepath or not os.path.exists(str(filepath)):
        return gr.update(value=current_h), gr.update(value=current_w)
    try:
        if str(filepath).lower().endswith(('.png', '.jpg', '.jpeg', '.webp')):
            with Image.open(filepath) as img:
                orig_w, orig_h = img.size
        else:  # assume it is a video
            with imageio.get_reader(filepath) as reader:
                meta = reader.get_meta_data()
                orig_w, orig_h = meta.get('size', (current_w, current_h))
        new_h, new_w = calculate_new_dimensions(orig_w, orig_h)
        return gr.update(value=new_h), gr.update(value=new_w)
    except Exception as e:
        print(f"Error reading media dimensions: {e}")
        return gr.update(value=current_h), gr.update(value=current_w)
def update_frame_slider(duration):
    """Updates the maximum of the middle-frame slider based on the duration."""
    fps = 24.0
    max_frames = int(duration * fps)
    # Make sure the default value does not exceed the new maximum
    new_value = 48 if max_frames >= 48 else max_frames // 2
    return gr.update(maximum=max_frames, value=new_value)
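# Example: duration = 8 s at 24 fps -> maximum = 192, value stays at 48;
# duration = 1.5 s -> maximum = 36, value = 18.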
# --- WRAPPER FUNCTION THAT CALLS THE SERVICE ---
def gradio_generate_wrapper(
    prompt, negative_prompt, mode,
    # Keyframe inputs
    start_image,
    middle_image, middle_frame, middle_weight,
    end_image, end_weight,
    # Other inputs
    input_video, height, width, duration,
    frames_to_use, seed, randomize_seed,
    guidance_scale, improve_texture,
    progress=gr.Progress(track_tqdm=True)
):
    try:
        def progress_handler(step, total_steps):
            progress(step / total_steps, desc="Generating video...")

        output_path, used_seed = video_generation_service.generate(
            prompt=prompt, negative_prompt=negative_prompt, mode=mode,
            start_image_filepath=start_image,
            middle_image_filepath=middle_image,
            middle_frame_number=middle_frame,
            middle_image_weight=middle_weight,
            end_image_filepath=end_image,
            end_image_weight=end_weight,
            input_video_filepath=input_video,
            height=int(height), width=int(width), duration=float(duration),
            frames_to_use=int(frames_to_use), seed=int(seed),
            randomize_seed=bool(randomize_seed), guidance_scale=float(guidance_scale),
            improve_texture=bool(improve_texture), progress_callback=progress_handler
        )
        return output_path, used_seed
    except ValueError as e:
        raise gr.Error(str(e))
    except Exception as e:
        print(f"Unexpected error during generation: {e}")
        raise gr.Error("An unexpected error occurred. Check the logs.")
# --- GRADIO INTERFACE DEFINITION ---
css = "#col-container { margin: 0 auto; max-width: 900px; }"
with gr.Blocks(css=css) as demo:
    gr.Markdown("# LTX Video with Keyframes")
    gr.Markdown("Guide the video generation with start, middle, and end images.")
    with gr.Row():
        with gr.Column():
            with gr.Tab("image-to-video (Keyframes)") as image_tab:
                i2v_prompt = gr.Textbox(label="Prompt", value="A beautiful transition between the images", lines=2)
                with gr.Row():
                    with gr.Column(scale=1):
                        gr.Markdown("#### Start (Required)")
                        start_image_i2v = gr.Image(label="Start Image", type="filepath", sources=["upload", "clipboard"])
                with gr.Row():
                    with gr.Accordion("Optional conditioning images", open=False):
                        with gr.Column(scale=1):
                            gr.Markdown("#### Middle (Optional)")
                            middle_image_i2v = gr.Image(label="Middle Image", type="filepath", sources=["upload", "clipboard"])
                            middle_frame_i2v = gr.Slider(label="Target Frame", minimum=0, maximum=200, step=1, value=48)
                            middle_weight_i2v = gr.Slider(label="Weight/Strength", minimum=0.0, maximum=1.0, step=0.05, value=1.0)
                        with gr.Column(scale=1):
                            gr.Markdown("#### End (Optional)")
                            end_image_i2v = gr.Image(label="End Image", type="filepath", sources=["upload", "clipboard"])
                            end_weight_i2v = gr.Slider(label="Weight/Strength", minimum=0.0, maximum=1.0, step=0.05, value=1.0)
                i2v_button = gr.Button("Generate Image-to-Video", variant="primary")
            with gr.Tab("text-to-video") as text_tab:
                t2v_prompt = gr.Textbox(label="Prompt", value="A majestic dragon flying over a medieval castle", lines=3)
                t2v_button = gr.Button("Generate Text-to-Video", variant="primary")
            with gr.Tab("video-to-video") as video_tab:
                video_v2v = gr.Video(label="Input Video", sources=["upload", "webcam"])
                frames_to_use = gr.Slider(label="Frames to use from input video", minimum=9, maximum=257, value=9, step=8, info="Must be N*8+1.")
                v2v_prompt = gr.Textbox(label="Prompt", value="Change the style to cinematic anime", lines=3)
                v2v_button = gr.Button("Generate Video-to-Video", variant="primary")
            duration_input = gr.Slider(label="Video Duration (seconds)", minimum=1, maximum=30, value=8, step=0.5)
            improve_texture = gr.Checkbox(label="Improve Texture (multi-scale)", value=True, visible=True)
        with gr.Column():
            output_video = gr.Video(label="Generated Video", interactive=False)
            with gr.Accordion("Advanced settings", open=False):
                mode = gr.Dropdown(["text-to-video", "image-to-video", "video-to-video"], label="task", value="image-to-video", visible=False)
                negative_prompt_input = gr.Textbox(label="Negative Prompt", value="worst quality, blurry, jittery", lines=2)
                with gr.Row():
                    seed_input = gr.Number(label="Seed", value=42, precision=0)
                    randomize_seed_input = gr.Checkbox(label="Randomize Seed", value=True)
                guidance_scale_input = gr.Slider(label="Guidance Scale (CFG)", minimum=1.0, maximum=10.0, value=3.0, step=0.1)
                with gr.Row():
                    height_input = gr.Slider(label="Height", value=512, step=32, minimum=MIN_DIM_SLIDER, maximum=MAX_IMAGE_SIZE)
                    width_input = gr.Slider(label="Width", value=704, step=32, minimum=MIN_DIM_SLIDER, maximum=MAX_IMAGE_SIZE)
    # --- UI EVENT LOGIC ---
    start_image_i2v.upload(fn=handle_media_upload_for_dims, inputs=[start_image_i2v, height_input, width_input], outputs=[height_input, width_input])
    video_v2v.upload(fn=handle_media_upload_for_dims, inputs=[video_v2v, height_input, width_input], outputs=[height_input, width_input])
    duration_input.change(fn=update_frame_slider, inputs=duration_input, outputs=middle_frame_i2v)

    image_tab.select(fn=lambda: "image-to-video", outputs=[mode])
    text_tab.select(fn=lambda: "text-to-video", outputs=[mode])
    video_tab.select(fn=lambda: "video-to-video", outputs=[mode])
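    # The hidden `mode` dropdown mirrors the currently selected tab, so the
    # shared wrapper knows which pipeline to dispatch without a visible control.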
    # Placeholders for buttons that do not use certain inputs
    none_image = gr.Textbox(visible=False, value=None)
    none_video = gr.Textbox(visible=False, value=None)
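    # These hidden Textboxes deliver None for the unused filepath arguments,
    # so all three buttons can share gradio_generate_wrapper's signature.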
    # Parameters shared by all tasks
    shared_params = [
        height_input, width_input, duration_input, frames_to_use,
        seed_input, randomize_seed_input, guidance_scale_input, improve_texture
    ]
    i2v_inputs = [
        i2v_prompt, negative_prompt_input, mode,
        start_image_i2v, middle_image_i2v, middle_frame_i2v, middle_weight_i2v,
        end_image_i2v, end_weight_i2v,
        none_video,  # placeholder for input_video
        *shared_params
    ]
    t2v_inputs = [
        t2v_prompt, negative_prompt_input, mode,
        none_image, none_image, gr.Number(value=-1, visible=False), gr.Slider(value=0, visible=False),  # keyframe placeholders
        none_image, gr.Slider(value=0, visible=False),
        none_video,  # placeholder for input_video
        *shared_params
    ]
    v2v_inputs = [
        v2v_prompt, negative_prompt_input, mode,
        none_image, none_image, gr.Number(value=-1, visible=False), gr.Slider(value=0, visible=False),  # keyframe placeholders
        none_image, gr.Slider(value=0, visible=False),
        video_v2v,  # the real video input
        *shared_params
    ]
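    # The ordering of each inputs list must match gradio_generate_wrapper's
    # positional signature exactly; *shared_params appends the common controls
    # in the order the wrapper expects.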
    common_outputs = [output_video, seed_input]

    i2v_button.click(fn=gradio_generate_wrapper, inputs=i2v_inputs, outputs=common_outputs, api_name="image_to_video_keyframes")
    t2v_button.click(fn=gradio_generate_wrapper, inputs=t2v_inputs, outputs=common_outputs, api_name="text_to_video")
    v2v_button.click(fn=gradio_generate_wrapper, inputs=v2v_inputs, outputs=common_outputs, api_name="video_to_video")
if __name__ == "__main__":
    demo.queue().launch(debug=True, share=False)