Eueuiaa committed on
Commit d795566 · verified · 1 Parent(s): b28f089

Update api/ltx_server_refactored.py

Files changed (1)
  1. api/ltx_server_refactored.py +368 -630
api/ltx_server_refactored.py CHANGED
@@ -1,72 +1,86 @@
1
- # ltx_server_refactored.py — VideoService (Modular Version with Simple Overlap Chunking)
2
- # Em api/ltx_server_refactored.py
 
3
 
4
- import warnings
5
- from huggingface_hub import logging
6
- import os, subprocess, shlex, tempfile
7
- import torch
8
  import json
9
- import numpy as np
10
- import random
11
  import os
12
- import io
13
-
14
- import shlex
15
- import yaml
16
- from typing import List, Dict
17
- from pathlib import Path
18
- import imageio
19
- from PIL import Image
20
- import tempfile
21
- from huggingface_hub import hf_hub_download
22
- import sys
23
- import subprocess
24
- import gc
25
  import shutil
26
- import contextlib
 
 
27
  import time
28
  import traceback
29
- from api.gpu_manager import gpu_manager
30
  from einops import rearrange
31
- import torch.nn.functional as F
32
- from managers.vae_manager import vae_manager_singleton
33
- from tools.video_encode_tool import video_encode_tool_singleton
 
 
34
35
  DEPS_DIR = Path("/data")
36
  LTX_VIDEO_REPO_DIR = DEPS_DIR / "LTX-Video"
37
- logging.set_verbosity_error()
38
- logging.set_verbosity_warning()
39
- logging.set_verbosity_info()
40
- logging.set_verbosity_debug()
41
- LTXV_DEBUG=1
42
- LTXV_FRAME_LOG_EVERY=8
43
- warnings.filterwarnings("ignore", category=UserWarning)
44
- warnings.filterwarnings("ignore", category=FutureWarning)
45
- warnings.filterwarnings("ignore", message=".*")
46
-
47
- # (Todas as funções de setup, helpers e inicialização da classe permanecem inalteradas)
48
- # ... (run_setup, add_deps_to_path, _query_gpu_processes_via_nvml, etc.)
49
- def run_setup():
50
- setup_script_path = "setup.py"
51
- if not os.path.exists(setup_script_path):
52
- print("[DEBUG] 'setup.py' não encontrado. Pulando clonagem de dependências.")
53
- return
54
- try:
55
- print("[DEBUG] Executando setup.py para dependências...")
56
- subprocess.run([sys.executable, setup_script_path], check=True)
57
- print("[DEBUG] Setup concluído com sucesso.")
58
- except subprocess.CalledProcessError as e:
59
- print(f"[DEBUG] ERRO no setup.py (code {e.returncode}). Abortando.")
60
- sys.exit(1)
61
- if not LTX_VIDEO_REPO_DIR.exists():
62
- print(f"[DEBUG] Repositório não encontrado em {LTX_VIDEO_REPO_DIR}. Rodando setup...")
63
- run_setup()
64
  def add_deps_to_path():
 
65
  repo_path = str(LTX_VIDEO_REPO_DIR.resolve())
66
- if str(LTX_VIDEO_REPO_DIR.resolve()) not in sys.path:
67
  sys.path.insert(0, repo_path)
68
- print(f"[DEBUG] Repo adicionado ao sys.path: {repo_path}")
69
- def calculate_padding(orig_h, orig_w, target_h, target_w):
70
  pad_h = target_h - orig_h
71
  pad_w = target_w - orig_w
72
  pad_top = pad_h // 2
@@ -74,651 +88,375 @@ def calculate_padding(orig_h, orig_w, target_h, target_w):
74
  pad_left = pad_w // 2
75
  pad_right = pad_w - pad_left
76
  return (pad_left, pad_right, pad_top, pad_bottom)
77
- def log_tensor_info(tensor, name="Tensor"):
 
 
78
  if not isinstance(tensor, torch.Tensor):
79
- print(f"\n[INFO] '{name}' não é tensor.")
80
  return
81
- print(f"\n--- Tensor: {name} ---")
82
- print(f" - Shape: {tuple(tensor.shape)}")
83
- print(f" - Dtype: {tensor.dtype}")
84
- print(f" - Device: {tensor.device}")
 
 
 
85
  if tensor.numel() > 0:
86
  try:
87
- print(f" - Min: {tensor.min().item():.4f} Max: {tensor.max().item():.4f} Mean: {tensor.mean().item():.4f}")
88
  except Exception:
89
- pass
90
- print("------------------------------------------\n")
91
 
92
- add_deps_to_path()
93
- from ltx_video.pipelines.pipeline_ltx_video import ConditioningItem, LTXMultiScalePipeline
94
- from ltx_video.utils.skip_layer_strategy import SkipLayerStrategy
95
- from ltx_video.models.autoencoders.vae_encode import un_normalize_latents, normalize_latents
96
- from ltx_video.pipelines.pipeline_ltx_video import adain_filter_latent
97
- from api.ltx.inference import (
98
- create_ltx_video_pipeline,
99
- create_latent_upsampler,
100
- load_image_to_tensor_with_resize_and_crop,
101
- seed_everething,
102
- )
103
 
104
  class VideoService:
105
- def _load_config(self):
106
- base = LTX_VIDEO_REPO_DIR / "configs"
107
- config_path = base / "ltxv-13b-0.9.8-distilled-fp8.yaml"
108
  with open(config_path, "r") as file:
109
  return yaml.safe_load(file)
110
 
111
- def finalize(self, keep_paths=None, extra_paths=None, clear_gpu=True):
112
- print("[DEBUG] Finalize: iniciando limpeza...")
113
- keep = set(keep_paths or []); extras = set(extra_paths or [])
114
- gc.collect()
115
- try:
116
- if clear_gpu and torch.cuda.is_available():
117
- torch.cuda.empty_cache()
118
- try:
119
- torch.cuda.ipc_collect()
120
- except Exception:
121
- pass
122
- except Exception as e:
123
- print(f"[DEBUG] Finalize: limpeza GPU falhou: {e}")
124
-
125
- def _load_models(self):
126
  t0 = time.perf_counter()
127
- LTX_REPO = "Lightricks/LTX-Video"
128
- print("[DEBUG] Baixando checkpoint principal...")
129
  distilled_model_path = hf_hub_download(
130
- repo_id=LTX_REPO,
131
  filename=self.config["checkpoint_path"],
132
- local_dir=os.getenv("HF_HOME"),
133
- cache_dir=os.getenv("HF_HOME_CACHE"),
134
  token=os.getenv("HF_TOKEN"),
135
  )
136
  self.config["checkpoint_path"] = distilled_model_path
137
- print(f"[DEBUG] Checkpoint em: {distilled_model_path}")
138
-
139
- print("[DEBUG] Baixando upscaler espacial...")
140
- spatial_upscaler_path = hf_hub_download(
141
- repo_id=LTX_REPO,
142
- filename=self.config["spatial_upscaler_model_path"],
143
- local_dir=os.getenv("HF_HOME"),
144
- cache_dir=os.getenv("HF_HOME_CACHE"),
145
- token=os.getenv("HF_TOKEN")
146
- )
147
- self.config["spatial_upscaler_model_path"] = spatial_upscaler_path
148
- print(f"[DEBUG] Upscaler em: {spatial_upscaler_path}")
149
 
150
- print("[DEBUG] Construindo pipeline...")
151
  pipeline = create_ltx_video_pipeline(
152
  ckpt_path=self.config["checkpoint_path"],
153
  precision=self.config["precision"],
154
- text_encoder_model_name_or_path=self.config["text_encoder_model_name_or_path"],
155
- sampler=self.config["sampler"],
156
- device="cpu",
157
- enhance_prompt=False,
158
- prompt_enhancer_image_caption_model_name_or_path=self.config["prompt_enhancer_image_caption_model_name_or_path"],
159
- prompt_enhancer_llm_model_name_or_path=self.config["prompt_enhancer_llm_model_name_or_path"],
160
  )
161
- print("[DEBUG] Pipeline pronto.")
162
-
163
  latent_upsampler = None
164
  if self.config.get("spatial_upscaler_model_path"):
165
- print("[DEBUG] Construindo latent_upsampler...")
166
  latent_upsampler = create_latent_upsampler(self.config["spatial_upscaler_model_path"], device="cpu")
167
- print("[DEBUG] Upsampler pronto.")
168
- print(f"[DEBUG] _load_models() tempo total={time.perf_counter()-t0:.3f}s")
169
- return pipeline, latent_upsampler
170
-
171
- def _apply_precision_policy(self):
172
- prec = str(self.config.get("precision", "")).lower()
173
- self.runtime_autocast_dtype = torch.float32
174
- if prec in ["float8_e4m3fn", "bfloat16"]:
175
- self.runtime_autocast_dtype = torch.bfloat16
176
- elif prec == "mixed_precision":
177
- self.runtime_autocast_dtype = torch.float16
178
 
179
- def _register_tmp_dir(self, d: str):
180
- if d and os.path.isdir(d):
181
- self._tmp_dirs.add(d); print(f"[DEBUG] Registrado tmp dir: {d}")
182
 
183
- @torch.no_grad()
184
- def _upsample_latents_internal(self, latents: torch.Tensor) -> torch.Tensor:
185
- try:
186
- if not self.latent_upsampler:
187
- raise ValueError("Latent Upsampler não está carregado.")
188
- latents_unnormalized = un_normalize_latents(latents, self.pipeline.vae, vae_per_channel_normalize=True)
189
- upsampled_latents = self.latent_upsampler(latents_unnormalized)
190
- return normalize_latents(upsampled_latents, self.pipeline.vae, vae_per_channel_normalize=True)
191
- except Exception as e:
192
- pass
193
- finally:
 
 
 
194
  torch.cuda.empty_cache()
195
- torch.cuda.ipc_collect()
196
- self.finalize(keep_paths=[])
197
-
198
- def _prepare_conditioning_tensor(self, filepath, height, width, padding_values):
199
- tensor = load_image_to_tensor_with_resize_and_crop(filepath, height, width)
200
- tensor = torch.nn.functional.pad(tensor, padding_values)
201
- log_tensor_info(tensor, f"_prepare_conditioning_tensor")
202
- return tensor.to(self.device, dtype=self.runtime_autocast_dtype)
203
-
204
 
205
- def _save_and_log_video(self, pixel_tensor, base_filename, fps, temp_dir, results_dir, used_seed, progress_callback=None):
206
- output_path = os.path.join(temp_dir, f"{base_filename}_.mp4")
207
- video_encode_tool_singleton.save_video_from_tensor(
208
- pixel_tensor, output_path, fps=fps, progress_callback=progress_callback
209
- )
210
- final_path = os.path.join(results_dir, f"{base_filename}_.mp4")
211
- shutil.move(output_path, final_path)
212
- print(f"[DEBUG] Vídeo salvo em: {final_path}")
213
- return final_path
214
-
215
- def _load_tensor(self, caminho):
216
- # Se já é um tensor, retorna diretamente
217
- if isinstance(caminho, torch.Tensor):
218
- return caminho
219
- # Se é bytes, carrega do buffer
220
- if isinstance(caminho, (bytes, bytearray)):
221
- return torch.load(io.BytesIO(caminho))
222
- # Caso contrário, assume que é um caminho de arquivo
223
- return torch.load(caminho)
224
-
225
-
226
- # ==============================================================================
227
- # --- FUNÇÕES MODULARES COM A LÓGICA DE CHUNKING SIMPLIFICADA ---
228
- # ==============================================================================
229
-
230
- def prepare_condition_items(self, items_list: List, height: int, width: int, num_frames: int):
231
- if not items_list: return []
232
- height_padded = ((height - 1) // 8 + 1) * 8
233
- width_padded = ((width - 1) // 8 + 1) * 8
234
- padding_values = calculate_padding(height, width, height_padded, width_padded)
235
- conditioning_items = []
236
- for media, frame, weight in items_list:
237
- tensor = self._prepare_conditioning_tensor(media, height, width, padding_values) if isinstance(media, str) else media.to(self.device, dtype=self.runtime_autocast_dtype)
238
- safe_frame = max(0, min(int(frame), num_frames - 1))
239
- conditioning_items.append(ConditioningItem(tensor, safe_frame, float(weight)))
240
- return conditioning_items
241
 
242
- def _prepare_condition_items_latent(self, items_list: List):
243
- if not items_list:
244
- return []
245
- conditioning_items = []
246
- for tensor_patch, frame, weight in items_list:
247
- # Verifica se já é um tensor
248
- if isinstance(tensor_patch, torch.Tensor):
249
- tensor = tensor_patch.to(self.device)
250
- # Se é bytes, carrega do buffer
251
- elif isinstance(tensor_patch, (bytes, bytearray)):
252
- tensor = torch.load(io.BytesIO(tensor_patch)).to(self.device)
253
- # Caso contrário, assume que é um caminho de arquivo
254
- else:
255
- tensor = torch.load(tensor_patch).to(self.device)
256
- safe_frame = max(0, int(frame))
257
- conditioning_items.append(ConditioningItem(tensor, safe_frame, float(weight)))
258
- return conditioning_items
259
-
260
- def generate_low(self, prompt, negative_prompt, height, width, duration, guidance_scale, seed, conditioning_items=None):
261
- used_seed = random.randint(0, 2**32 - 1) if seed is None else int(seed)
262
- seed_everething(used_seed)
263
- FPS = 24.0
264
 
265
- actual_num_frames = max(9, int(round((round(duration * FPS) - 1) / 8.0) * 8 + 1))
266
- height_padded = ((height - 1) // 8 + 1) * 8
267
- width_padded = ((width - 1) // 8 + 1) * 8
268
- temp_dir = tempfile.mkdtemp(prefix="ltxv_low_"); self._register_tmp_dir(temp_dir)
269
- results_dir = "/app/output"; os.makedirs(results_dir, exist_ok=True)
270
- downscale_factor = self.config.get("downscale_factor", 0.6666666)
271
- vae_scale_factor = self.pipeline.vae_scale_factor
272
- x_width = int(width_padded * downscale_factor)
273
- downscaled_width = x_width - (x_width % vae_scale_factor)
274
- x_height = int(height_padded * downscale_factor)
275
- downscaled_height = x_height - (x_height % vae_scale_factor)
276
- first_pass_kwargs = {
277
- "prompt": prompt, "negative_prompt": negative_prompt, "height": downscaled_height, "width": downscaled_width,
278
- "num_frames": actual_num_frames, "frame_rate": int(FPS), "generator": torch.Generator(device=self.device).manual_seed(used_seed),
279
- "output_type": "latent",
280
- #"conditioning_items": conditioning_items,
281
- #"guidance_scale": float(guidance_scale),
282
- **(self.config.get("first_pass", {}))
283
- }
284
 
285
- print(f"[DEBUG] generate_low.first_pass_kwargs: {first_pass_kwargs}")
286
- try:
287
- with torch.autocast(device_type="cuda", dtype=self.runtime_autocast_dtype, enabled=self.device == 'cuda'):
288
- latents = self.pipeline(**first_pass_kwargs).images
289
- pixel_tensor = vae_manager_singleton.decode(latents.clone(), decode_timestep=float(self.config.get("decode_timestep", 0.05)))
290
- video_path = self._save_and_log_video(pixel_tensor, "low_res_video", FPS, temp_dir, results_dir, used_seed)
291
- latents_cpu = latents.detach().to("cpu")
292
- tensor_path = os.path.join(results_dir, f"latents_low_res_{used_seed}.pt")
293
- torch.save(latents_cpu, tensor_path)
294
- return video_path, tensor_path, used_seed
295
-
296
- except Exception as e:
297
- print(f"[DEBUG] falhou: {e}")
298
- finally:
299
- torch.cuda.empty_cache()
300
- torch.cuda.ipc_collect()
301
- self.finalize(keep_paths=[])
302
 
303
- # ==============================================================================
304
- # --- FUNÇÃO #1: GERADOR DE CHUNK ÚNICO (AUXILIAR INTERNA) ---
305
- # ==============================================================================
306
- def _generate_single_chunk_low(
307
- self, prompt, negative_prompt,
308
- height, width, num_frames, guidance_scale,
309
- seed, itens_conditions_itens,
310
- ltx_configs_override=None):
311
  """
312
- [ DE GERAÇÃO]
313
- Gera um ÚNICO chunk de latentes brutos. Esta é a unidade de trabalho fundamental.
 
 
314
  """
315
- print("\n" + "-"*20 + " INÍCIO: _generate_single_chunk_low " + "-"*20)
 
 
316
 
317
- num_frames = ((num_frames - 1)//8)*8 + 1
318
-
319
- used_seed = random.randint(0, 2**32 - 1) if seed is None else int(seed)
320
- seed_everething(used_seed)
321
-
322
- # --- 1.1: SETUP DE PARÂMETROS ---
323
- height_padded = ((height - 1) // 8 + 1) * 8
324
- width_padded = ((width - 1) // 8 + 1) * 8
325
- generator = torch.Generator(device=self.device).manual_seed(seed)
326
-
327
- downscale_factor = self.config.get("downscale_factor", 0.6666666)
328
- vae_scale_factor = self.pipeline.vae_scale_factor
329
-
330
- x_width = int(width_padded * downscale_factor)
331
- downscaled_width = x_width - (x_width % vae_scale_factor)
332
- x_height = int(height_padded * downscale_factor)
333
- downscaled_height = x_height - (x_height % vae_scale_factor)
334
 
335
- # --- NÓ 1.2: MONTAGEM DE CONDIÇÕES E OVERRIDES ---
 
336
 
337
- first_pass_config = self.config.get("first_pass", {}).copy()
 
 
338
 
339
- if ltx_configs_override:
340
- print("[DEBUG] Sobrepondo configurações do LTX com valores da UI...")
341
- preset = ltx_configs_override.get("guidance_preset")
342
- if preset == "Customizado":
343
- try:
344
- first_pass_config["guidance_scale"] = json.loads(ltx_configs_override["guidance_scale_list"])
345
- first_pass_config["stg_scale"] = json.loads(ltx_configs_override["stg_scale_list"])
346
- #first_pass_config["guidance_timesteps"] = json.loads(ltx_configs_override["timesteps_list"])
347
- except Exception as e:
348
- print(f" > ERRO ao parsear valores customizados: {e}. Usando Padrão como fallback.")
349
- elif preset == "Agressivo":
350
- first_pass_config["guidance_scale"] = [1, 2, 8, 12, 8, 2, 1]
351
- first_pass_config["stg_scale"] = [0, 0, 5, 6, 5, 3, 2]
352
- elif preset == "Suave":
353
- first_pass_config["guidance_scale"] = [1, 1, 4, 5, 4, 1, 1]
354
- first_pass_config["stg_scale"] = [0, 0, 2, 2, 2, 1, 0]
355
-
356
- first_pass_kwargs = {
357
- "prompt": prompt, "negative_prompt": negative_prompt, "height": downscaled_height, "width": downscaled_width,
358
- "num_frames": num_frames, "frame_rate": 24, "generator": generator, "output_type": "latent",
359
- "conditioning_items": itens_conditions_itens,
360
- **first_pass_config,
361
- }
362
 
 
 
363
 
364
-
365
- results_dir = "/app/output"; os.makedirs(results_dir, exist_ok=True)
366
-
367
-
368
- # --- NÓ 1.3: CHAMADA AO PIPELINE ---
369
- try:
370
- with torch.autocast(device_type="cuda", dtype=self.runtime_autocast_dtype, enabled=self.device.type == 'cuda'):
371
- latents_bruto = self.pipeline(**first_pass_kwargs).images
372
- #latents_cpu_bruto = latents_bruto.detach().to("cpu")
373
- #tensor_path_cpu = os.path.join(results_dir, f"latents_low_res.pt")
374
- #torch.save(latents_cpu_bruto, tensor_path_cpu)
375
- log_tensor_info(latents_bruto, f"Latente Bruto Gerado para: '{prompt[:40]}...'")
376
-
377
- print("-" * 20 + " FIM: _generate_single_chunk_low " + "-"*20)
378
- return latents_bruto
379
 
380
  except Exception as e:
381
- print("-" * 20 + " ERRO: _generate_single_chunk_low --------------------")
382
  traceback.print_exc()
383
- print("-" * 20 + " ----------------------------------------------")
384
- return None
385
  finally:
386
- torch.cuda.empty_cache()
387
- torch.cuda.ipc_collect()
388
- self.finalize(keep_paths=[])
389
-
390
- # ==============================================================================
391
- # --- FUNÇÃO #2: ORQUESTRADOR NARRATIVO (MÚLTIPLOS PROMPTS) ---
392
- # ==============================================================================
393
- def generate_narrative_low(
394
- self, prompt: str, negative_prompt,
395
- height, width, duration, guidance_scale,
396
- seed, initial_conditions, overlap_frames: int = 4,
397
- ltx_configs_override: dict = None):
398
- """
399
- [ORQUESTRADOR NARRATIVO]
400
- Gera um vídeo em múltiplos chunks sequenciais a partir de um prompt com várias linhas.
401
- """
402
- print("\n" + "="*80)
403
- print("====== INICIANDO GERAÇÃO NARRATIVA EM CHUNKS (LOW-RES) ======")
404
- print("="*80)
405
-
406
-
407
- used_seed = random.randint(0, 2**32 - 1) if seed is None else int(seed)
408
- seed_everething(used_seed)
409
- FPS = 24.0
410
 
411
- prompt_list = [p.strip() for p in prompt.splitlines() if p.strip()]
412
- num_chunks = len(prompt_list)
413
- if num_chunks == 0: raise ValueError("O prompt está vazio ou não contém linhas válidas.")
414
-
415
- total_actual_frames = max(8, int(round((round(duration * FPS) ) / 8.0) * 8 ))
416
 
417
-
418
- frames_per_chunk = total_actual_frames//num_chunks
419
- #frames_per_chunk_last = max(9, frames_per_chunk_last)
420
-
421
- poda_latents_num = overlap_frames
422
-
423
 
424
- latentes_chunk_video = []
425
- overlap_condition = []
426
- overlap_latents = None
427
- lista_patch_latentes_chunk = []
428
- condition_item_latent_overlap = None
429
- temp_dir = tempfile.mkdtemp(prefix="ltxv_narrative_"); self._register_tmp_dir(temp_dir)
430
- results_dir = "/app/output"; os.makedirs(results_dir, exist_ok=True)
431
 
432
- for i, chunk_prompt in enumerate(prompt_list):
433
- print(f"\n--- Gerando Chunk Narrativo {i+1}/{num_chunks}: '{chunk_prompt}' ---")
434
-
435
- #current_image_conditions = []
436
- #if initial_image_conditions:
437
- # cond_item_original = initial_image_conditions[0]
438
- # if i == 0:
439
- # current_image_conditions.append(cond_item_original)
440
- # else:
441
- # cond_item_fraco = ConditioningItem(
442
- # media_item=cond_item_original.media_item, media_frame_number=0, conditioning_strength=0.1
443
- # )
444
- # current_image_conditions.append(cond_item_fraco)
445
-
446
-
447
- poda_latents_num = 8
448
-
449
- if i > 0 and poda_latents_num > 0:
450
- frames_per_chunk += poda_latents_num
451
- else:
452
- frames_per_chunk = frames_per_chunk
453
-
454
- if i == num_chunks - 1:
455
- frames_per_chunk = frames_per_chunk+poda_latents_num
456
-
457
- #frames_per_chunk = ((frames_per_chunk - 1)//8)*8 + 1
458
-
459
-
460
- if i> 0:
461
- initial_conditions = []
462
-
463
- if i > 0:
464
- initial_conditions = []
465
 
466
- if overlap_latents is not None:
467
- # Já é um tensor, usa diretamente
468
- overlap_latents_r = overlap_latents
469
- items_list = [[overlap_latents_r, 0, 1.0]]
470
- overlap_condition = self._prepare_condition_items_latent(items_list)
471
-
472
- itens_conditions_itens = initial_conditions + overlap_condition
473
-
474
- latentes_bruto_r = self._generate_single_chunk_low(
475
- prompt=chunk_prompt, negative_prompt=negative_prompt, height=height, width=width,
476
- num_frames=frames_per_chunk, guidance_scale=guidance_scale, seed=used_seed + i,
477
- itens_conditions_itens=itens_conditions_itens,
478
- ltx_configs_override=ltx_configs_override
479
  )
480
-
481
-
482
- print(f"[DEBUG] generate_narrative_low.frames_per_chunk: {frames_per_chunk}")
483
- log_tensor_info(latentes_bruto_r, f"latentes_bruto_r recebidk: {i}...'")
484
-
485
- #latent_path_bufer = load_tensor(latent_path)
486
- #final_latents = torch.cat(lista_tensores, dim=2).to(self.device)
487
-
488
 
489
- #poda inicio overlap
490
- if i > 0 and poda_latents_num > 0:
491
- latentes_bruto = latentes_bruto_r[:, :, poda_latents_num:, :, :].clone()
492
- else:
493
- latentes_bruto = latentes_bruto_r[:, :, :, :, :].clone()
494
-
495
- log_tensor_info(latentes_bruto, f"latentes_bruto recebidk: {i}...'")
496
-
497
- # cria estado overlap para proximo
498
- if i < num_chunks - 1 and poda_latents_num > 0:
499
- overlap_latents = latentes_bruto_r[:, :, -poda_latents_num:, :, :].clone()
500
- log_tensor_info(overlap_latents, f"overlap_latents recebidk: {i}...'")
501
- overlap_latents = overlap_latents.detach().to(self.device)
502
- condition_item_latent_overlap = ConditioningItem(
503
- media_item=overlap_latents, media_frame_number=0, conditioning_strength=1.0
504
- )
505
-
506
 
507
- #adiciona a lista
508
- tensor_path_podado = os.path.join(results_dir, f"latents_poda{i}_res.pt")
509
- torch.save(latentes_bruto, tensor_path_podado)
510
- lista_patch_latentes_chunk.append(tensor_path_podado)
511
 
512
- print("\n--- Finalizando Narrativa: Concatenando chunks ---")
513
-
514
- # Carrega cada tensor do disco
515
- lista_tensores = [self._load_tensor(c) for c in lista_patch_latentes_chunk]
516
- final_latents = torch.cat(lista_tensores, dim=2).to(self.device)
517
- log_tensor_info(final_latents, "Tensor de Latentes Final Concatenado")
518
-
519
- try:
520
- with torch.autocast(device_type="cuda", dtype=self.runtime_autocast_dtype, enabled=self.device.type == 'cuda'):
521
- pixel_tensor = vae_manager_singleton.decode(final_latents, decode_timestep=float(self.config.get("decode_timestep", 0.05)))
522
- pixel_tensor_cpu = pixel_tensor.detach().to("cpu")
523
- video_path = self._save_and_log_video(pixel_tensor_cpu, "narrative_video", FPS, temp_dir, results_dir, used_seed)
524
- final_latents_cpu = final_latents.detach().to("cpu")
525
- final_latents_patch = os.path.join(results_dir, f"latents_low_fim.pt")
526
- torch.save(final_latents_cpu, final_latents_patch)
527
- return video_path, final_latents_patch, used_seed
528
-
529
  except Exception as e:
530
- print("-" * 20 + " ERRO: generate_narrative_low --------------------")
531
  traceback.print_exc()
532
- print("-" * 20 + " ----------------------------------------------")
533
  return None, None, None
534
  finally:
535
- torch.cuda.empty_cache()
536
- torch.cuda.ipc_collect()
537
- self.finalize(keep_paths=[])
538
-
539
- # ==============================================================================
540
- # --- FUNÇÃO #3: ORQUESTRADOR SIMPLES (PROMPT ÚNICO) ---
541
- # ==============================================================================
542
- def generate_single_low(
543
- self, prompt: str, negative_prompt,
544
- height, width, duration, guidance_scale,
545
- seed, initial_conditions=None,
546
- ltx_configs_override: dict = None):
547
  """
548
- [ORQUESTRADOR SIMPLES]
549
- Gera um vídeo completo em um único chunk. Ideal para prompts simples e curtos.
550
  """
551
- print("\n" + "="*80)
552
- print("====== INICIANDO GERAÇÃO SIMPLES EM CHUNK ÚNICO (LOW-RES) ======")
553
- print("="*80)
554
 
555
- used_seed = random.randint(0, 2**32 - 1) if seed is None else int(seed)
556
- seed_everething(used_seed)
557
- FPS = 24.0
558
 
559
- total_actual_frames = max(9, int(round((round(duration * FPS) - 1) / 8.0) * 8 + 1))
560
 
561
- temp_dir = tempfile.mkdtemp(prefix="ltxv_single_"); self._register_tmp_dir(temp_dir)
562
- results_dir = "/app/output"; os.makedirs(results_dir, exist_ok=True)
563
-
564
- # Chama a função de geração de chunk único para fazer todo o trabalho
565
- final_latents = self._generate_single_chunk_low(
566
- prompt=prompt, negative_prompt=negative_prompt, height=height, width=width,
567
- num_frames=total_actual_frames, guidance_scale=guidance_scale, seed=used_seed,
568
- itens_conditions_itens=initial_conditions,
569
- ltx_configs_override=ltx_configs_override
570
- )
571
-
572
- print("\n--- Finalizando Geração Simples: Salvando e decodificando ---")
573
- log_tensor_info(final_latents, "Tensor de Latentes Final")
574
-
575
- try:
576
- with torch.autocast(device_type="cuda", dtype=self.runtime_autocast_dtype, enabled=self.device.type == 'cuda'):
577
- pixel_tensor = vae_manager_singleton.decode(final_latents, decode_timestep=float(self.config.get("decode_timestep", 0.05)))
578
- pixel_tensor_cpu = pixel_tensor.detach().to("cpu")
579
- video_path = self._save_and_log_video(pixel_tensor_cpu, "narrative_video", FPS, temp_dir, results_dir, used_seed)
580
- final_latents_cpu = final_latents.detach().to("cpu")
581
- final_latents_patch = os.path.join(results_dir, f"latents_low_fim.pt")
582
- torch.save(final_latents_cpu, final_latents_patch)
583
- return video_path, final_latents_patch, used_seed
584
- except Exception as e:
585
- print("-" * 20 + " ERRO: generate_single_low --------------------")
586
- traceback.print_exc()
587
- print("-" * 20 + " ----------------------------------------------")
588
- return None, None, None
589
- finally:
590
- torch.cuda.empty_cache()
591
- torch.cuda.ipc_collect()
592
- self.finalize(keep_paths=[])
593
 
594
-
 
595
 
596
- # ==============================================================================
597
- # --- FUNÇÃO #4: ORQUESTRADOR (Upscaler + texturas hd) ---
598
- # ==============================================================================
599
- def generate_upscale_denoise(
600
- self, latents_path, prompt, negative_prompt,
601
- guidance_scale, seed,
602
- ):
603
- used_seed = random.randint(0, 2**32 - 1) if seed is None else int(seed)
604
- seed_everething(used_seed)
605
- temp_dir = tempfile.mkdtemp(prefix="ltxv_up_"); self._register_tmp_dir(temp_dir)
606
- results_dir = "/app/output"; os.makedirs(results_dir, exist_ok=True)
607
- latents_low = torch.load(latents_path).to(self.device)
608
- with torch.autocast(device_type="cuda", dtype=self.runtime_autocast_dtype, enabled=self.device == 'cuda'):
609
- upsampled_latents = self._upsample_latents_internal(latents_low)
610
- upsampled_latents = adain_filter_latent(latents=upsampled_latents, reference_latents=latents_low)
611
- del latents_low; torch.cuda.empty_cache()
612
-
613
- # --- LÓGICA DE DIVISÃO SIMPLES COM OVERLAP ---
614
- total_frames = upsampled_latents.shape[2]
615
- # Garante que mid_point seja pelo menos 1 para evitar um segundo chunk vazio se houver poucos frames
616
- mid_point = max(1, total_frames // 2)
617
- chunk1 = upsampled_latents[:, :, :mid_point, :, :]
618
- # O segundo chunk começa um frame antes para criar o overlap
619
- chunk2 = upsampled_latents[:, :, mid_point - 1:, :, :]
620
-
621
- final_latents_list = []
622
- for i, chunk in enumerate([chunk1, chunk2]):
623
- if chunk.shape[2] <= 1: continue # Pula chunks inválidos ou vazios
624
- second_pass_height = chunk.shape[3] * self.pipeline.vae_scale_factor
625
- second_pass_width = chunk.shape[4] * self.pipeline.vae_scale_factor
626
- second_pass_kwargs = {
627
- "prompt": prompt, "negative_prompt": negative_prompt, "height": second_pass_height, "width": second_pass_width,
628
- "num_frames": chunk.shape[2], "latents": chunk,
629
- #"guidance_scale": float(guidance_scale),
630
- "output_type": "latent", "generator": torch.Generator(device=self.device).manual_seed(used_seed),
631
- **(self.config.get("second_pass", {}))
632
- }
633
- refined_chunk = self.pipeline(**second_pass_kwargs).images
634
- # Remove o overlap do primeiro chunk refinado antes de juntar
635
- if i == 0:
636
- final_latents_list.append(refined_chunk[:, :, :-1, :, :])
637
- else:
638
- final_latents_list.append(refined_chunk)
639
-
640
- final_latents = torch.cat(final_latents_list, dim=2)
641
- log_tensor_info(final_latents, "Latentes Upscaled/Refinados Finais")
642
 
643
- latents_cpu = final_latents.detach().to("cpu")
644
- tensor_path = os.path.join(results_dir, f"latents_refined_{used_seed}.pt")
645
- torch.save(latents_cpu, tensor_path)
646
- pixel_tensor = vae_manager_singleton.decode(final_latents, decode_timestep=float(self.config.get("decode_timestep", 0.05)))
647
- video_path = self._save_and_log_video(pixel_tensor, "refined_video", 24.0, temp_dir, results_dir, used_seed)
648
- return video_path, tensor_path
649
-
650
- def encode_mp4(self, latents_path: str, fps: int = 24):
651
- latents = torch.load(latents_path)
652
- seed = random.randint(0, 99999)
653
- temp_dir = tempfile.mkdtemp(prefix="ltxv_enc_"); self._register_tmp_dir(temp_dir)
654
- results_dir = "/app/output"; os.makedirs(results_dir, exist_ok=True)
655
 
656
- # --- LÓGICA DE DIVISÃO SIMPLES COM OVERLAP ---
657
- total_frames = latents.shape[2]
658
- mid_point = max(1, total_frames // 2)
659
- chunk1_latents = latents[:, :, :mid_point, :, :]
660
- chunk2_latents = latents[:, :, mid_point - 1:, :, :]
661
 
662
- video_parts = []
663
- pixel_chunks_to_concat = []
664
- with torch.autocast(device_type="cuda", dtype=self.runtime_autocast_dtype, enabled=self.device == 'cuda'):
665
- for i, chunk in enumerate([chunk1_latents, chunk2_latents]):
666
- if chunk.shape[2] == 0: continue
667
- pixel_chunk = vae_manager_singleton.decode(chunk.to(self.device), decode_timestep=float(self.config.get("decode_timestep", 0.05)))
668
- # Remove o overlap do primeiro chunk de pixels
669
- if i == 0:
670
- pixel_chunks_to_concat.append(pixel_chunk[:, :, :-1, :, :])
671
- else:
672
- pixel_chunks_to_concat.append(pixel_chunk)
673
 
674
- final_pixel_tensor = torch.cat(pixel_chunks_to_concat, dim=2)
675
- final_video_path = self._save_and_log_video(final_pixel_tensor, f"final_concatenated_{seed}", fps, temp_dir, results_dir, seed)
676
- return final_video_path
677
 
678
- def __init__(self):
679
- t0 = time.perf_counter()
680
- print("[DEBUG] Inicializando VideoService...")
 
681
 
682
- # 1. Obter o dispositivo alvo a partir do gerenciador
683
- # Não definimos `self.device` ainda, apenas guardamos o alvo.
684
- target_device = gpu_manager.get_ltx_device()
685
- print(f"[DEBUG] LTX foi alocado para o dispositivo: {target_device}")
686
-
687
- # 2. Carregar a configuração e os modelos (na CPU, como a função _load_models faz)
688
- self.config = self._load_config()
689
- self.pipeline, self.latent_upsampler = self._load_models()
 
690
 
691
- # 3. Mover os modelos para o dispositivo alvo e definir `self.device`
692
- self.move_to_device(target_device) # Usando a função que criamos!
693
 
694
- # 4. Configurar o resto dos componentes com o dispositivo correto
695
- self._apply_precision_policy()
696
- vae_manager_singleton.attach_pipeline(
697
- self.pipeline,
698
- device=self.device, # Agora `self.device` está correto
699
- autocast_dtype=self.runtime_autocast_dtype
700
- )
701
- self._tmp_dirs = set()
702
- print(f"[DEBUG] VideoService pronto. boot_time={time.perf_counter()-t0:.3f}s")
703
-
704
- # A função move_to_device que criamos antes é essencial aqui
705
- def move_to_device(self, device):
706
- """Move os modelos do pipeline para o dispositivo especificado."""
707
- print(f"[LTX] Movendo modelos para {device}...")
708
- self.device = torch.device(device) # Garante que é um objeto torch.device
709
- self.pipeline.to(self.device)
710
- if self.latent_upsampler:
711
- self.latent_upsampler.to(self.device)
712
- print(f"[LTX] Modelos agora estão em {self.device}.")
713
 
714
- def move_to_cpu(self):
715
- """Move os modelos para a CPU para liberar VRAM."""
716
- self.move_to_device(torch.device("cpu"))
717
- if torch.cuda.is_available():
718
- torch.cuda.empty_cache()
719
-
720
-
721
- # Instanciação limpa, sem usar `self` fora da classe.
722
- print("Criando instância do VideoService...")
723
- video_generation_service = VideoService()
724
- print("Instância do VideoService pronta.")
 
 
1
+ # FILE: ltx_server_refactored_complete.py
2
+ # DESCRIPTION: Backend service for video generation using LTX-Video pipeline.
3
+ # Features modular generation, narrative chunking, and resource management.
4
 
5
+ import gc
6
+ import io
 
 
7
  import json
8
+ import logging
 
9
  import os
10
+ import random
11
  import shutil
12
+ import subprocess
13
+ import sys
14
+ import tempfile
15
  import time
16
  import traceback
17
+ import warnings
18
+ from pathlib import Path
19
+ from typing import Dict, List, Optional, Tuple
20
+
21
+ import torch
22
+ import yaml
23
  from einops import rearrange
24
+ from huggingface_hub import hf_hub_download
25
+
26
+ # ==============================================================================
27
+ # --- INITIAL SETUP & CONFIGURATION ---
28
+ # ==============================================================================
29
 
30
+ # Suppress excessive logs from external libraries
31
+ warnings.filterwarnings("ignore")
32
+ logging.getLogger("huggingface_hub").setLevel(logging.ERROR)
33
+ logging.basicConfig(level=logging.INFO, format='[%(levelname)s] %(message)s')
34
+
35
+ # --- CONSTANTS ---
36
  DEPS_DIR = Path("/data")
37
  LTX_VIDEO_REPO_DIR = DEPS_DIR / "LTX-Video"
38
+ BASE_CONFIG_PATH = LTX_VIDEO_REPO_DIR / "configs"
39
+ DEFAULT_CONFIG_FILE = BASE_CONFIG_PATH / "ltxv-13b-0.9.8-distilled-fp8.yaml"
40
+ LTX_REPO_ID = "Lightricks/LTX-Video"
41
+ RESULTS_DIR = Path("/app/output")
42
+ DEFAULT_FPS = 24.0
43
+ FRAMES_ALIGNMENT = 8
44
+
45
+ # --- DEPENDENCY PATH SETUP ---
46
+ # Ensures the LTX-Video library can be imported
47
  def add_deps_to_path():
48
+ """Adds the LTX repository directory to the Python system path."""
49
  repo_path = str(LTX_VIDEO_REPO_DIR.resolve())
50
+ if repo_path not in sys.path:
51
  sys.path.insert(0, repo_path)
52
+ logging.info(f"Repo added to sys.path: {repo_path}")
53
+
54
+ add_deps_to_path()
55
+
56
+ # --- PROJECT IMPORTS ---
57
+ # These must come after the path setup
58
+ from api.gpu_manager import gpu_manager
59
+ from ltx_video.models.autoencoders.vae_encode import (normalize_latents, un_normalize_latents)
60
+ from ltx_video.pipelines.pipeline_ltx_video import (ConditioningItem, LTXMultiScalePipeline, adain_filter_latent)
61
+ from ltx_video.pipelines.pipeline_ltx_video import create_ltx_video_pipeline, create_latent_upsampler
62
+ from ltx_video.utils.inference_utils import load_image_to_tensor_with_resize_and_crop
63
+ from managers.vae_manager import vae_manager_singleton
64
+ from tools.video_encode_tool import video_encode_tool_singleton
65
+
66
+
67
+ # ==============================================================================
68
+ # --- UTILITY & HELPER FUNCTIONS ---
69
+ # ==============================================================================
70
+
71
+ def seed_everything(seed: int):
72
+ """Sets the seed for reproducibility across all relevant libraries."""
73
+ random.seed(seed)
74
+ os.environ['PYTHONHASHSEED'] = str(seed)
75
+ np.random.seed(seed)
76
+ torch.manual_seed(seed)
77
+ torch.cuda.manual_seed_all(seed)
78
+ # Potentially faster, but less reproducible
79
+ # torch.backends.cudnn.deterministic = False
80
+ # torch.backends.cudnn.benchmark = True
81
+
82
+ def calculate_padding(orig_h: int, orig_w: int, target_h: int, target_w: int) -> Tuple[int, int, int, int]:
83
+ """Calculates symmetric padding values to reach a target dimension."""
84
  pad_h = target_h - orig_h
85
  pad_w = target_w - orig_w
86
  pad_top = pad_h // 2
 
88
  pad_left = pad_w // 2
89
  pad_right = pad_w - pad_left
90
  return (pad_left, pad_right, pad_top, pad_bottom)
91
+
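For reference, a minimal sketch of how these padding values are consumed later by _prepare_conditioning_tensor (frame sizes are illustrative; calculate_padding is the function defined just above):

    import torch
    import torch.nn.functional as F

    # (left, right, top, bottom) matches the ordering torch.nn.functional.pad expects
    # for the last two dimensions of a (B, C, H, W) tensor.
    pad = calculate_padding(orig_h=478, orig_w=700, target_h=480, target_w=704)  # -> (2, 2, 1, 1)
    frame = torch.zeros(1, 3, 478, 700)
    padded = F.pad(frame, pad)
    assert padded.shape[-2:] == (480, 704)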
92
+ def log_tensor_info(tensor: torch.Tensor, name: str = "Tensor"):
93
+ """Logs detailed information about a PyTorch tensor for debugging."""
94
  if not isinstance(tensor, torch.Tensor):
95
+ logging.debug(f"'{name}' is not a tensor.")
96
  return
97
+
98
+ info_str = (
99
+ f"--- Tensor: {name} ---\n"
100
+ f" - Shape: {tuple(tensor.shape)}\n"
101
+ f" - Dtype: {tensor.dtype}\n"
102
+ f" - Device: {tensor.device}\n"
103
+ )
104
  if tensor.numel() > 0:
105
  try:
106
+ info_str += (
107
+ f" - Min: {tensor.min().item():.4f} | "
108
+ f"Max: {tensor.max().item():.4f} | "
109
+ f"Mean: {tensor.mean().item():.4f}\n"
110
+ )
111
  except Exception:
112
+ pass # Fails on some dtypes
113
+ logging.debug(info_str + "----------------------")
114
 
115
+
116
+ # ==============================================================================
117
+ # --- VIDEO SERVICE CLASS ---
118
+ # ==============================================================================
119
 
120
  class VideoService:
121
+ """
122
+ Backend service for orchestrating video generation using the LTX-Video pipeline.
123
+ Encapsulates model loading, state management, and the logic for multi-stage
124
+ video generation (low-resolution, upscale).
125
+ """
126
+
127
+ def __init__(self):
128
+ """Initializes the service, loads models, and configures the environment."""
129
+ t0 = time.perf_counter()
130
+ logging.info("Initializing VideoService...")
131
+ RESULTS_DIR.mkdir(parents=True, exist_ok=True)
132
+
133
+ self.config = self._load_config(DEFAULT_CONFIG_FILE)
134
+ self._tmp_dirs = set()
135
+
136
+ self.pipeline, self.latent_upsampler = self._load_models_on_cpu()
137
+
138
+ target_device = gpu_manager.get_ltx_device()
139
+ self.device = torch.device("cpu") # Default device
140
+ self.move_to_device(target_device)
141
+
142
+ self._apply_precision_policy()
143
+ vae_manager_singleton.attach_pipeline(
144
+ self.pipeline,
145
+ device=self.device,
146
+ autocast_dtype=self.runtime_autocast_dtype
147
+ )
148
+
149
+ logging.info(f"VideoService ready. Startup time: {time.perf_counter()-t0:.2f}s")
150
+
151
+ # ==========================================================================
152
+ # --- LIFECYCLE & MODEL MANAGEMENT ---
153
+ # ==========================================================================
154
+
155
+ def _load_config(self, config_path: Path) -> Dict:
156
+ """Loads the YAML configuration file."""
157
+ logging.info(f"Loading config from: {config_path}")
158
  with open(config_path, "r") as file:
159
  return yaml.safe_load(file)
160
 
161
+ def _load_models_on_cpu(self) -> Tuple[LTXMultiScalePipeline, Optional[torch.nn.Module]]:
162
+ """Downloads and loads the pipeline and upsampler checkpoints onto the CPU."""
163
  t0 = time.perf_counter()
164
+
165
+ logging.info("Downloading main checkpoint...")
166
  distilled_model_path = hf_hub_download(
167
+ repo_id=LTX_REPO_ID,
168
  filename=self.config["checkpoint_path"],
 
 
169
  token=os.getenv("HF_TOKEN"),
170
  )
171
  self.config["checkpoint_path"] = distilled_model_path
172
 
 
173
  pipeline = create_ltx_video_pipeline(
174
  ckpt_path=self.config["checkpoint_path"],
175
  precision=self.config["precision"],
176
+ device="cpu", # Load on CPU first
177
+ # Pass other config values directly
178
+ **{k: v for k, v in self.config.items() if k in create_ltx_video_pipeline.__code__.co_varnames}
 
 
 
179
  )
180
+
 
181
  latent_upsampler = None
182
  if self.config.get("spatial_upscaler_model_path"):
183
+ logging.info("Downloading spatial upscaler checkpoint...")
184
+ spatial_upscaler_path = hf_hub_download(
185
+ repo_id=LTX_REPO_ID,
186
+ filename=self.config["spatial_upscaler_model_path"],
187
+ token=os.getenv("HF_TOKEN")
188
+ )
189
+ self.config["spatial_upscaler_model_path"] = spatial_upscaler_path
190
  latent_upsampler = create_latent_upsampler(self.config["spatial_upscaler_model_path"], device="cpu")
191
 
192
+ logging.info(f"Models loaded on CPU in {time.perf_counter()-t0:.2f}s")
193
+ return pipeline, latent_upsampler
 
194
 
195
+ def move_to_device(self, device_str: str):
196
+ """Moves all relevant models to the specified device (e.g., 'cuda:0' or 'cpu')."""
197
+ target_device = torch.device(device_str)
198
+ if self.device == target_device:
199
+ logging.info(f"Models are already on the target device: {device_str}")
200
+ return
201
+
202
+ logging.info(f"Moving models to {device_str}...")
203
+ self.device = target_device
204
+ self.pipeline.to(self.device)
205
+ if self.latent_upsampler:
206
+ self.latent_upsampler.to(self.device)
207
+
208
+ if device_str == "cpu" and torch.cuda.is_available():
209
  torch.cuda.empty_cache()
210
 
211
+ logging.info(f"Models successfully moved to {self.device}.")
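A short usage sketch for the device hand-off (assuming any VideoService instance, such as the module-level video_generation_service created in the previous version of this file):

    service = VideoService()
    service.move_to_device("cpu")     # park the pipeline and upsampler to free VRAM
    # ... run another GPU workload here ...
    service.move_to_device("cuda:0")  # bring the models back before generating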
212
 
213
+ def finalize(self, keep_paths: Optional[List[str]] = None):
214
+ """Cleans up GPU memory and temporary directories."""
215
+ logging.debug("Finalizing resources...")
216
+ gc.collect()
217
+ if torch.cuda.is_available():
218
+ torch.cuda.empty_cache()
219
+ try:
220
+ torch.cuda.ipc_collect()
221
+ except Exception:
222
+ pass
223
 
224
+ # Optional: Clean up temporary directories if needed (logic can be added here)
225
226
 
227
+ # ==========================================================================
228
+ # --- PUBLIC ORCHESTRATORS ---
229
+ # These are the main entry points called by the frontend.
230
+ # ==========================================================================
231
+
232
+ def generate_narrative_low(self, prompt: str, **kwargs) -> Tuple[Optional[str], Optional[str], Optional[int]]:
 
 
233
  """
234
+ [ORCHESTRATOR] Generates a video from a multi-line prompt, creating a sequence of scenes.
235
+
236
+ Returns:
237
+ A tuple of (video_path, latents_path, used_seed).
238
  """
239
+ logging.info("Starting narrative low-res generation...")
240
+ used_seed = self._resolve_seed(kwargs.get("seed"))
241
+ seed_everything(used_seed)
242
 
243
+ prompt_list = [p.strip() for p in prompt.splitlines() if p.strip()]
244
+ if not prompt_list:
245
+ raise ValueError("Prompt is empty or contains no valid lines.")
246
+
247
+ num_chunks = len(prompt_list)
248
+ total_frames = self._calculate_aligned_frames(kwargs.get("duration", 4.0))
249
+ frames_per_chunk = (total_frames // num_chunks // FRAMES_ALIGNMENT) * FRAMES_ALIGNMENT
250
+ overlap_frames = self.config.get("overlap_frames", 8)
251
 
252
+ all_latents_paths = []
253
+ overlap_condition_item = None
254
 
255
+ try:
256
+ for i, chunk_prompt in enumerate(prompt_list):
257
+ logging.info(f"Generating narrative chunk {i+1}/{num_chunks}: '{chunk_prompt[:50]}...'")
258
 
259
+ current_frames = frames_per_chunk
260
+ if i > 0:
261
+ current_frames += overlap_frames
262
+
263
+ # Use initial image conditions only for the first chunk
264
+ current_conditions = kwargs.get("initial_conditions", []) if i == 0 else []
265
+ if overlap_condition_item:
266
+ current_conditions.append(overlap_condition_item)
267
+
268
+ chunk_latents = self._generate_single_chunk_low(
269
+ prompt=chunk_prompt,
270
+ num_frames=current_frames,
271
+ seed=used_seed + i,
272
+ conditioning_items=current_conditions,
273
+ **kwargs
274
+ )
275
 
276
+ if chunk_latents is None:
277
+ raise RuntimeError(f"Failed to generate latents for chunk {i+1}.")
278
 
279
+ # Create overlap for the next chunk
280
+ if i < num_chunks - 1:
281
+ overlap_latents = chunk_latents[:, :, -overlap_frames:, :, :].clone()
282
+ log_tensor_info(overlap_latents, f"Overlap Latents from chunk {i+1}")
283
+ overlap_condition_item = ConditioningItem(
284
+ media_item=overlap_latents, media_frame_number=0, conditioning_strength=1.0
285
+ )
286
+
287
+ # Trim the overlap from the current chunk before saving
288
+ if i > 0:
289
+ chunk_latents = chunk_latents[:, :, overlap_frames:, :, :]
290
+
291
+ # Save chunk latents to disk to manage memory
292
+ chunk_path = RESULTS_DIR / f"chunk_{i}_{used_seed}.pt"
293
+ torch.save(chunk_latents.cpu(), chunk_path)
294
+ all_latents_paths.append(chunk_path)
295
+
296
+ # Concatenate, decode, and save the final video
297
+ return self._finalize_generation(all_latents_paths, "narrative_video", used_seed)
298
 
299
  except Exception as e:
300
+ logging.error(f"Error during narrative generation: {e}")
301
  traceback.print_exc()
302
+ return None, None, None
 
303
  finally:
304
+ # Clean up intermediate chunk files
305
+ for path in all_latents_paths:
306
+ if os.path.exists(path):
307
+ os.remove(path)
308
+ self.finalize()
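The frame bookkeeping above reduces to simple arithmetic. A self-contained sketch follows (no pipeline calls; the total-frame rule is an assumption, since _calculate_aligned_frames is defined elsewhere in this file):

    FRAMES_ALIGNMENT = 8
    duration, fps, num_chunks, overlap = 8.0, 24.0, 2, 8

    total = int(round(duration * fps)) // FRAMES_ALIGNMENT * FRAMES_ALIGNMENT  # assumed alignment rule
    per_chunk = (total // num_chunks // FRAMES_ALIGNMENT) * FRAMES_ALIGNMENT

    kept_lengths = []
    for i in range(num_chunks):
        generated = per_chunk + (overlap if i > 0 else 0)           # later chunks render extra overlap frames
        kept_lengths.append(generated - (overlap if i > 0 else 0))  # the overlap is trimmed before saving

    assert sum(kept_lengths) == per_chunk * num_chunks  # concatenated length along dim=2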
309
310
 
311
+ def generate_single_low(self, **kwargs) -> Tuple[Optional[str], Optional[str], Optional[int]]:
312
+ """
313
+ [ORCHESTRATOR] Generates a video from a single prompt in one go.
 
 
 
314
 
315
+ Returns:
316
+ A tuple of (video_path, latents_path, used_seed).
317
+ """
318
+ logging.info("Starting single-prompt low-res generation...")
319
+ used_seed = self._resolve_seed(kwargs.get("seed"))
320
+ seed_everything(used_seed)
 
321
 
322
+ try:
323
+ total_frames = self._calculate_aligned_frames(kwargs.get("duration", 4.0), min_frames=9)
324
 
325
+ final_latents = self._generate_single_chunk_low(
326
+ num_frames=total_frames,
327
+ seed=used_seed,
328
+ conditioning_items=kwargs.get("initial_conditions", []),
329
+ **kwargs
330
  )
331
 
332
+ if final_latents is None:
333
+ raise RuntimeError("Failed to generate latents.")
334
 
335
+ # Save latents to a single file, then decode and save video
336
+ latents_path = RESULTS_DIR / f"single_{used_seed}.pt"
337
+ torch.save(final_latents.cpu(), latents_path)
338
+ return self._finalize_generation([latents_path], "single_video", used_seed)
339
340
  except Exception as e:
341
+ logging.error(f"Error during single generation: {e}")
342
  traceback.print_exc()
 
343
  return None, None, None
344
  finally:
345
+ self.finalize()
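A hedged usage sketch of this entry point (keyword names follow the worker signature below; the prompt and dimensions are illustrative):

    service = VideoService()
    video_path, latents_path, seed = service.generate_single_low(
        prompt="a red kite drifting over sand dunes at sunset",
        negative_prompt="blurry, distorted, low quality",
        height=480,
        width=704,
        duration=4.0,
        ltx_configs_override=None,  # or e.g. {"guidance_preset": "Agressivo"}
    )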
346
+
347
+
348
+ # ==========================================================================
349
+ # --- INTERNAL WORKER UNITS ---
350
+ # ==========================================================================
351
+
352
+ def _generate_single_chunk_low(
353
+ self, prompt: str, negative_prompt: str, height: int, width: int, num_frames: int, seed: int,
354
+ conditioning_items: List[ConditioningItem], ltx_configs_override: Optional[Dict], **kwargs
355
+ ) -> Optional[torch.Tensor]:
 
356
  """
357
+ [WORKER] Generates a single chunk of latents. This is the core generation unit.
358
+ Returns the raw latents tensor on the target device, or None on failure.
359
  """
360
+ height_padded, width_padded = (self._align(d) for d in (height, width))
361
+ downscale_factor = self.config.get("downscale_factor", 0.6666666)
362
+ vae_scale_factor = self.pipeline.vae_scale_factor
363
 
364
+ downscaled_height = self._align(int(height_padded * downscale_factor), vae_scale_factor)
365
+ downscaled_width = self._align(int(width_padded * downscale_factor), vae_scale_factor)
 
366
 
367
+ first_pass_config = self.config.get("first_pass", {}).copy()
368
+ if ltx_configs_override:
369
+ first_pass_config.update(self._prepare_guidance_overrides(ltx_configs_override))
370
+
371
+ pipeline_kwargs = {
372
+ "prompt": prompt,
373
+ "negative_prompt": negative_prompt,
374
+ "height": downscaled_height,
375
+ "width": downscaled_width,
376
+ "num_frames": num_frames,
377
+ "frame_rate": DEFAULT_FPS,
378
+ "generator": torch.Generator(device=self.device).manual_seed(seed),
379
+ "output_type": "latent",
380
+ "conditioning_items": conditioning_items,
381
+ **first_pass_config
382
+ }
383
 
384
+ logging.debug(f"Pipeline call args: { {k: v for k, v in pipeline_kwargs.items() if k != 'conditioning_items'} }")
385
 
386
+ with torch.autocast(device_type=self.device.type, dtype=self.runtime_autocast_dtype, enabled=self.device.type == 'cuda'):
387
+ latents_raw = self.pipeline(**pipeline_kwargs).images
388
 
389
+ log_tensor_info(latents_raw, f"Raw Latents for '{prompt[:40]}...'")
390
+ return latents_raw
391
+
392
+
393
+ # ==========================================================================
394
+ # --- HELPERS & UTILITY METHODS ---
395
+ # ==========================================================================
396
 
397
+ def _finalize_generation(self, latents_paths: List[Path], base_filename: str, seed: int) -> Tuple[str, str, int]:
398
+ """
399
+ Loads latents from paths, concatenates them, decodes to video, and saves both.
400
+ """
401
+ logging.info("Finalizing generation: decoding latents to video.")
402
+ # Load all tensors and concatenate them on the CPU first
403
+ all_tensors_cpu = [torch.load(p) for p in latents_paths]
404
+ final_latents_cpu = torch.cat(all_tensors_cpu, dim=2)
405
 
406
+ # Save final combined latents
407
+ final_latents_path = RESULTS_DIR / f"latents_{base_filename}_{seed}.pt"
408
+ torch.save(final_latents_cpu, final_latents_path)
409
+ logging.info(f"Final latents saved to: {final_latents_path}")
 
410
 
411
+ # Move to GPU for decoding
412
+ final_latents_gpu = final_latents_cpu.to(self.device)
413
+ log_tensor_info(final_latents_gpu, "Final Concatenated Latents")
414
+
415
+ with torch.autocast(device_type=self.device.type, dtype=self.runtime_autocast_dtype, enabled=self.device.type == 'cuda'):
416
+ pixel_tensor = vae_manager_singleton.decode(
417
+ final_latents_gpu,
418
+ decode_timestep=float(self.config.get("decode_timestep", 0.05))
419
+ )
 
 
420
 
421
+ video_path = self._save_and_log_video(pixel_tensor, f"{base_filename}_{seed}")
422
+ return str(video_path), str(final_latents_path), seed
 
423
 
424
+ def prepare_condition_items(self, items_list: List, height: int, width: int, num_frames: int) -> List[ConditioningItem]:
425
+ """Prepares a list of ConditioningItem objects from file paths or tensors."""
426
+ if not items_list:
427
+ return []
428
 
429
+ height_padded, width_padded = self._align(height), self._align(width)
430
+ padding_values = calculate_padding(height, width, height_padded, width_padded)
431
+
432
+ conditioning_items = []
433
+ for media, frame, weight in items_list:
434
+ tensor = self._prepare_conditioning_tensor(media, height, width, padding_values)
435
+ safe_frame = max(0, min(int(frame), num_frames - 1))
436
+ conditioning_items.append(ConditioningItem(tensor, safe_frame, float(weight)))
437
+ return conditioning_items
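A sketch of wiring an image condition into the narrative orchestrator (the image path is illustrative; frame index 0 pins the condition to the first frame of the first chunk):

    service = VideoService()
    items = service.prepare_condition_items(
        [("/path/to/first_frame.png", 0, 1.0)],  # (media path, frame index, strength)
        height=480, width=704, num_frames=97,
    )
    video_path, latents_path, seed = service.generate_narrative_low(
        prompt="a fox walks through snow\nthe fox stops and looks at the camera",
        negative_prompt="blurry",
        height=480, width=704, duration=8.0,
        initial_conditions=items,
        ltx_configs_override=None,
    )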
438
 
439
+ def _prepare_conditioning_tensor(self, media_path: str, height: int, width: int, padding: Tuple) -> torch.Tensor:
440
+ """Loads and processes an image to be a conditioning tensor."""
441
+ tensor = load_image_to_tensor_with_resize_and_crop(media_path, height, width)
442
+ tensor = torch.nn.functional.pad(tensor, padding)
443
+ log_tensor_info(tensor, f"Prepared Conditioning Tensor from {media_path}")
444
+ return tensor.to(self.device, dtype=self.runtime_autocast_dtype)
445
 
446
+ def _prepare_guidance_overrides(self, ltx_configs: Dict) -> Dict:
447
+ """Parses UI presets for guidance into pipeline-compatible arguments."""
448
+ overrides = {}
449
+ preset = ltx_configs.get("guidance_preset", "Padrão (Recomendado)")
450
 
451
+ # Default LTX values are used if preset is 'Padrão'
452
+ if preset == "Agressivo":
453
+ overrides["guidance_scale"] = [1, 2, 8, 12, 8, 2, 1]
454
+ overrides["stg_scale"] = [0, 0, 5, 6, 5, 3, 2]
455
+ elif preset == "Suave":
456
+ overrides["guidance_scale"] = [1, 1, 4, 5, 4, 1, 1]
457
+ overrides["stg_scale"] = [0, 0, 2, 2, 2, 1, 0]
458
+ elif preset == "Customizado":
459
+ try:
460
+ overrides["guidance_scale"] = json.loads(ltx_configs["guidance_scale_list"])
461
+ overrides["stg_scale"] = json.loads(ltx_configs["stg_scale_list"])
462
+ except (json.JSONDecodeError, KeyError