EuuIia committed
Commit 9b10e93 · verified · 1 Parent(s): 47475ad

Upload ltx_server.py

Files changed (1):
  1. api/ltx_server.py +61 -176
api/ltx_server.py CHANGED
@@ -1,5 +1,4 @@
-# ltx_server.py — VideoService with detailed debug logs (init→MP4)
-# external_decode option: True (default) decodes latents with the VAE outside the pipeline.
+# ltx_server.py — VideoService (always output_type="latent"), with VAE→pixels→MP4 at the end
 
 # --- 1. IMPORTS ---
 import torch
@@ -26,7 +25,6 @@ def _query_gpu_processes_via_nvml(device_index: int) -> List[Dict]:
     try:
         import psutil
         import pynvml as nvml
-        print("[DEBUG] NVML: initializing for process query...")
         nvml.nvmlInit()
         handle = nvml.nvmlDeviceGetHandleByIndex(device_index)
         try:
@@ -51,29 +49,23 @@ def _query_gpu_processes_via_nvml(device_index: int) -> List[Dict]:
             except Exception:
                 pass
             results.append({"pid": pid, "name": name, "user": user, "used_mb": used_mb})
-        print("[DEBUG] NVML: shutting down...")
         nvml.nvmlShutdown()
         return results
-    except Exception as e:
-        print(f"[DEBUG] NVML unavailable or failed: {e}")
+    except Exception:
         return []
 
 def _query_gpu_processes_via_nvidiasmi(device_index: int) -> List[Dict]:
     cmd = f"nvidia-smi -i {device_index} --query-compute-apps=pid,process_name,used_memory --format=csv,noheader,nounits"
     try:
-        print(f"[DEBUG] Running: {cmd}")
         out = subprocess.check_output(shlex.split(cmd), stderr=subprocess.STDOUT, text=True, timeout=2.0)
-    except Exception as e:
-        print(f"[DEBUG] nvidia-smi failed: {e}")
+    except Exception:
         return []
     results = []
     for line in out.strip().splitlines():
         parts = [p.strip() for p in line.split(",")]
         if len(parts) >= 3:
             try:
-                pid = int(parts[0])
-                name = parts[1]
-                used_mb = int(parts[2])
+                pid = int(parts[0]); name = parts[1]; used_mb = int(parts[2])
                 user = "unknown"
                 try:
                     import psutil
@@ -164,9 +156,7 @@ class VideoService:
         self.device = "cuda" if torch.cuda.is_available() else "cpu"
         print(f"[DEBUG] Selected device: {self.device}")
         self.last_memory_reserved_mb = 0.0
-        self._tmp_dirs = set()
-        self._tmp_files = set()
-        self._last_outputs = []
+        self._tmp_dirs = set(); self._tmp_files = set(); self._last_outputs = []
 
         self.pipeline, self.latent_upsampler = self._load_models()
         print(f"[DEBUG] Pipeline and upsampler loaded. Upsampler active? {bool(self.latent_upsampler)}")
@@ -195,9 +185,7 @@ class VideoService:
         total_memory_mb = total_memory_b / (1024 ** 2)
         peak_reserved_mb = torch.cuda.max_memory_reserved(device_index) / (1024 ** 2)
         delta_mb = current_reserved_mb - getattr(self, "last_memory_reserved_mb", 0.0)
-        processes = _query_gpu_processes_via_nvml(device_index)
-        if not processes:
-            processes = _query_gpu_processes_via_nvidiasmi(device_index)
+        processes = _query_gpu_processes_via_nvml(device_index) or _query_gpu_processes_via_nvidiasmi(device_index)
         print(f"\n--- [GPU LOG] {stage_name} (cuda:{device_index}) ---")
         print(f" - Reserved: {current_reserved_mb:.2f} MB / {total_memory_mb:.2f} MB (Δ={delta_mb:+.2f} MB)")
         if peak_reserved_mb > getattr(self, "last_memory_reserved_mb", 0.0):
@@ -208,43 +196,33 @@ class VideoService:
 
     def _register_tmp_dir(self, d: str):
         if d and os.path.isdir(d):
-            self._tmp_dirs.add(d)
-            print(f"[DEBUG] Registered tmp dir: {d}")
+            self._tmp_dirs.add(d); print(f"[DEBUG] Registered tmp dir: {d}")
 
     def _register_tmp_file(self, f: str):
         if f and os.path.exists(f):
-            self._tmp_files.add(f)
-            print(f"[DEBUG] Registered tmp file: {f}")
+            self._tmp_files.add(f); print(f"[DEBUG] Registered tmp file: {f}")
 
     def finalize(self, keep_paths=None, extra_paths=None, clear_gpu=True):
         print("[DEBUG] Finalize: starting cleanup...")
-        keep = set(keep_paths or [])
-        extras = set(extra_paths or [])
-
+        keep = set(keep_paths or []); extras = set(extra_paths or [])
         removed_files = 0
         for f in list(self._tmp_files | extras):
             try:
                 if f not in keep and os.path.isfile(f):
-                    os.remove(f)
-                    removed_files += 1
-                    print(f"[DEBUG] Removed tmp file: {f}")
+                    os.remove(f); removed_files += 1; print(f"[DEBUG] Removed tmp file: {f}")
             except Exception as e:
                 print(f"[DEBUG] Failed to remove file {f}: {e}")
             finally:
                 self._tmp_files.discard(f)
-
         removed_dirs = 0
         for d in list(self._tmp_dirs):
             try:
                 if d not in keep and os.path.isdir(d):
-                    shutil.rmtree(d, ignore_errors=True)
-                    removed_dirs += 1
-                    print(f"[DEBUG] Removed tmp dir: {d}")
+                    shutil.rmtree(d, ignore_errors=True); removed_dirs += 1; print(f"[DEBUG] Removed tmp dir: {d}")
             except Exception as e:
                 print(f"[DEBUG] Failed to remove directory {d}: {e}")
             finally:
                 self._tmp_dirs.discard(d)
-
         print(f"[DEBUG] Finalize: files removed={removed_files}, dirs removed={removed_dirs}")
         gc.collect()
         try:
@@ -256,7 +234,6 @@ class VideoService:
             pass
         except Exception as e:
             print(f"[DEBUG] Finalize: GPU cleanup failed: {e}")
-
         try:
             self._log_gpu_memory("After finalize")
         except Exception as e:
@@ -283,7 +260,7 @@ class VideoService:
     def _load_models(self):
         t0 = time.perf_counter()
         LTX_REPO = "Lightricks/LTX-Video"
-        print("[DEBUG] Downloading main checkpoint (hf_hub_download)...")
+        print("[DEBUG] Downloading main checkpoint...")
        distilled_model_path = hf_hub_download(
             repo_id=LTX_REPO,
             filename=self.config["checkpoint_path"],
@@ -294,7 +271,7 @@ class VideoService:
         self.config["checkpoint_path"] = distilled_model_path
         print(f"[DEBUG] Checkpoint at: {distilled_model_path}")
 
-        print("[DEBUG] Downloading spatial upscaler (hf_hub_download)...")
+        print("[DEBUG] Downloading spatial upscaler...")
         spatial_upscaler_path = hf_hub_download(
             repo_id=LTX_REPO,
             filename=self.config["spatial_upscaler_model_path"],
@@ -339,15 +316,13 @@ class VideoService:
         try:
             if p.dtype == f8:
                 with torch.no_grad():
-                    p.data = p.data.to(torch.bfloat16)
-                    p_cnt += 1
+                    p.data = p.data.to(torch.bfloat16); p_cnt += 1
         except Exception:
             pass
     for _, b in module.named_buffers(recurse=True):
         try:
             if hasattr(b, "dtype") and b.dtype == f8:
-                b.data = b.data.to(torch.bfloat16)
-                b_cnt += 1
+                b.data = b.data.to(torch.bfloat16); b_cnt += 1
         except Exception:
             pass
     print(f"[DEBUG] FP8→BF16: params_promoted={p_cnt}, buffers_promoted={b_cnt}")
@@ -385,38 +360,32 @@ class VideoService:
         print(f"[DEBUG] Cond shape={tuple(out.shape)} dtype={out.dtype} device={out.device}")
         return out
 
-
+    # === "Simple" decoding: latents → pixels → MP4 ===
     def _decode_one_latent_to_pixel(self, latent_chw: torch.Tensor) -> torch.Tensor:
         """
-        Decode one latent (C,H,W) to a pixel frame (C,H,W) in the [0,1] range.
-        Uses pipeline.decode_latents if available, otherwise pipeline.vae.decode.
+        Decode one latent (C,H,W) to a pixel frame (C,H,W) in [0,1].
         """
-        if self.device == "cuda":
-            ctx = torch.autocast(device_type="cuda", dtype=self.runtime_autocast_dtype)
-        else:
-            ctx = contextlib.nullcontext()
+        ctx = torch.autocast(device_type="cuda", dtype=self.runtime_autocast_dtype) if self.device == "cuda" else contextlib.nullcontext()
         with ctx:
             if hasattr(self.pipeline, "decode_latents"):
                 img_bchw = self.pipeline.decode_latents(latent_chw.unsqueeze(0))
             elif hasattr(self.pipeline, "vae") and hasattr(self.pipeline.vae, "decode"):
                 img_bchw = self.pipeline.vae.decode(latent_chw.unsqueeze(0))
             else:
-                raise RuntimeError("No decoder found (decode_latents/vae.decode).")
-        img_chw = img_bchw[0]
-        # Normalize to [0,1] in case the output is in [-1,1]
+                raise RuntimeError("No decoder (decode_latents/vae.decode) available.")
+        img_chw = img_bchw[0]
         if img_chw.min() < 0:
             img_chw = (img_chw.clamp(-1, 1) + 1.0) / 2.0
         else:
             img_chw = img_chw.clamp(0, 1)
         return img_chw
-
-
+
     def _pixels_to_uint8_np(self, pixel_chw: torch.Tensor, padding_values) -> np.ndarray:
         """
-        Convert (C,H,W) float in [0,1] to (H,W,C) uint8, applying the padding crop.
+        Convert (C,H,W) float in [0,1] to (H,W,C) uint8, cropping the padding.
         """
         pad_left, pad_right, pad_top, pad_bottom = padding_values
-        H, W = pixel_chw.shape[1], pixel_chw.shape[2]
+        H, W = pixel_chw.shape[1], pixel_chw.shape[2]
         h_end = H - pad_bottom if pad_bottom > 0 else H
         w_end = W - pad_right if pad_right > 0 else W
         pixel_chw = pixel_chw[:, pad_top:h_end, pad_left:w_end]
@@ -426,14 +395,13 @@ class VideoService:
                         .cpu()
                         .numpy())
         return frame_hwc_u8
-
+
     def encode_latents_to_mp4(self, latents: torch.Tensor, output_path: str, fps: int, padding_values,
                               progress_callback=None):
         """
-        Final stage: latents (B,C,T,H,W) -> decode each frame -> write the MP4 incrementally.
-        Follows the encoder pattern of the other app (frame by frame, no giant 4D array).
+        Latents (B,C,T,H,W): decode frame by frame and write the MP4 incrementally.
         """
-        T = latents.shape[2]
+        T = latents.shape[2]
         print(f"[DEBUG] encode_latents_to_mp4: frames={T} out={output_path}")
         with imageio.get_writer(output_path, fps=fps, codec="libx264", quality=8) as writer:
             for i in range(T):
@@ -444,55 +412,7 @@ class VideoService:
                 if progress_callback:
                     progress_callback(i + 1, T)
                 if i % getattr(self, "frame_log_every", 8) == 0:
-                    print(f"[DEBUG] encode frame {i}/{T}")
+                    print(f"[DEBUG] frame {i}/{T} encoded")
-
-
-
-
-
-
-    def _decode_latents_to_video(self, latents: torch.Tensor, output_video_path: str, frame_rate: int,
-                                 padding_values, progress_callback=None):
-        print(f"[DEBUG] Decoding latents → video: {output_video_path}")
-        pad_left, pad_right, pad_top, pad_bottom = padding_values
-        T = latents.shape[2]
-        print(f"[DEBUG] Latents shape={tuple(latents.shape)} frames={T}")
-        start = time.perf_counter()
-        with imageio.get_writer(output_video_path, fps=frame_rate, codec="libx264", quality=8) as writer:
-            for i in range(T):
-                latent_chw = latents[0, :, i].to(self.device)
-                with torch.autocast(device_type="cuda", dtype=self.runtime_autocast_dtype) if self.device == "cuda" else contextlib.nullcontext():
-                    pixel_bchw = None
-                    if hasattr(self.pipeline, "decode_latents"):
-                        pixel_bchw = self.pipeline.decode_latents(latent_chw.unsqueeze(0))
-                        if i % self.frame_log_every == 0:
-                            print(f"[DEBUG] decode_latents frame={i}")
-                    elif hasattr(self.pipeline, "vae") and hasattr(self.pipeline.vae, "decode"):
-                        pixel_bchw = self.pipeline.vae.decode(latent_chw.unsqueeze(0))
-                        if i % self.frame_log_every == 0:
-                            print(f"[DEBUG] vae.decode frame={i}")
-                    else:
-                        raise RuntimeError("Pipeline has no decode_latents/vae.decode.")
-                pixel_chw = pixel_bchw[0]
-                if pixel_chw.min() < 0:
-                    pixel_chw = (pixel_chw.clamp(-1, 1) + 1.0) / 2.0
-                else:
-                    pixel_chw = pixel_chw.clamp(0, 1)
-                H, W = pixel_chw.shape[1], pixel_chw.shape[2]
-                h_end = H - pad_bottom if pad_bottom > 0 else H
-                w_end = W - pad_right if pad_right > 0 else W
-                pixel_chw = pixel_chw[:, pad_top:h_end, pad_left:w_end]
-                frame_hwc_u8 = (pixel_chw.permute(1, 2, 0)
-                                .mul(255)
-                                .to(torch.uint8)
-                                .cpu()
-                                .numpy())
-                writer.append_data(frame_hwc_u8)
-                if progress_callback:
-                    progress_callback(i + 1, T)
-                if i % self.frame_log_every == 0:
-                    print(f"[DEBUG] frame {i}/{T} written.")
-        print(f"[DEBUG] Decode+write finished in {time.perf_counter()-start:.3f}s")
 
     def generate(
         self,
@@ -515,13 +435,13 @@ class VideoService:
         guidance_scale=3.0,
         improve_texture=True,
        progress_callback=None,
+        # Always latent→VAE→MP4 (simple)
         external_decode=True,
     ):
         t_all = time.perf_counter()
         print(f"[DEBUG] generate() begin mode={mode} external_decode={external_decode} improve_texture={improve_texture}")
         if self.device == "cuda":
-            torch.cuda.empty_cache()
-            torch.cuda.reset_peak_memory_stats()
+            torch.cuda.empty_cache(); torch.cuda.reset_peak_memory_stats()
         self._log_gpu_memory("Generation start")
 
         if mode == "image-to-video" and not start_image_filepath:
@@ -530,11 +450,9 @@ class VideoService:
             raise ValueError("An input video is required for video-to-video mode")
 
         used_seed = random.randint(0, 2**32 - 1) if randomize_seed else int(seed)
-        seed_everething(used_seed)
-        print(f"[DEBUG] Seed used: {used_seed}")
+        seed_everething(used_seed); print(f"[DEBUG] Seed used: {used_seed}")
 
-        FPS = 24.0
-        MAX_NUM_FRAMES = 257
+        FPS = 24.0; MAX_NUM_FRAMES = 257
         target_frames_rounded = round(duration * FPS)
         n_val = round((float(target_frames_rounded) - 1.0) / 8.0)
         actual_num_frames = max(9, min(MAX_NUM_FRAMES, int(n_val * 8 + 1)))
@@ -561,6 +479,7 @@ class VideoService:
             conditioning_items.append(ConditioningItem(end_tensor, last_frame_index, float(end_image_weight)))
         print(f"[DEBUG] Conditioning items: {len(conditioning_items)}")
 
+        # Always request latents (simple)
         call_kwargs = {
             "prompt": prompt,
             "negative_prompt": negative_prompt,
@@ -569,7 +488,7 @@ class VideoService:
             "num_frames": actual_num_frames,
             "frame_rate": int(FPS),
             "generator": generator,
-            "output_type": "latent" if external_decode else "pt",
+            "output_type": "latent",
             "conditioning_items": conditioning_items if conditioning_items else None,
             "media_items": None,
             "decode_timestep": self.config["decode_timestep"],
@@ -583,7 +502,7 @@ class VideoService:
             "enhance_prompt": False,
             "skip_layer_strategy": SkipLayerStrategy.AttentionValues,
         }
-        print(f"[DEBUG] call_kwargs.output_type={call_kwargs['output_type']} skip_layer_strategy={call_kwargs['skip_layer_strategy']}")
+        print(f"[DEBUG] output_type={call_kwargs['output_type']} skip_layer_strategy={call_kwargs['skip_layer_strategy']}")
 
         if mode == "video-to-video":
             media = load_media_file(
@@ -597,7 +516,6 @@ class VideoService:
             print(f"[DEBUG] media_items shape={tuple(media.shape)}")
 
         latents = None
-        result_tensor = None
         multi_scale_pipeline = None
 
         try:
@@ -626,18 +544,14 @@ class VideoService:
                 result = multi_scale_pipeline(**multi_scale_call_kwargs)
                 print(f"[DEBUG] multi_scale_pipeline time={time.perf_counter()-t_ms:.3f}s")
 
-                if external_decode:
-                    if hasattr(result, "latents"):
-                        latents = result.latents
-                    elif hasattr(result, "images") and isinstance(result.images, torch.Tensor):
-                        latents = result.images
-                    else:
-                        latents = result
-                    print(f"[DEBUG] Latents obtained (multi-scale): shape={tuple(latents.shape)}")
+                # Capture latents
+                if hasattr(result, "latents"):
+                    latents = result.latents
+                elif hasattr(result, "images") and isinstance(result.images, torch.Tensor):
+                    latents = result.images
                 else:
-                    result_tensor = result.images if hasattr(result, "images") else result
-                    print(f"[DEBUG] Pixels obtained (multi-scale): shape={tuple(result_tensor.shape)}")
-                    log_tensor_info(result_tensor, "Multi-scale output (pixel)")
+                    latents = result
+                print(f"[DEBUG] Latents (multi-scale): shape={tuple(latents.shape)}")
             else:
                 single_pass_kwargs = call_kwargs.copy()
                 first_pass_config = self.config.get("first_pass", {})
@@ -649,12 +563,9 @@ class VideoService:
                         "skip_block_list": first_pass_config.get("skip_block_list"),
                     }
                 )
-                schedule = first_pass_config.get("timesteps")
-                if schedule is None:
-                    schedule = first_pass_config.get("guidance_timesteps")
+                schedule = first_pass_config.get("timesteps") or first_pass_config.get("guidance_timesteps")
                 if mode == "video-to-video":
-                    schedule = [0.7]
-                    print("[INFO] video-to-video mode (single pass): timesteps=[0.7]")
+                    schedule = [0.7]; print("[INFO] video-to-video mode (single pass): timesteps=[0.7]")
                 if isinstance(schedule, (list, tuple)) and len(schedule) > 0:
                     single_pass_kwargs["timesteps"] = schedule
                     single_pass_kwargs["guidance_timesteps"] = schedule
@@ -667,51 +578,28 @@ class VideoService:
                 result = self.pipeline(**single_pass_kwargs)
                 print(f"[DEBUG] single-pass time={time.perf_counter()-t_sp:.3f}s")
 
-                if external_decode:
-                    if hasattr(result, "latents"):
-                        latents = result.latents
-                    elif hasattr(result, "images") and isinstance(result.images, torch.Tensor):
-                        latents = result.images
-                    else:
-                        latents = result
-                    print(f"[DEBUG] Latents obtained (single-pass): shape={tuple(latents.shape)}")
+                if hasattr(result, "latents"):
+                    latents = result.latents
+                elif hasattr(result, "images") and isinstance(result.images, torch.Tensor):
+                    latents = result.images
                 else:
-                    result_tensor = result.images if hasattr(result, "images") else result
-                    print(f"[DEBUG] Pixels obtained (single-pass): shape={tuple(result_tensor.shape)}")
+                    latents = result
+                print(f"[DEBUG] Latents (single-pass): shape={tuple(latents.shape)}")
 
-            temp_dir = tempfile.mkdtemp(prefix="ltxv_")
-            self._register_tmp_dir(temp_dir)
-            results_dir = "/app/output"
-            os.makedirs(results_dir, exist_ok=True)
+            # Staging and MP4 write (simple: VAE→pixels→MP4)
+            temp_dir = tempfile.mkdtemp(prefix="ltxv_"); self._register_tmp_dir(temp_dir)
+            results_dir = "/app/output"; os.makedirs(results_dir, exist_ok=True)
             output_video_path = os.path.join(temp_dir, f"output_{used_seed}.mp4")
             final_output_path = None
 
-            if external_decode:
-                print("[DEBUG] Encoding from latents (external VAE) → MP4...")
-                self.encode_latents_to_mp4(
-                    latents=latents,
-                    output_path=output_video_path,
-                    fps=call_kwargs["frame_rate"],
-                    padding_values=padding_values,
-                    progress_callback=progress_callback
-                )
-            else:
-                print("[DEBUG] Writing video from pixels (no latents)...")
-                with imageio.get_writer(output_video_path, fps=call_kwargs["frame_rate"], codec="libx264", quality=8) as writer:
-                    T = result_tensor.shape[2]
-                    for i in range(T):
-                        frame_chw = result_tensor[0, :, i]
-                        frame_hwc_u8 = (frame_chw.permute(1, 2, 0)
-                                        .clamp(0, 1)
-                                        .mul(255)
-                                        .to(torch.uint8)
-                                        .cpu()
-                                        .numpy())
-                        writer.append_data(frame_hwc_u8)
-                        if progress_callback:
-                            progress_callback(i + 1, T)
-                        if i % self.frame_log_every == 0:
-                            print(f"[DEBUG] frame {i}/{T} written (pixel).")
+            print("[DEBUG] Encoding from latents (external VAE) → MP4...")
+            self.encode_latents_to_mp4(
+                latents=latents,
+                output_path=output_video_path,
+                fps=call_kwargs["frame_rate"],
+                padding_values=padding_values,
+                progress_callback=progress_callback,
+            )
 
             candidate_final = os.path.join(results_dir, f"output_{used_seed}.mp4")
             try:
@@ -736,10 +624,6 @@ class VideoService:
                 del latents
             except Exception:
                 pass
-            try:
-                del result_tensor
-            except Exception:
-                pass
             try:
                 del multi_scale_pipeline
             except Exception:
@@ -763,3 +647,4 @@ class VideoService:
 
 print("Creating VideoService instance. Model loading will start now...")
 video_generation_service = VideoService()
+
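
Note on the frame-count arithmetic in generate(): the pipeline only accepts frame counts of the form 8*n + 1, clamped to [9, MAX_NUM_FRAMES]. A minimal standalone restatement of those lines (illustrative only; snap_num_frames is not a name used in the commit):

    FPS = 24.0
    MAX_NUM_FRAMES = 257

    def snap_num_frames(duration_s: float) -> int:
        # Round the requested duration to a frame count, then snap to 8*n + 1.
        target = round(duration_s * FPS)
        n = round((float(target) - 1.0) / 8.0)
        return max(9, min(MAX_NUM_FRAMES, int(n * 8 + 1)))

    print(snap_num_frames(5.0))   # 121 (= 8*15 + 1) for a 5-second request
    print(snap_num_frames(60.0))  # 257 (clamped to MAX_NUM_FRAMES)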
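
A minimal usage sketch of the service after this commit, assuming the module is importable as api.ltx_server. The keyword arguments mirror the generate() signature visible in the diff; treating the return value as the final MP4 path is an assumption (the return statement is not shown in this diff), and the prompt values are illustrative:

    from api.ltx_server import video_generation_service as svc

    # Assumption: generate() returns the final MP4 path (not shown in the diff).
    output_path = svc.generate(
        prompt="a sailboat crossing a stormy sea, cinematic lighting",
        negative_prompt="blurry, low quality",
        mode="image-to-video",                  # requires start_image_filepath
        start_image_filepath="/tmp/start.png",
        duration=5.0,                           # snapped to 121 frames at 24 fps
        randomize_seed=True,
        improve_texture=True,                   # multi-scale pass with the spatial upsampler
        progress_callback=lambda done, total: print(f"{done}/{total} frames"),
    )
    print("Output:", output_path)
    svc.finalize(keep_paths=[output_path])      # remove tmp dirs/files, free GPU memory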