Eueuiaa committed
Commit fd94706 · verified · 1 Parent(s): 3450d95

Update api/ltx_server.py

Files changed (1): api/ltx_server.py (+17 -112)
api/ltx_server.py CHANGED
@@ -19,8 +19,7 @@ logging.set_verbosity_debug()
 LTXV_DEBUG=1
 LTXV_FRAME_LOG_EVERY=8

-from typing import List, Dict, Tuple, Any, Optional, Literal, Union
-from dataclasses import dataclass
+

 # --- 1. IMPORTS ---
 import os, subprocess, shlex, tempfile
@@ -50,10 +49,6 @@ import torch.nn.functional as F
 from managers.vae_manager import vae_manager_singleton
 from tools.video_encode_tool import video_encode_tool_singleton

-
-
-
-
 # --- 2. DEPENDENCY MANAGEMENT AND SETUP ---
 def _query_gpu_processes_via_nvml(device_index: int) -> List[Dict]:
     try:
@@ -223,9 +218,7 @@ def add_deps_to_path():
 add_deps_to_path()

 # --- 3. MODEL-SPECIFIC IMPORTS ---
-global vae_encode, latent_to_pixel_coords, randn_tensor
-from ltx_video.models.autoencoders.vae_encode import vae_encode, latent_to_pixel_coords
-
+
 from ltx_video.pipelines.pipeline_ltx_video import ConditioningItem, LTXMultiScalePipeline
 from ltx_video.utils.skip_layer_strategy import SkipLayerStrategy
 from ltx_video.models.autoencoders.vae_encode import un_normalize_latents, normalize_latents
@@ -249,70 +242,7 @@ def log_tensor_info(tensor, name="Tensor"):
     print("------------------------------------------\n")


-@dataclass
-class LatentConditioningItem:
-    """Data item for conditioning the LTX pipeline."""
-    latent_tensor: torch.Tensor
-    media_frame_number: int
-    conditioning_strength: float
-
-
-
-def _aduc_prepare_conditioning_patch(
-    self: "LTXVideoPipeline",
-    conditioning_items: Optional[List[Union["ConditioningItem", "LatentConditioningItem"]]],
-    init_latents: torch.Tensor,
-    num_frames: int,
-    height: int,
-    width: int,
-    vae_per_channel_normalize: bool = True,
-    generator=None,
-) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor, int]:
-    if not conditioning_items:
-        init_latents, init_latent_coords = self.patchifier.patchify(latents=init_latents)
-        init_pixel_coords = latent_to_pixel_coords(init_latent_coords, self.vae, causal_fix=self.transformer.config.causal_temporal_positioning)
-        return init_latents, init_pixel_coords, None, 0
-
-    init_conditioning_mask = torch.zeros_like(init_latents[:, 0, ...], dtype=torch.float32, device=init_latents.device)
-    extra_conditioning_latents, extra_conditioning_pixel_coords, extra_conditioning_mask = [], [], []
-    extra_conditioning_num_latents = 0
-
-    for item in conditioning_items:
-        if not isinstance(item, LatentConditioningItem):
-            print("ADUC patch: conditioning item is not a LatentConditioningItem and will be ignored.")
-            continue
-
-        media_item_latents = item.latent_tensor.to(dtype=init_latents.dtype, device=init_latents.device)
-        media_frame_number, strength = item.media_frame_number, item.conditioning_strength
-
-        if media_frame_number == 0:
-            f_l, h_l, w_l = media_item_latents.shape[-3:]
-            init_latents[..., :f_l, :h_l, :w_l] = torch.lerp(init_latents[..., :f_l, :h_l, :w_l], media_item_latents, strength)
-            init_conditioning_mask[..., :f_l, :h_l, :w_l] = strength
-        else:
-            noise = randn_tensor(media_item_latents.shape, generator=generator, device=media_item_latents.device, dtype=media_item_latents.dtype)
-            media_item_latents = torch.lerp(noise, media_item_latents, strength)
-            patched_latents, latent_coords = self.patchifier.patchify(latents=media_item_latents)
-            pixel_coords = latent_to_pixel_coords(latent_coords, self.vae, causal_fix=self.transformer.config.causal_temporal_positioning)
-            pixel_coords[:, 0] += media_frame_number
-            extra_conditioning_num_latents += patched_latents.shape[1]
-            new_mask = torch.full(patched_latents.shape[:2], strength, dtype=torch.float32, device=init_latents.device)
-            extra_conditioning_latents.append(patched_latents)
-            extra_conditioning_pixel_coords.append(pixel_coords)
-            extra_conditioning_mask.append(new_mask)
-
-    init_latents, init_latent_coords = self.patchifier.patchify(latents=init_latents)
-    init_pixel_coords = latent_to_pixel_coords(init_latent_coords, self.vae, causal_fix=self.transformer.config.causal_temporal_positioning)
-    init_conditioning_mask, _ = self.patchifier.patchify(latents=init_conditioning_mask.unsqueeze(1))
-    init_conditioning_mask = init_conditioning_mask.squeeze(-1)
-
-    if extra_conditioning_latents:
-        init_latents = torch.cat([*extra_conditioning_latents, init_latents], dim=1)
-        init_pixel_coords = torch.cat([*extra_conditioning_pixel_coords, init_pixel_coords], dim=2)
-        init_conditioning_mask = torch.cat([*extra_conditioning_mask, init_conditioning_mask], dim=1)
-
-    return init_latents, init_pixel_coords, init_conditioning_mask, extra_conditioning_num_latents
-
+


 # --- 5. MAIN SERVICE CLASS ---
@@ -329,12 +259,9 @@ class VideoService:
         self.last_memory_reserved_mb = 0.0
         self._tmp_dirs = set(); self._tmp_files = set(); self._last_outputs = []

-
         self.pipeline, self.latent_upsampler = self._load_models()
         print(f"[DEBUG] Pipeline and upsampler loaded. Upsampler active? {bool(self.latent_upsampler)}")

-        #self._apply_ltx_pipeline_patches()
-
         print(f"[DEBUG] Moving models to {self.device}...")
         self.pipeline.to(self.device)
         if self.latent_upsampler:
@@ -357,26 +284,6 @@ class VideoService:

         print(f"[DEBUG] VideoService ready. boot_time={time.perf_counter()-t0:.3f}s")

-
-
-
-    # In ltx_server.py
-    def _apply_ltx_pipeline_patches(self):
-        """Applies runtime patches to the LTX pipeline for ADUC-SDR compatibility."""
-        # Late import of the type, in case it is not in the global scope
-        from ltx_video.pipelines.pipeline_ltx_video import LTXVideoPipeline
-
-        print("LTX POOL MANAGER: Applying ADUC-SDR patches to the LTX pipeline...")
-
-        # APPLY THE PATCH DIRECTLY TO THE SINGLE PIPELINE INSTANCE
-        self.pipeline.prepare_conditioning = _aduc_prepare_conditioning_patch.__get__(
-            self.pipeline, LTXVideoPipeline
-        )
-
-        print("LTX POOL MANAGER: The pipeline instance was successfully patched.")
-
-
-
     def _log_gpu_memory(self, stage_name: str):
         if self.device != "cuda":
             return
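Note: the removed `_apply_ltx_pipeline_patches` relied on Python's descriptor protocol: calling `__get__` on a plain function returns a method bound to one specific instance, so only that pipeline object was patched, never the class. A minimal, self-contained sketch of the same technique (`Pipeline` and `custom_prepare` are illustrative names, not ltx_video APIs):

    # Instance-level monkey-patching via the descriptor protocol.
    # `Pipeline` and `custom_prepare` are illustrative, not ltx_video APIs.
    class Pipeline:
        def prepare_conditioning(self):
            return "original"

    def custom_prepare(self):
        return f"patched {type(self).__name__}"

    pipe = Pipeline()
    # __get__ binds the free function to this one instance only.
    pipe.prepare_conditioning = custom_prepare.__get__(pipe, Pipeline)

    assert pipe.prepare_conditioning() == "patched Pipeline"
    assert Pipeline().prepare_conditioning() == "original"

Other instances keep the original method, which is why the removed comment stressed applying the patch directly to the single pipeline instance the service loads.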
@@ -636,10 +543,10 @@ class VideoService:
             start = (num_latente_por_chunk*i)
             end = (start+num_latente_por_chunk+overlap)
             if i+1 < n_chunks:
-                chunk = latents_brutos[:, :, start:end, :, :]
+                chunk = latents_brutos[:, :, start:end, :, :].clone().detach()
                 print(f"[DEBUG] chunk{i+1}[:, :, {start}:{end}, :, :] = {chunk.shape[2]}")
             else:
-                chunk = latents_brutos[:, :, start:, :, :]
+                chunk = latents_brutos[:, :, start:, :, :].clone().detach()
                 print(f"[DEBUG] chunk{i+1}[:, :, {start}:, :, :] = {chunk.shape[2]}")
             chunks.append(chunk)
             i+=1
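Note: the `.clone().detach()` added to each chunk matters because a bare tensor slice is a view that shares storage with `latents_brutos`; holding chunk views keeps the full tensor (and any autograd history) alive. A small sketch of the difference, with an illustrative shape:

    import torch

    latents = torch.randn(1, 128, 24, 32, 32)   # (B, C, F, H, W), illustrative

    view_chunk = latents[:, :, 0:9, :, :]                     # view: shares storage
    owned_chunk = latents[:, :, 0:9, :, :].clone().detach()   # independent copy, no grad history

    assert view_chunk.data_ptr() == latents.data_ptr()        # same underlying buffer
    assert owned_chunk.data_ptr() != latents.data_ptr()       # its own buffer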
@@ -671,7 +578,7 @@ class VideoService:
         #if total % 2 == 1: # ODD
         # e.g., 11 → primeira 0..5, segunda 5..10
         cut = total // 2
-        primeira = latents_brutos[:, :, :cut, :, :].clone()
+        primeira = latents_brutos[:, :, :cut+1, :, :].clone()
         segunda = latents_brutos[:, :, cut:, :, :].clone()


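Note: with `:cut+1`, `primeira` now ends one latent frame past the midpoint while `segunda` still starts at `cut`, so the two halves share a single seam frame. Checking the arithmetic on the 11-frame example from the comment above:

    total = 11
    cut = total // 2                    # 5
    primeira_len = cut + 1              # frames 0..5  -> 6 frames
    segunda_len = total - cut           # frames 5..10 -> 6 frames
    assert primeira_len + segunda_len == total + 1   # one shared seam frame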
@@ -876,7 +783,7 @@ class VideoService:
             "decode_timestep": self.config["decode_timestep"],
             "decode_noise_scale": self.config["decode_noise_scale"],
             "stochastic_sampling": self.config["stochastic_sampling"],
-            "image_cond_noise_scale": 0.001,
+            "image_cond_noise_scale": 0.05,
             "is_video": True,
             "vae_per_channel_normalize": True,
             "mixed_precision": (self.config["precision"] == "mixed_precision"),
@@ -941,7 +848,7 @@ class VideoService:
         del base_latents; gc.collect(); torch.cuda.empty_cache()

         par = 0
-        latents_cpu_up = upsampled_latents.to("cpu", non_blocking=True)
+        latents_cpu_up = upsampled_latents.detach().to("cpu", non_blocking=True)
         torch.cuda.empty_cache()
         try:
             torch.cuda.ipc_collect()
@@ -959,13 +866,12 @@ class VideoService:

         #latents_parts_up = [latents_cpu_up]

-        par = 1
+
         for latents in latents_parts_up:

             # # --- STAGE 3: TEXTURE REFINEMENT (SECOND PASS) ---
             print("\n--- STARTING STAGE 3: TEXTURE REFINEMENT (SECOND PASS) ---")
-            print(f"[DEBUG] Partition {par}: {tuple(lat_bup.shape)}")
-            par+=1
+
             second_pass_config = self.config.get("second_pass", {}).copy()
             # --- <START OF THE EXACT SECOND-PASS CALCULATION LOGIC> ---
             # Uses the first-pass dimensions doubled, as in the original pipeline
@@ -984,7 +890,6 @@ class VideoService:

             second_pass_kwargs = call_kwargs.copy()
             second_pass_kwargs.update({
-                "conditioning_items": None,
                 "output_type": "latent",
                 "width": second_pass_width,
                 "height": second_pass_height,
@@ -1025,7 +930,7 @@ class VideoService:

         for latents_vae in latents_list:

-            latents_cpu_vae = latents_vae.to("cpu", non_blocking=True)
+            latents_cpu_vae = latents_vae.detach().to("cpu", non_blocking=True)
             torch.cuda.empty_cache()
             try:
                 torch.cuda.ipc_collect()
@@ -1036,14 +941,14 @@ class VideoService:


            lat_a, lat_b = self._dividir_latentes(latents_cpu_vae)
-            #print(f"[DEBUG] Partition A: {tuple(lat_a.shape)}")
-            #print(f"[DEBUG] Partition B: {tuple(lat_b.shape)}")
+            print(f"[DEBUG] Partition A: {tuple(lat_a.shape)}")
+            print(f"[DEBUG] Partition B: {tuple(lat_b.shape)}")

            latents_parts_vae = [lat_a, lat_b]


            for latents in latents_parts_vae:
-                print(f"[DEBUG] Partition {par}: {tuple(latents.shape)}")
+                #print(f"[DEBUG] Partition {par}: {tuple(latents.shape)}")

                par = par + 1
                output_video_path = os.path.join(temp_dir, f"output_{used_seed}_{par}.mp4")
@@ -1077,10 +982,10 @@ class VideoService:

         total_partes = len(partes_mp4)
         if (total_partes>1):
-            final_vid = os.path.join(results_dir, f"concat.mp4")
+            final_vid = os.path.join(results_dir, f"concat_fim_{used_seed}.mp4")
             #partes_mp4_fade = self._gerar_lista_com_transicoes(pasta=results_dir, video_paths=partes_mp4, crossfade_frames=8)
-            final_vid = video_encode_tool_singleton.concatenate_videos(video_paths=partes_mp4, output_path="concate.mp4", workspace_dir=results_dir, start=4, overlap=4)
-            #self._concat_mp4s_no_reencode(partes_mp4, final_vid)
+            final_vid = video_encode_tool_singleton.concatenate_videos(video_paths=partes_mp4, output_path="concate_fim.mp4", workspace_dir=results_dir)
+            self._concat_mp4s_no_reencode(partes_mp4, final_vid)
         else:
             final_vid = partes_mp4[0]

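Note: `_concat_mp4s_no_reencode` itself is not shown in this diff. Joining MP4 parts without re-encoding is typically done with ffmpeg's concat demuxer plus stream copy; a sketch under that assumption (all parts must share identical codecs and encoding parameters):

    import os, subprocess, tempfile

    def concat_mp4s_no_reencode(video_paths, output_path):
        """Concatenate MP4 parts losslessly (assumes matching streams)."""
        # The concat demuxer reads a list file and stream-copies each input.
        with tempfile.NamedTemporaryFile("w", suffix=".txt", delete=False) as f:
            for p in video_paths:
                f.write(f"file '{os.path.abspath(p)}'\n")
            list_path = f.name
        try:
            subprocess.run(
                ["ffmpeg", "-y", "-f", "concat", "-safe", "0",
                 "-i", list_path, "-c", "copy", output_path],
                check=True,
            )
        finally:
            os.unlink(list_path)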