Eueuiaa committed on
Commit
6eb870c
·
verified ·
1 Parent(s): 2de1ae7

Update api/ltx_server_refactored.py

Browse files
Files changed (1) hide show
  1. api/ltx_server_refactored.py +246 -0
api/ltx_server_refactored.py CHANGED
@@ -518,6 +518,252 @@ class VideoService:
518
  self.finalize(keep_paths=[])
519
 
520
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
521
 
522
  # ==============================================================================
523
  # --- FUNÇÃO #4: ORQUESTRADOR (Upscaler + texturas hd) ---
 
518
  self.finalize(keep_paths=[])
519
 
520
 
521
+
522
+
523
+ # Em api/ltx_server_refactored.py -> dentro da classe VideoService
524
+
525
+ # ==============================================================================
526
+ # --- FUNÇÕES DE GERAÇÃO ATUALIZADAS E MODULARES ---
527
+ # ==============================================================================
528
+
529
def _generate_single_chunk_low(
        self, prompt, negative_prompt,
        height, width, num_frames,
        seed, ltx_configs_override=None):
    """
    [GENERATION NODE] Generate a single chunk of raw low-resolution latents.

    Runs the first-pass pipeline at a downscaled, VAE-aligned resolution and
    returns the raw latent tensor.

    Args:
        prompt: Text prompt for this chunk.
        negative_prompt: Negative text prompt.
        height: Target output height in pixels (padded up to a multiple of 8).
        width: Target output width in pixels (padded up to a multiple of 8).
        num_frames: Number of frames to generate for this chunk.
        seed: RNG seed for the torch generator.
        ltx_configs_override: Optional dict of UI overrides (guidance preset,
            custom scale lists, step counts, "conditioning_items"). May be None.

    Returns:
        The latent tensor produced by the pipeline, or None on failure.
    """
    print("\n" + "-"*20 + " INÍCIO: _generate_single_chunk_low " + "-"*20)
    try:
        # Pad the requested resolution up to the next multiple of 8.
        height_padded = ((height - 1) // 8 + 1) * 8
        width_padded = ((width - 1) // 8 + 1) * 8
        generator = torch.Generator(device=self.device).manual_seed(seed)

        downscale_factor = self.config.get("downscale_factor", 0.6666666)
        vae_scale_factor = self.pipeline.vae_scale_factor

        # First pass renders at a reduced size, truncated so both dimensions
        # are exact multiples of the VAE scale factor.
        x_width = int(width_padded * downscale_factor)
        downscaled_width = x_width - (x_width % vae_scale_factor)
        x_height = int(height_padded * downscale_factor)
        downscaled_height = x_height - (x_height % vae_scale_factor)

        # FIX: guard against ltx_configs_override being None (its declared
        # default) — the original called .get() on None and raised
        # AttributeError before the `if ltx_configs_override:` check below.
        all_conditions = (ltx_configs_override or {}).get("conditioning_items", [])

        pipeline_kwargs = self.config.get("first_pass", {}).copy()

        if ltx_configs_override:
            print("[DEBUG] Sobrepondo configurações do LTX com valores da UI...")
            preset = ltx_configs_override.get("guidance_preset")
            if preset == "Customizado":
                try:
                    pipeline_kwargs["guidance_scale"] = json.loads(ltx_configs_override["guidance_scale_list"])
                    pipeline_kwargs["stg_scale"] = json.loads(ltx_configs_override["stg_scale_list"])
                    pipeline_kwargs["guidance_timesteps"] = json.loads(ltx_configs_override["timesteps_list"])
                except Exception as e:
                    print(f" > ERRO ao parsear valores customizados: {e}. Usando Padrão.")
            elif preset == "Agressivo":
                pipeline_kwargs["guidance_scale"] = [1, 2, 8, 12, 8, 2, 1]
                pipeline_kwargs["stg_scale"] = [0, 0, 5, 6, 5, 3, 2]
            elif preset == "Suave":
                pipeline_kwargs["guidance_scale"] = [1, 1, 4, 5, 4, 1, 1]
                pipeline_kwargs["stg_scale"] = [0, 0, 2, 2, 2, 1, 0]

            pipeline_kwargs["num_inference_steps"] = ltx_configs_override.get("fp_num_inference_steps", pipeline_kwargs.get("num_inference_steps"))
            # NOTE(review): the override keys are spelled "ship_*" — presumably
            # a typo for "skip_*" on the UI side; kept as-is so existing
            # callers still match. Confirm against the UI payload.
            pipeline_kwargs["skip_initial_inference_steps"] = ltx_configs_override.get("ship_initial_inference_steps", pipeline_kwargs.get("skip_initial_inference_steps"))
            pipeline_kwargs["skip_final_inference_steps"] = ltx_configs_override.get("ship_final_inference_steps", pipeline_kwargs.get("skip_final_inference_steps"))

        pipeline_kwargs.update({
            "prompt": prompt, "negative_prompt": negative_prompt, "height": downscaled_height, "width": downscaled_width,
            "num_frames": num_frames, "frame_rate": 24, "generator": generator, "output_type": "latent",
            "conditioning_items": all_conditions if all_conditions else None,
        })

        with torch.autocast(device_type="cuda", dtype=self.runtime_autocast_dtype, enabled=self.device.type == 'cuda'):
            latents_bruto = self.pipeline(**pipeline_kwargs).images
            log_tensor_info(latents_bruto, f"Latente Bruto Gerado para: '{prompt[:40]}...'")

        print("-" * 20 + " FIM: _generate_single_chunk_low " + "-"*20)
        return latents_bruto

    except Exception as e:
        print("-" * 20 + f" ERRO: _generate_single_chunk_low {e} " + "-"*20)
        traceback.print_exc()
        return None
    finally:
        # Release cached GPU memory between chunks.
        torch.cuda.empty_cache()
        torch.cuda.ipc_collect()
        self.finalize(keep_paths=[])
596
+
597
+
598
def generate_narrative_low(
        self, prompt: str, negative_prompt,
        height, width, duration,
        seed, initial_image_conditions=None, overlap_frames: int = 8,
        ltx_configs_override: dict = None):
    """
    [ORCHESTRATOR] Generate a low-res video from a multi-line narrative prompt.

    Each non-empty line of `prompt` becomes one chunk. Consecutive chunks are
    stitched by conditioning chunk i+1 on the trailing latent overlap of chunk
    i; the pruned latents are concatenated, saved, decoded and written out.

    Args:
        prompt: Multi-line text; one chunk per non-empty line.
        negative_prompt: Negative text prompt shared by all chunks.
        height: Output height in pixels.
        width: Output width in pixels.
        duration: Target duration in seconds (at 24 fps).
        seed: Base seed; chunk i uses seed + i. Random when None.
        initial_image_conditions: Optional list of ConditioningItem; the first
            item conditions chunk 0 fully and later chunks weakly (0.1).
        overlap_frames: Pixel-frame overlap used to bridge chunks.
        ltx_configs_override: Optional UI overrides forwarded to each chunk.

    Returns:
        (video_path, tensor_path, used_seed) on success,
        (None, None, None) on failure.

    Raises:
        ValueError: If the prompt contains no non-empty line.
    """
    print("\n" + "="*80)
    print("====== INICIANDO GERAÇÃO NARRATIVA EM CHUNKS (LOW-RES) ======")
    print("="*80)

    used_seed = random.randint(0, 2**32 - 1) if seed is None else int(seed)
    seed_everething(used_seed)
    FPS = 24.0

    prompt_list = [p.strip() for p in prompt.splitlines() if p.strip()]
    num_chunks = len(prompt_list)
    if num_chunks == 0: raise ValueError("O prompt está vazio ou não contém linhas válidas.")

    # Snap the total frame count to the 8k+1 grid the model expects (min 9).
    total_actual_frames = max(9, int(round((round(duration * FPS) - 1) / 8.0) * 8 + 1))

    if num_chunks > 1:
        # Split the 8-frame blocks evenly; the last chunk absorbs the remainder.
        total_blocks = (total_actual_frames - 1) // 8
        blocks_per_chunk = total_blocks // num_chunks
        blocks_last_chunk = total_blocks - (blocks_per_chunk * (num_chunks - 1))
        frames_per_chunk = blocks_per_chunk * 8 + 1
        frames_per_chunk_last = blocks_last_chunk * 8 + 1
    else:
        frames_per_chunk = total_actual_frames
        frames_per_chunk_last = total_actual_frames

    frames_per_chunk = max(9, frames_per_chunk)
    frames_per_chunk_last = max(9, frames_per_chunk_last)

    # Overlap expressed in latent frames (pixel frames / temporal VAE factor).
    poda_latents_num = overlap_frames // self.pipeline.video_scale_factor if self.pipeline.video_scale_factor > 0 else 0

    latentes_chunk_video = []
    condition_item_latent_overlap = None
    temp_dir = tempfile.mkdtemp(prefix="ltxv_narrative_"); self._register_tmp_dir(temp_dir)
    results_dir = "/app/output"; os.makedirs(results_dir, exist_ok=True)

    for i, chunk_prompt in enumerate(prompt_list):
        print(f"\n--- Gerando Chunk Narrativo {i+1}/{num_chunks}: '{chunk_prompt}' ---")

        current_image_conditions = []
        if initial_image_conditions:
            cond_item_original = initial_image_conditions[0]
            if i == 0:
                current_image_conditions.append(cond_item_original)
            else:
                # Later chunks keep only a weak link to the reference image.
                cond_item_fraco = ConditioningItem(
                    media_item=cond_item_original.media_item, media_frame_number=0, conditioning_strength=0.1
                )
                current_image_conditions.append(cond_item_fraco)

        if ltx_configs_override is None: ltx_configs_override = {}
        current_conditions = []
        if current_image_conditions: current_conditions.extend(current_image_conditions)
        if condition_item_latent_overlap: current_conditions.append(condition_item_latent_overlap)
        ltx_configs_override["conditioning_items"] = current_conditions

        num_frames_para_gerar = frames_per_chunk_last if i == num_chunks - 1 else frames_per_chunk
        if i > 0 and poda_latents_num > 0:
            # Generate extra frames that are pruned again after stitching.
            num_frames_para_gerar += overlap_frames

        latentes_bruto = self._generate_single_chunk_low(
            prompt=chunk_prompt, negative_prompt=negative_prompt, height=height, width=width,
            num_frames=num_frames_para_gerar, seed=used_seed + i,
            ltx_configs_override=ltx_configs_override
        )

        if latentes_bruto is None:
            print(f"ERRO FATAL: A geração do chunk {i+1} falhou. Abortando.")
            self.finalize(keep_paths=[])
            return None, None, None

        if i > 0 and poda_latents_num > 0:
            # Drop the leading latents duplicated from the previous chunk.
            latentes_bruto = latentes_bruto[:, :, poda_latents_num:, :, :]

        latentes_podado = latentes_bruto.clone().detach()
        if i < num_chunks - 1 and poda_latents_num > 0:
            latentes_podado = latentes_bruto[:, :, :-poda_latents_num, :, :].clone()
            overlap_latents = latentes_bruto[:, :, -poda_latents_num:, :, :].clone()
            # The trailing latents seed the next chunk at full strength.
            condition_item_latent_overlap = ConditioningItem(
                media_item=overlap_latents, media_frame_number=0, conditioning_strength=1.0
            )
        latentes_chunk_video.append(latentes_podado)

    final_latents_cpu = torch.cat(latentes_chunk_video, dim=2).cpu()
    log_tensor_info(final_latents_cpu, "Tensor de Latentes Final Concatenado (CPU)")

    tensor_path = os.path.join(results_dir, f"latents_narrative_{used_seed}.pt")
    torch.save(final_latents_cpu, tensor_path)

    try:
        with torch.autocast(device_type="cuda", dtype=self.runtime_autocast_dtype, enabled=self.device.type == 'cuda'):
            final_latents_gpu = final_latents_cpu.to(self.device)
            pixel_tensor = vae_manager_singleton.decode(final_latents_gpu, decode_timestep=float(self.config.get("decode_timestep", 0.05)))
        video_path = self._save_and_log_video(pixel_tensor, "narrative_video", FPS, temp_dir, results_dir, used_seed)

        self.finalize(keep_paths=[video_path, tensor_path])
        return video_path, tensor_path, used_seed

    except Exception as e:
        print("-" * 20 + f" ERRO: generate_narrative_low {e} " + "-"*20)
        traceback.print_exc()
        # FIX: honor the 3-tuple contract (the original returned bare None,
        # breaking callers that unpack three values), and clean up only on
        # the failure branch.
        self.finalize(keep_paths=[])
        return None, None, None
    finally:
        # FIX: the original also called self.finalize(keep_paths=[]) here,
        # which ran AFTER the success return and could discard the paths just
        # passed to finalize(keep_paths=[video_path, tensor_path]). Cleanup is
        # now per-branch; only GPU cache release remains unconditional.
        torch.cuda.empty_cache()
        torch.cuda.ipc_collect()
708
+
709
+
710
def generate_single_low(
        self, prompt: str, negative_prompt,
        height, width, duration,
        seed, initial_image_conditions=None,
        ltx_configs_override: dict = None):
    """
    [ORCHESTRATOR] Generate a low-res video from a single prompt in one chunk.

    Generates the full clip with one call to `_generate_single_chunk_low`,
    saves the latents, decodes them and writes the video.

    Args:
        prompt: Text prompt for the whole clip.
        negative_prompt: Negative text prompt.
        height: Output height in pixels.
        width: Output width in pixels.
        duration: Target duration in seconds (at 24 fps).
        seed: RNG seed; random when None.
        initial_image_conditions: Optional list of ConditioningItem applied
            to the chunk.
        ltx_configs_override: Optional UI overrides forwarded to the chunk.

    Returns:
        (video_path, tensor_path, used_seed) on success,
        (None, None, None) on failure.
    """
    print("\n" + "="*80)
    print("====== INICIANDO GERAÇÃO SIMPLES EM CHUNK ÚNICO (LOW-RES) ======")
    print("="*80)

    used_seed = random.randint(0, 2**32 - 1) if seed is None else int(seed)
    seed_everething(used_seed)
    FPS = 24.0

    # Snap the total frame count to the 8k+1 grid the model expects (min 9).
    total_actual_frames = max(9, int(round((round(duration * FPS) - 1) / 8.0) * 8 + 1))

    temp_dir = tempfile.mkdtemp(prefix="ltxv_single_"); self._register_tmp_dir(temp_dir)
    results_dir = "/app/output"; os.makedirs(results_dir, exist_ok=True)

    if ltx_configs_override is None: ltx_configs_override = {}
    ltx_configs_override["conditioning_items"] = initial_image_conditions if initial_image_conditions else []

    final_latents = self._generate_single_chunk_low(
        prompt=prompt, negative_prompt=negative_prompt, height=height, width=width,
        num_frames=total_actual_frames, seed=used_seed,
        ltx_configs_override=ltx_configs_override
    )

    if final_latents is None:
        print(f"ERRO FATAL: A geração do chunk único falhou. Abortando.")
        self.finalize(keep_paths=[])
        return None, None, None

    final_latents_cpu = final_latents.cpu()
    log_tensor_info(final_latents_cpu, "Tensor de Latentes Final (CPU)")

    tensor_path = os.path.join(results_dir, f"latents_single_{used_seed}.pt")
    torch.save(final_latents_cpu, tensor_path)

    try:
        with torch.autocast(device_type="cuda", dtype=self.runtime_autocast_dtype, enabled=self.device.type == 'cuda'):
            final_latents_gpu = final_latents_cpu.to(self.device)
            pixel_tensor = vae_manager_singleton.decode(final_latents_gpu, decode_timestep=float(self.config.get("decode_timestep", 0.05)))
        video_path = self._save_and_log_video(pixel_tensor, "single_video", FPS, temp_dir, results_dir, used_seed)

        self.finalize(keep_paths=[video_path, tensor_path])
        return video_path, tensor_path, used_seed

    except Exception as e:
        print("-" * 20 + f" ERRO: generate_single_low {e} " + "-"*20)
        traceback.print_exc()
        # FIX: honor the 3-tuple contract (the original returned bare None,
        # breaking callers that unpack three values), and clean up only on
        # the failure branch.
        self.finalize(keep_paths=[])
        return None, None, None
    finally:
        # FIX: the original also called self.finalize(keep_paths=[]) here,
        # which ran AFTER the success return and could discard the paths just
        # passed to finalize(keep_paths=[video_path, tensor_path]). Cleanup is
        # now per-branch; only GPU cache release remains unconditional.
        torch.cuda.empty_cache()
        torch.cuda.ipc_collect()
766
+
767
 
768
  # ==============================================================================
769
  # --- FUNÇÃO #4: ORQUESTRADOR (Upscaler + texturas hd) ---