Spaces:

ByteDance
/

InfiniteYou-FLUX

Running on Zero

App Files Files Community

EndlessSora committed on Mar 22

Commit

dc8acb8

1 Parent(s): cbaad8e

improve memory usage for zero GPUs

Browse files

Files changed (3) hide show

app.py +58 -27
pipelines/pipeline_flux_infusenet.py +5 -8
pipelines/pipeline_infu_flux.py +59 -10

app.py CHANGED Viewed

@@ -60,6 +60,38 @@ def download_models():
         exit()
 def prepare_pipeline(model_version, enable_realism, enable_anti_blur):
     if (
         loaded_pipeline_config['pipeline'] is not None
@@ -74,34 +106,34 @@ def prepare_pipeline(model_version, enable_realism, enable_anti_blur):
     loaded_pipeline_config["model_version"] = model_version
     pipeline = loaded_pipeline_config['pipeline']
-    if pipeline is None or pipeline.model_version != model_version:
-        del loaded_pipeline_config['pipeline']
-        del pipeline
-        gc.collect()
-        torch.cuda.empty_cache()
-        model_path = f'./models/InfiniteYou/infu_flux_v1.0/{model_version}'
-        print(f'loading model from {model_path}')
-        pipeline = InfUFluxPipeline(
-            base_model_path='./models/FLUX.1-dev',
-            infu_model_path=model_path,
-            insightface_root_path='./models/InfiniteYou/supports/insightface',
-            image_proj_num_tokens=8,
-            infu_flux_version='v1.0',
-            model_version=model_version,
-        )
         loaded_pipeline_config['pipeline'] = pipeline
     pipeline.pipe.delete_adapters(['realism', 'anti_blur'])
     loras = []
-    if enable_realism:
-        loras.append(['./models/InfiniteYou/supports/optional_loras/flux_realism_lora.safetensors', 'realism', 1.0])
-    if enable_anti_blur:
-        loras.append(['./models/InfiniteYou/supports/optional_loras/flux_anti_blur_lora.safetensors', 'anti_blur', 1.0])
-    pipeline.load_loras(loras)
     return pipeline
@@ -238,7 +270,7 @@ with gr.Blocks() as demo:
         inputs=[ui_id_image, ui_control_image, ui_prompt_text, ui_seed, ui_enable_realism, ui_enable_anti_blur, ui_model_version],
         outputs=[image_output],
         fn=generate_examples,
-        cache_examples=False
     )
     ui_btn_generate.click(
@@ -309,10 +341,9 @@ huggingface_hub.login(os.getenv('PRIVATE_HF_TOKEN'))
 download_models()
-prepare_pipeline(model_version='sim_stage1', enable_realism=True, enable_anti_blur=True)
-prepare_pipeline(model_version=ModelVersion.DEFAULT_VERSION, enable_realism=ENABLE_REALISM_DEFAULT, enable_anti_blur=ENABLE_ANTI_BLUR_DEFAULT)
-demo.queue()
 demo.launch()
 # demo.launch(server_name='0.0.0.0')  # IPv4
 # demo.launch(server_name='[::]')  # IPv6

         exit()
+def init_pipeline(model_version, enable_realism, enable_anti_blur):  # Build an InfUFluxPipeline, store it in loaded_pipeline_config, load the requested LoRAs, and return it.
+    loaded_pipeline_config["enable_realism"] = enable_realism  # record the requested settings in the shared config dict
+    loaded_pipeline_config["enable_anti_blur"] = enable_anti_blur
+    loaded_pipeline_config["model_version"] = model_version
+    pipeline = loaded_pipeline_config['pipeline']  # NOTE(review): this value is never read; `pipeline` is rebound below
+    gc.collect()
+    torch.cuda.empty_cache()  # free cached CUDA memory before constructing the pipeline
+    model_path = f'./models/InfiniteYou/infu_flux_v1.0/{model_version}'
+    print(f'loading model from {model_path}')
+    pipeline = InfUFluxPipeline(
+        base_model_path='./models/FLUX.1-dev',
+        infu_model_path=model_path,
+        insightface_root_path='./models/InfiniteYou/supports/insightface',
+        image_proj_num_tokens=8,
+        infu_flux_version='v1.0',
+        model_version=model_version,
+    )
+    loaded_pipeline_config['pipeline'] = pipeline  # publish the new pipeline through the shared config dict
+    pipeline.pipe.delete_adapters(['realism', 'anti_blur'])  # NOTE(review): runs on a freshly built pipeline — confirm this is safe when no adapter is loaded yet
+    loras = []  # collects [adapter_name, scale] entries for load_loras_state_dict
+    if enable_realism: loras.append(['realism', 1.0])
+    if enable_anti_blur: loras.append(['anti_blur', 1.0])
+    pipeline.load_loras_state_dict(loras)
+    return pipeline
 def prepare_pipeline(model_version, enable_realism, enable_anti_blur):
     if (
         loaded_pipeline_config['pipeline'] is not None
     loaded_pipeline_config["model_version"] = model_version
     pipeline = loaded_pipeline_config['pipeline']
+    if pipeline is None or pipeline.model_version != model_version:
+        print(f'Switching model to {model_version}')
+        pipeline.model_version = model_version
+        if model_version == 'aes_stage2':
+            pipeline.infusenet_sim.cpu()
+            pipeline.image_proj_model_sim.cpu()
+            torch.cuda.empty_cache()
+            pipeline.infusenet_aes.to(pipeline.pipe.device)
+            pipeline.pipe.controlnet = pipeline.infusenet_aes
+            pipeline.image_proj_model_aes.to(pipeline.pipe.device)
+            pipeline.image_proj_model = pipeline.image_proj_model_aes
+        else:
+            pipeline.infusenet_aes.cpu()
+            pipeline.image_proj_model_aes.cpu()
+            torch.cuda.empty_cache()
+            pipeline.infusenet_sim.to(pipeline.pipe.device)
+            pipeline.pipe.controlnet = pipeline.infusenet_sim
+            pipeline.image_proj_model_sim.to(pipeline.pipe.device)
+            pipeline.image_proj_model = pipeline.image_proj_model_sim
         loaded_pipeline_config['pipeline'] = pipeline
     pipeline.pipe.delete_adapters(['realism', 'anti_blur'])
     loras = []
+    if enable_realism: loras.append(['realism', 1.0])
+    if enable_anti_blur: loras.append(['anti_blur', 1.0])
+    pipeline.load_loras_state_dict(loras)
     return pipeline
         inputs=[ui_id_image, ui_control_image, ui_prompt_text, ui_seed, ui_enable_realism, ui_enable_anti_blur, ui_model_version],
         outputs=[image_output],
         fn=generate_examples,
+        cache_examples=True
     )
     ui_btn_generate.click(
 download_models()
+init_pipeline(model_version=ModelVersion.DEFAULT_VERSION, enable_realism=ENABLE_REALISM_DEFAULT, enable_anti_blur=ENABLE_ANTI_BLUR_DEFAULT)
+# demo.queue()
 demo.launch()
 # demo.launch(server_name='0.0.0.0')  # IPv4
 # demo.launch(server_name='[::]')  # IPv6

pipelines/pipeline_flux_infusenet.py CHANGED Viewed

@@ -261,9 +261,6 @@ class FluxInfuseNetPipeline(FluxControlNetPipeline):
             images.
         """
-        # CPU offload controlnet
-        self.controlnet.cpu()
         height = height or self.default_sample_size * self.vae_scale_factor
         width = width or self.default_sample_size * self.vae_scale_factor
@@ -307,6 +304,11 @@ class FluxInfuseNetPipeline(FluxControlNetPipeline):
         device = self._execution_device
         dtype = self.transformer.dtype
         lora_scale = (
             self.joint_attention_kwargs.get("scale", None) if self.joint_attention_kwargs is not None else None
         )
@@ -599,11 +601,6 @@ class FluxInfuseNetPipeline(FluxControlNetPipeline):
                 if XLA_AVAILABLE:
                     xm.mark_step()
-        # CPU offload controlnet, move back T5 to GPU
-        self.controlnet.cpu()
-        torch.cuda.empty_cache()
-        self.text_encoder_2.to(device)
         if output_type == "latent":
             image = latents

             images.
         """
         height = height or self.default_sample_size * self.vae_scale_factor
         width = width or self.default_sample_size * self.vae_scale_factor
         device = self._execution_device
         dtype = self.transformer.dtype
+        # CPU offload controlnet, move back T5 to GPU
+        self.controlnet.cpu()
+        torch.cuda.empty_cache()
+        self.text_encoder_2.to(device)
         lora_scale = (
             self.joint_attention_kwargs.get("scale", None) if self.joint_attention_kwargs is not None else None
         )
                 if XLA_AVAILABLE:
                     xm.mark_step()
         if output_type == "latent":
             image = latents

pipelines/pipeline_infu_flux.py CHANGED Viewed

@@ -137,26 +137,33 @@ class InfUFluxPipeline:
         # Load pipeline
         try:
-            infusenet_path = os.path.join(infu_model_path, 'InfuseNetModel')
-            self.infusenet = FluxControlNetModel.from_pretrained(infusenet_path, torch_dtype=torch.bfloat16)
         except:
             print("No InfiniteYou model found. Downloading from HuggingFace `ByteDance/InfiniteYou` to `./models/InfiniteYou` ...")
             snapshot_download(repo_id='ByteDance/InfiniteYou', local_dir='./models/InfiniteYou', local_dir_use_symlinks=False)
-            infu_model_path = os.path.join('./models/InfiniteYou', f'infu_flux_{infu_flux_version}', model_version)
             infusenet_path = os.path.join(infu_model_path, 'InfuseNetModel')
-            self.infusenet = FluxControlNetModel.from_pretrained(infusenet_path, torch_dtype=torch.bfloat16)
             insightface_root_path = './models/InfiniteYou/supports/insightface'
         try:
             pipe = FluxInfuseNetPipeline.from_pretrained(
                 base_model_path,
-                controlnet=self.infusenet,
                 torch_dtype=torch.bfloat16,
             )
         except:
             try:
                 pipe = FluxInfuseNetPipeline.from_single_file(
                     base_model_path,
-                    controlnet=self.infusenet,
                     torch_dtype=torch.bfloat16,
                 )
             except Exception as e:
@@ -168,8 +175,9 @@ class InfUFluxPipeline:
                 print('\nIf you are using other models, please download them to a local directory and use `base_model_path` to specify the correct path.')
                 exit()
         pipe.to('cuda', torch.bfloat16)
-        # CPU offload controlnet in advance
         pipe.controlnet.cpu()
         torch.cuda.empty_cache()
         # pipe.enable_model_cpu_offload()
         self.pipe = pipe
@@ -187,14 +195,33 @@ class InfUFluxPipeline:
             output_dim=4096,
             ff_mult=4,
         )
-        image_proj_model_path = os.path.join(infu_model_path, 'image_proj_model.bin')
         ipm_state_dict = torch.load(image_proj_model_path, map_location="cpu")
         image_proj_model.load_state_dict(ipm_state_dict['image_proj'])
         del ipm_state_dict
         image_proj_model.to('cuda', torch.bfloat16)
         image_proj_model.eval()
-        self.image_proj_model = image_proj_model
         # Load face encoder
         self.app_640 = FaceAnalysis(name='antelopev2',
@@ -211,12 +238,34 @@ class InfUFluxPipeline:
         self.arcface_model = init_recognition_model('arcface', device='cuda')
     def load_loras(self, loras):
         names, scales = [],[]
         for lora_path, lora_name, lora_scale in loras:
             if lora_path != "":
                 print(f"loading lora {lora_path}")
-                self.pipe.load_lora_weights(lora_path, adapter_name = lora_name)
                 names.append(lora_name)
                 scales.append(lora_scale)

         # Load pipeline
         try:
+            infusenet_path = os.path.join(os.path.dirname(infu_model_path), 'aes_stage2', 'InfuseNetModel')
+            self.infusenet_aes = FluxControlNetModel.from_pretrained(infusenet_path, torch_dtype=torch.bfloat16)
+            infusenet_path = os.path.join(os.path.dirname(infu_model_path), 'sim_stage1', 'InfuseNetModel')
+            self.infusenet_sim = FluxControlNetModel.from_pretrained(infusenet_path, torch_dtype=torch.bfloat16)
         except:
             print("No InfiniteYou model found. Downloading from HuggingFace `ByteDance/InfiniteYou` to `./models/InfiniteYou` ...")
             snapshot_download(repo_id='ByteDance/InfiniteYou', local_dir='./models/InfiniteYou', local_dir_use_symlinks=False)
+            infu_model_path = os.path.join('./models/InfiniteYou', f'infu_flux_{infu_flux_version}', 'aes_stage2')
+            infusenet_path = os.path.join(infu_model_path, 'InfuseNetModel')
+            self.infusenet_aes = FluxControlNetModel.from_pretrained(infusenet_path, torch_dtype=torch.bfloat16)
+            infu_model_path = os.path.join('./models/InfiniteYou', f'infu_flux_{infu_flux_version}', 'sim_stage1')
             infusenet_path = os.path.join(infu_model_path, 'InfuseNetModel')
+            self.infusenet_sim = FluxControlNetModel.from_pretrained(infusenet_path, torch_dtype=torch.bfloat16)
             insightface_root_path = './models/InfiniteYou/supports/insightface'
+        self.infusenet_sim.cpu()
+        torch.cuda.empty_cache()
         try:
             pipe = FluxInfuseNetPipeline.from_pretrained(
                 base_model_path,
+                controlnet=self.infusenet_aes,
                 torch_dtype=torch.bfloat16,
             )
         except:
             try:
                 pipe = FluxInfuseNetPipeline.from_single_file(
                     base_model_path,
+                    controlnet=self.infusenet_aes,
                     torch_dtype=torch.bfloat16,
                 )
             except Exception as e:
                 print('\nIf you are using other models, please download them to a local directory and use `base_model_path` to specify the correct path.')
                 exit()
         pipe.to('cuda', torch.bfloat16)
+        # CPU offload controlnet and T5 in advance
         pipe.controlnet.cpu()
+        pipe.text_encoder_2.cpu()
         torch.cuda.empty_cache()
         # pipe.enable_model_cpu_offload()
         self.pipe = pipe
             output_dim=4096,
             ff_mult=4,
         )
+        image_proj_model_path = os.path.join(os.path.dirname(infu_model_path), 'aes_stage2', 'image_proj_model.bin')
         ipm_state_dict = torch.load(image_proj_model_path, map_location="cpu")
         image_proj_model.load_state_dict(ipm_state_dict['image_proj'])
         del ipm_state_dict
         image_proj_model.to('cuda', torch.bfloat16)
         image_proj_model.eval()
+        self.image_proj_model_aes = image_proj_model
+        image_proj_model = Resampler(
+            dim=1280,
+            depth=4,
+            dim_head=64,
+            heads=20,
+            num_queries=num_tokens,
+            embedding_dim=image_emb_dim,
+            output_dim=4096,
+            ff_mult=4,
+        )
+        image_proj_model_path = os.path.join(os.path.dirname(infu_model_path), 'sim_stage1', 'image_proj_model.bin')
+        ipm_state_dict = torch.load(image_proj_model_path, map_location="cpu")
+        image_proj_model.load_state_dict(ipm_state_dict['image_proj'])
+        del ipm_state_dict
+        image_proj_model.to('cpu', torch.bfloat16)
+        image_proj_model.eval()
+        self.image_proj_model_sim = image_proj_model
+        self.image_proj_model = self.image_proj_model_aes
         # Load face encoder
         self.app_640 = FaceAnalysis(name='antelopev2',
         self.arcface_model = init_recognition_model('arcface', device='cuda')
+        # Load LoRAs in advance
+        user_agent = {
+            "file_type": "attn_procs_weights",
+            "framework": "pytorch",
+        }
+        self.loras_state_dict = {}
+        self.loras_state_dict['realism'] = self.pipe._fetch_state_dict(os.path.join(os.path.dirname(insightface_root_path), 'optional_loras', 'flux_realism_lora.safetensors'),
+            weight_name=None, use_safetensors=True, local_files_only=None, cache_dir=None, force_download=False, proxies=None, token=None, revision=None, subfolder=None, user_agent=user_agent, allow_pickle=True)
+        self.loras_state_dict['anti_blur'] = self.pipe._fetch_state_dict(os.path.join(os.path.dirname(insightface_root_path), 'optional_loras', 'flux_anti_blur_lora.safetensors'),
+            weight_name=None, use_safetensors=True, local_files_only=None, cache_dir=None, force_download=False, proxies=None, token=None, revision=None, subfolder=None, user_agent=user_agent, allow_pickle=True)
+    def load_loras_state_dict(self, loras):  # Load each requested LoRA from self.loras_state_dict and activate them; `loras` yields (adapter_name, scale) pairs.
+        names, scales = [],[]  # parallel lists handed to set_adapters below
+        for lora_name, lora_scale in loras:
+            print(f"loading lora state dict of {lora_name}")
+            self.pipe.load_lora_weights(self.loras_state_dict[lora_name], adapter_name=lora_name)  # passes the stored state dict instead of a file path; raises KeyError for a name missing from self.loras_state_dict
+            names.append(lora_name)
+            scales.append(lora_scale)
+        if len(names) > 0:  # set_adapters is skipped when no LoRA was requested
+            self.pipe.set_adapters(names, adapter_weights=scales)
     def load_loras(self, loras):
         names, scales = [],[]
         for lora_path, lora_name, lora_scale in loras:
             if lora_path != "":
                 print(f"loading lora {lora_path}")
+                self.pipe.load_lora_weights(lora_path, adapter_name=lora_name)
                 names.append(lora_name)
                 scales.append(lora_scale)