Spaces:

HaawkeNeural
/

framepack-i2v

Running

App Files Community

lisonallen committed on Apr 18

Commit

cdbfba8

1 Parent(s): 0f1d758

修复Stateless GPU环境中CUDA初始化问题

Browse files

Files changed (2) hide show

app.py +73 -24
diffusers_helper/memory.py +53 -14

app.py CHANGED Viewed

@@ -30,30 +30,46 @@ from diffusers_helper.hunyuan import encode_prompt_conds, vae_decode, vae_encode
 from diffusers_helper.utils import save_bcthw_as_mp4, crop_or_pad_yield_mask, soft_append_bcthw, resize_and_center_crop, state_dict_weighted_merge, state_dict_offset_merge, generate_timestamp
 from diffusers_helper.models.hunyuan_video_packed import HunyuanVideoTransformer3DModelPacked
 from diffusers_helper.pipelines.k_diffusion_hunyuan import sample_hunyuan
-from diffusers_helper.memory import cpu, gpu, get_cuda_free_memory_gb, move_model_to_device_with_memory_preservation, offload_model_from_device_for_memory_preservation, fake_diffusers_current_device, DynamicSwapInstaller, unload_complete_models, load_model_as_complete
 from diffusers_helper.thread_utils import AsyncStream, async_run
 from diffusers_helper.gradio.progress_bar import make_progress_bar_css, make_progress_bar_html
 from transformers import SiglipImageProcessor, SiglipVisionModel
 from diffusers_helper.clip_vision import hf_clip_vision_encode
 from diffusers_helper.bucket_tools import find_nearest_bucket
-# 获取可用的CUDA内存
-try:
-    if torch.cuda.is_available():
-        free_mem_gb = get_cuda_free_memory_gb(gpu)
-        print(f'Free VRAM {free_mem_gb} GB')
-    else:
         free_mem_gb = 6.0  # 默认值
-        print("CUDA不可用，使用默认的内存设置")
-except Exception as e:
-    free_mem_gb = 6.0  # 默认值
-    print(f"获取CUDA内存时出错: {e}，使用默认的内存设置")
-high_vram = free_mem_gb > 60
-print(f'High-VRAM Mode: {high_vram}')
 # 使用加载模型的函数
 def load_models():
     print("开始加载模型...")
     # 加载模型
@@ -93,7 +109,7 @@ def load_models():
     image_encoder.requires_grad_(False)
     transformer.requires_grad_(False)
-    if torch.cuda.is_available() and gpu.type == 'cuda':
         if not high_vram:
             # DynamicSwapInstaller is same as huggingface's enable_sequential_offload but 3x faster
             DynamicSwapInstaller.install_model(transformer, device=gpu)
@@ -105,28 +121,61 @@ def load_models():
             vae.to(gpu)
             transformer.to(gpu)
-    return text_encoder, text_encoder_2, tokenizer, tokenizer_2, vae, feature_extractor, image_encoder, transformer
 # 使用Hugging Face Spaces GPU装饰器
 if IN_HF_SPACE and 'spaces' in globals():
     @spaces.GPU
-    def load_models_with_gpu():
         return load_models()
-    print("使用@spaces.GPU装饰器加载模型")
-    text_encoder, text_encoder_2, tokenizer, tokenizer_2, vae, feature_extractor, image_encoder, transformer = load_models_with_gpu()
-else:
-    print("不使用@spaces.GPU装饰器，直接加载模型")
-    text_encoder, text_encoder_2, tokenizer, tokenizer_2, vae, feature_extractor, image_encoder, transformer = load_models()
-stream = AsyncStream()
-outputs_folder = './outputs/'
-os.makedirs(outputs_folder, exist_ok=True)
 @torch.no_grad()
 def worker(input_image, prompt, n_prompt, seed, total_second_length, latent_window_size, steps, cfg, gs, rs, gpu_memory_preservation, use_teacache):
     total_latent_sections = (total_second_length * 30) / (latent_window_size * 4)
     total_latent_sections = int(max(round(total_latent_sections), 1))

 from diffusers_helper.utils import save_bcthw_as_mp4, crop_or_pad_yield_mask, soft_append_bcthw, resize_and_center_crop, state_dict_weighted_merge, state_dict_offset_merge, generate_timestamp
 from diffusers_helper.models.hunyuan_video_packed import HunyuanVideoTransformer3DModelPacked
 from diffusers_helper.pipelines.k_diffusion_hunyuan import sample_hunyuan
+from diffusers_helper.memory import cpu, gpu, get_cuda_free_memory_gb, move_model_to_device_with_memory_preservation, offload_model_from_device_for_memory_preservation, fake_diffusers_current_device, DynamicSwapInstaller, unload_complete_models, load_model_as_complete, IN_HF_SPACE as MEMORY_IN_HF_SPACE
 from diffusers_helper.thread_utils import AsyncStream, async_run
 from diffusers_helper.gradio.progress_bar import make_progress_bar_css, make_progress_bar_html
 from transformers import SiglipImageProcessor, SiglipVisionModel
 from diffusers_helper.clip_vision import hf_clip_vision_encode
 from diffusers_helper.bucket_tools import find_nearest_bucket
+outputs_folder = './outputs/'
+os.makedirs(outputs_folder, exist_ok=True)
+# 在Spaces环境中，我们延迟所有CUDA操作
+if not IN_HF_SPACE:
+    # 仅在非Spaces环境中获取CUDA内存
+    try:
+        if torch.cuda.is_available():
+            free_mem_gb = get_cuda_free_memory_gb(gpu)
+            print(f'Free VRAM {free_mem_gb} GB')
+        else:
+            free_mem_gb = 6.0  # 默认值
+            print("CUDA不可用，使用默认的内存设置")
+    except Exception as e:
         free_mem_gb = 6.0  # 默认值
+        print(f"获取CUDA内存时出错: {e}，使用默认的内存设置")
+    high_vram = free_mem_gb > 60
+    print(f'High-VRAM Mode: {high_vram}')
+else:
+    # 在Spaces环境中使用默认值
+    print("在Spaces环境中使用默认内存设置")
+    free_mem_gb = 60.0  # 默认在Spaces中使用较高的值
+    high_vram = True
+    print(f'High-VRAM Mode: {high_vram}')
+# 使用models变量存储全局模型引用
+models = {}
 # 使用加载模型的函数
 def load_models():
+    global models
     print("开始加载模型...")
     # 加载模型
     image_encoder.requires_grad_(False)
     transformer.requires_grad_(False)
+    if torch.cuda.is_available():
         if not high_vram:
             # DynamicSwapInstaller is same as huggingface's enable_sequential_offload but 3x faster
             DynamicSwapInstaller.install_model(transformer, device=gpu)
             vae.to(gpu)
             transformer.to(gpu)
+    # 保存到全局变量
+    models = {
+        'text_encoder': text_encoder,
+        'text_encoder_2': text_encoder_2,
+        'tokenizer': tokenizer,
+        'tokenizer_2': tokenizer_2,
+        'vae': vae,
+        'feature_extractor': feature_extractor,
+        'image_encoder': image_encoder,
+        'transformer': transformer
+    }
+    return models
 # 使用Hugging Face Spaces GPU装饰器
 if IN_HF_SPACE and 'spaces' in globals():
     @spaces.GPU
+    def initialize_models():
+        """在@spaces.GPU装饰器内初始化模型"""
         return load_models()
+# 以下函数内部会延迟获取模型
+def get_models():
+    """获取模型，如果尚未加载则加载模型"""
+    global models
+    if not models:
+        if IN_HF_SPACE and 'spaces' in globals():
+            print("使用@spaces.GPU装饰器加载模型")
+            models = initialize_models()
+        else:
+            print("直接加载模型")
+            load_models()
+    return models
+stream = AsyncStream()
 @torch.no_grad()
 def worker(input_image, prompt, n_prompt, seed, total_second_length, latent_window_size, steps, cfg, gs, rs, gpu_memory_preservation, use_teacache):
+    # 获取模型
+    models = get_models()
+    text_encoder = models['text_encoder']
+    text_encoder_2 = models['text_encoder_2']
+    tokenizer = models['tokenizer']
+    tokenizer_2 = models['tokenizer_2']
+    vae = models['vae']
+    feature_extractor = models['feature_extractor']
+    image_encoder = models['image_encoder']
+    transformer = models['transformer']
     total_latent_sections = (total_second_length * 30) / (latent_window_size * 4)
     total_latent_sections = int(max(round(total_latent_sections), 1))

diffusers_helper/memory.py CHANGED Viewed

@@ -10,17 +10,26 @@ IN_HF_SPACE = os.environ.get('SPACE_ID') is not None
 # 设置CPU设备
 cpu = torch.device('cpu')
-# 尝试设置GPU设备，如果不可用则回退到CPU
-try:
-    if torch.cuda.is_available():
-        gpu = torch.device(f'cuda:{torch.cuda.current_device()}')
-    else:
-        print("CUDA不可用，使用CPU作为默认设备")
-        gpu = torch.device('cpu')
-except Exception as e:
-    print(f"初始化CUDA设备时出错: {e}")
-    print("回退到CPU设备")
-    gpu = torch.device('cpu')
 gpu_complete_modules = []
@@ -73,7 +82,11 @@ class DynamicSwapInstaller:
         return
-def fake_diffusers_current_device(model: torch.nn.Module, target_device: torch.device):
     if hasattr(model, 'scale_shift_table'):
         model.scale_shift_table.data = model.scale_shift_table.data.to(target_device)
         return
@@ -88,6 +101,10 @@ def get_cuda_free_memory_gb(device=None):
     if device is None:
         device = gpu
     # 如果不是CUDA设备，返回默认值
     if device.type != 'cuda':
         print("无法获取非CUDA设备的内存信息，返回默认值")
@@ -109,8 +126,17 @@ def get_cuda_free_memory_gb(device=None):
 def move_model_to_device_with_memory_preservation(model, target_device, preserved_memory_gb=0):
     print(f'Moving {model.__class__.__name__} to {target_device} with preserved memory: {preserved_memory_gb} GB')
     # 如果目标设备是CPU或当前在CPU上，直接移动
-    if target_device.type == 'cpu' or gpu.type == 'cpu':
         model.to(device=target_device)
         torch.cuda.empty_cache() if torch.cuda.is_available() else None
         return
@@ -131,8 +157,17 @@ def move_model_to_device_with_memory_preservation(model, target_device, preserve
 def offload_model_from_device_for_memory_preservation(model, target_device, preserved_memory_gb=0):
     print(f'Offloading {model.__class__.__name__} from {target_device} to preserve memory: {preserved_memory_gb} GB')
     # 如果目标设备是CPU或当前在CPU上，直接处理
-    if target_device.type == 'cpu' or gpu.type == 'cpu':
         model.to(device=cpu)
         torch.cuda.empty_cache() if torch.cuda.is_available() else None
         return
@@ -161,6 +196,10 @@ def unload_complete_models(*args):
 def load_model_as_complete(model, target_device, unload=True):
     if unload:
         unload_complete_models()

 # 设置CPU设备
 cpu = torch.device('cpu')
+# 在Stateless GPU环境中，不要在主进程初始化CUDA
+def get_gpu_device():
+    if IN_HF_SPACE:
+        # 在Spaces中将延迟初始化GPU设备
+        return 'cuda'  # 返回字符串，而不是实际初始化设备
+    # 非Spaces环境正常初始化
+    try:
+        if torch.cuda.is_available():
+            return torch.device(f'cuda:{torch.cuda.current_device()}')
+        else:
+            print("CUDA不可用，使用CPU作为默认设备")
+            return torch.device('cpu')
+    except Exception as e:
+        print(f"初始化CUDA设备时出错: {e}")
+        print("回退到CPU设备")
+        return torch.device('cpu')
+# 保存一个字符串表示，而不是实际的设备对象
+gpu = get_gpu_device()
 gpu_complete_modules = []
         return
+def fake_diffusers_current_device(model: torch.nn.Module, target_device):
+    # 转换字符串设备为torch.device
+    if isinstance(target_device, str):
+        target_device = torch.device(target_device)
     if hasattr(model, 'scale_shift_table'):
         model.scale_shift_table.data = model.scale_shift_table.data.to(target_device)
         return
     if device is None:
         device = gpu
+    # 如果是字符串，转换为设备
+    if isinstance(device, str):
+        device = torch.device(device)
     # 如果不是CUDA设备，返回默认值
     if device.type != 'cuda':
         print("无法获取非CUDA设备的内存信息，返回默认值")
 def move_model_to_device_with_memory_preservation(model, target_device, preserved_memory_gb=0):
     print(f'Moving {model.__class__.__name__} to {target_device} with preserved memory: {preserved_memory_gb} GB')
+    # 如果是字符串，转换为设备
+    if isinstance(target_device, str):
+        target_device = torch.device(target_device)
+    # 如果gpu是字符串，转换为设备
+    gpu_device = gpu
+    if isinstance(gpu_device, str):
+        gpu_device = torch.device(gpu_device)
     # 如果目标设备是CPU或当前在CPU上，直接移动
+    if target_device.type == 'cpu' or gpu_device.type == 'cpu':
         model.to(device=target_device)
         torch.cuda.empty_cache() if torch.cuda.is_available() else None
         return
 def offload_model_from_device_for_memory_preservation(model, target_device, preserved_memory_gb=0):
     print(f'Offloading {model.__class__.__name__} from {target_device} to preserve memory: {preserved_memory_gb} GB')
+    # 如果是字符串，转换为设备
+    if isinstance(target_device, str):
+        target_device = torch.device(target_device)
+    # 如果gpu是字符串，转换为设备
+    gpu_device = gpu
+    if isinstance(gpu_device, str):
+        gpu_device = torch.device(gpu_device)
     # 如果目标设备是CPU或当前在CPU上，直接处理
+    if target_device.type == 'cpu' or gpu_device.type == 'cpu':
         model.to(device=cpu)
         torch.cuda.empty_cache() if torch.cuda.is_available() else None
         return
 def load_model_as_complete(model, target_device, unload=True):
+    # 如果是字符串，转换为设备
+    if isinstance(target_device, str):
+        target_device = torch.device(target_device)
     if unload:
         unload_complete_models()