Update app.py
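Load the CLAP checkpoint manually (ClapConfig + hf_hub_download + load_state_dict) instead of calling ClapModel.from_pretrained directly.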
app.py CHANGED
@@ -5,12 +5,15 @@ import torch
 # import librosa
 import torchaudio
 from diffusers import DDIMScheduler
-from transformers import AutoProcessor, ClapModel
+from transformers import AutoProcessor, ClapModel, ClapConfig
 from model.udit import UDiT
 from vae_modules.autoencoder_wrapper import Autoencoder
 import numpy as np
-
+from huggingface_hub import hf_hub_download
+
+clap_bin_path = hf_hub_download("laion/larger_clap_general", "pytorch_model.bin")
 
+# from huggingface_hub import snapshot_download
 # snapshot_download(repo_id="laion/larger_clap_general",
 #                   local_dir="./larger_clap_general",
 #                   local_dir_use_symlinks=False)

@@ -27,8 +30,14 @@ with open(diffusion_config, 'r') as fp:
 
 v_prediction = diff_config["ddim"]["v_prediction"]
 
-clapmodel = ClapModel.from_pretrained("laion/larger_clap_general").to(device)
+# clapmodel = ClapModel.from_pretrained("laion/larger_clap_general").to(device)
 processor = AutoProcessor.from_pretrained('laion/larger_clap_general')
+clap_config = ClapConfig.from_pretrained("laion/larger_clap_general")  # only downloads config.json (or use a local path)
+clapmodel = ClapModel(clap_config)
+clap_ckpt = torch.load(clap_bin_path, map_location='cpu')
+clapmodel.load_state_dict(clap_ckpt)
+clapmodel.to(device)
+
 autoencoder = Autoencoder(autoencoder_path, 'stable_vae', quantization_first=True)
 autoencoder.eval()
 autoencoder.to(device)
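For reference, a minimal usage sketch of the manually loaded CLAP model, assuming the standard transformers ClapModel/ClapProcessor API (get_text_features); the prompt string below is a hypothetical example, not part of this commit:

# hypothetical prompt; the processor tokenizes text for ClapModel
prompt = ["a dog barking in the distance"]
inputs = processor(text=prompt, return_tensors="pt", padding=True)
inputs = {k: v.to(device) for k, v in inputs.items()}
with torch.no_grad():
    text_embed = clapmodel.get_text_features(**inputs)  # (batch, projection_dim)

The ClapConfig + load_state_dict pattern keeps the download to exactly two files (config.json and pytorch_model.bin), whereas the commented-out snapshot_download would pull the whole repository. On recent PyTorch versions, torch.load(clap_bin_path, map_location='cpu', weights_only=True) would be a safer way to read the checkpoint.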