Spaces:

ahk-d
/

music-style-transfer-with-RAVE

Running

App Files Files Community

ahk-d committed on Aug 1

Commit

7383a83

verified ·

1 Parent(s): 76f1409

Update app.py

Browse files

Files changed (1) hide show

app.py +27 -5

app.py CHANGED Viewed

@@ -19,8 +19,28 @@ RAVE_MODELS = {
 MODEL_CACHE = {}
 def load_rave_model(model_name):
-    """Load a RAVE model from Hugging Face or cache."""
     if model_name in MODEL_CACHE:
         return MODEL_CACHE[model_name]
@@ -29,7 +49,7 @@ def load_rave_model(model_name):
         filename=RAVE_MODELS[model_name]
     )
-    model = RAVE.load(model_file)  # RAVE.load assumes wrapper for loading .ts file
     model.eval()
     MODEL_CACHE[model_name] = model
     return model
@@ -42,17 +62,19 @@ def apply_rave(audio, model_name):
     audio_tensor = torch.tensor(audio[0]).unsqueeze(0)  # [1, samples]
     sr = audio[1]
     if sr != 48000:
         audio_tensor = torchaudio.functional.resample(audio_tensor, sr, 48000)
         sr = 48000
-    # Pass through model (encode -> decode)
     with torch.no_grad():
         z = model.encode(audio_tensor)
         processed_audio = model.decode(z)
-    processed_audio = processed_audio.squeeze().cpu().numpy()
-    return (processed_audio, sr)
 # 🎛 Gradio Interface
 with gr.Blocks() as demo:

 MODEL_CACHE = {}
+import gradio as gr
+import torchaudio
+import torch
+import numpy as np
+from huggingface_hub import hf_hub_download
+# ✅ Available RAVE models
+RAVE_MODELS = {
+    "Guitar": "guitar_iil_b2048_r48000_z16.ts",
+    "Soprano Sax": "sax_soprano_franziskaschroeder_b2048_r48000_z20.ts",
+    "Organ (Archive)": "organ_archive_b2048_r48000_z16.ts",
+    "Organ (Bach)": "organ_bach_b2048_r48000_z16.ts",
+    "Voice Multivoice": "voice-multi-b2048-r48000-z11.ts",
+    "Birds Dawn Chorus": "birds_dawnchorus_b2048_r48000_z8.ts",
+    "Magnets": "magnets_b2048_r48000_z8.ts",
+    "Whale Songs": "humpbacks_pondbrain_b2048_r48000_z20.ts"
+}
+MODEL_CACHE = {}
 def load_rave_model(model_name):
+    """Load a TorchScript RAVE model directly from Hugging Face."""
     if model_name in MODEL_CACHE:
         return MODEL_CACHE[model_name]
         filename=RAVE_MODELS[model_name]
     )
+    model = torch.jit.load(model_file, map_location="cpu")
     model.eval()
     MODEL_CACHE[model_name] = model
     return model
     audio_tensor = torch.tensor(audio[0]).unsqueeze(0)  # [1, samples]
     sr = audio[1]
+    # ✅ resample if needed
     if sr != 48000:
         audio_tensor = torchaudio.functional.resample(audio_tensor, sr, 48000)
         sr = 48000
     with torch.no_grad():
+        # ✅ pass audio through RAVE TorchScript (encode/decode)
+        # TorchScript models are usually structured like: model.encode(x) / model.decode(z)
         z = model.encode(audio_tensor)
         processed_audio = model.decode(z)
+    return (processed_audio.squeeze().cpu().numpy(), sr)
 # 🎛 Gradio Interface
 with gr.Blocks() as demo: