Spaces:

ahk-d
/

music-style-transfer-with-RAVE

Running

App Files Files Community

ahk-d committed on Aug 1

Commit

b0f2644

verified ·

1 Parent(s): 57c442c

Update app.py

Browse files

Files changed (1) hide show

app.py +142 -65

app.py CHANGED Viewed

@@ -2,76 +2,153 @@ import gradio as gr
 import torchaudio
 import torch
 import numpy as np
 from huggingface_hub import hf_hub_download
-# ✅ Map of model names to files on Hugging Face
 RAVE_MODELS = {
-    "Guitar": "guitar_iil_b2048_r48000_z16.ts",
-    "Soprano Sax": "sax_soprano_franziskaschroeder_b2048_r48000_z20.ts",
-    "Organ (Archive)": "organ_archive_b2048_r48000_z16.ts",
-    "Organ (Bach)": "organ_bach_b2048_r48000_z16.ts",
-    "Voice Multivoice": "voice-multi-b2048-r48000-z11.ts",
-    "Birds Dawn Chorus": "birds_dawnchorus_b2048_r48000_z8.ts",
-    "Magnets": "magnets_b2048_r48000_z8.ts",
-    "Whale Songs": "humpbacks_pondbrain_b2048_r48000_z20.ts"
 }
 MODEL_CACHE = {}
-def load_rave_model(model_name):
-    """Load TorchScript RAVE model from Hugging Face Hub."""
-    if model_name in MODEL_CACHE:
-        return MODEL_CACHE[model_name]
-    model_file = hf_hub_download(
-        repo_id="Intelligent-Instruments-Lab/rave-models",
-        filename=RAVE_MODELS[model_name]
-    )
-    model = torch.jit.load(model_file, map_location="cpu")
-    model.eval()
-    MODEL_CACHE[model_name] = model
-    return model
-def apply_rave(audio, model_name):
-    """Apply selected RAVE model to uploaded audio."""
-    model = load_rave_model(model_name)
-    # ✅ Unpack properly
-    waveform, sr = audio  # waveform: np.array [samples, channels]
-    # ✅ Convert stereo -> mono if needed
-    if waveform.ndim > 1:
-        waveform = np.mean(waveform, axis=1)
-    # ✅ Convert numpy to torch tensor
-    audio_tensor = torch.tensor(waveform).unsqueeze(0)  # shape: [1, samples]
-    # ✅ Resample if needed
-    if int(sr) != 48000:
-        audio_tensor = torchaudio.functional.resample(audio_tensor, int(sr), 48000)
-        sr = 48000
-    with torch.no_grad():
-        z = model.encode(audio_tensor)
-        processed_audio = model.decode(z)
-    return (processed_audio.squeeze().cpu().numpy(), sr)
-# 🎛 Gradio UI
-with gr.Blocks() as demo:
-    gr.Markdown("## 🎛 RAVE Style Transfer on Stems")
-    gr.Markdown("Upload audio, pick a RAVE model, and get a transformed version.")
     with gr.Row():
-        audio_input = gr.Audio(type="numpy", label="Upload Audio", sources=["upload", "microphone"])
-        model_selector = gr.Dropdown(list(RAVE_MODELS.keys()), label="Select Style", value="Guitar")
-    with gr.Row():
-        output_audio = gr.Audio(type="numpy", label="Transformed Audio")
-    process_btn = gr.Button("Apply Style Transfer")
-    process_btn.click(fn=apply_rave, inputs=[audio_input, model_selector], outputs=output_audio)
-demo.launch()

 import torchaudio
 import torch
 import numpy as np
+import os
 from huggingface_hub import hf_hub_download
+# HF Spaces doesn't need this, but keeps local compatibility
+# os.environ["GRADIO_TEMP_DIR"] = "/tmp/gradio_cache"
+# ✅ Updated list: only confirmed existing models
+# Maps the display name offered in the UI dropdown to a
+# (Hugging Face repo id, model file name) pair consumed by load_rave_model.
 RAVE_MODELS = {
+    # Models from Intelligent-Instruments-Lab/rave-models
+    "Electric Guitar (IIL)": ("Intelligent-Instruments-Lab/rave-models", "guitar_iil_b2048_r48000_z16.ts"),
+    "Soprano Sax (IIL)": ("Intelligent-Instruments-Lab/rave-models", "sax_soprano_franziskaschroeder_b2048_r48000_z20.ts"),
+    "Organ (Archive IIL)": ("Intelligent-Instruments-Lab/rave-models", "organ_archive_b2048_r48000_z16.ts"),
+    "Organ (Bach IIL)": ("Intelligent-Instruments-Lab/rave-models", "organ_bach_b2048_r48000_z16.ts"),
+    "Magnetic Resonator Piano (IIL)": ("Intelligent-Instruments-Lab/rave-models", "mrp_strengjavera_b2048_r44100_z16.ts"),
+    "Multi-Voice (IIL)": ("Intelligent-Instruments-Lab/rave-models", "voice-multi-b2048-r48000-z11.ts"),
+    "Birds (Dawn Chorus IIL)": ("Intelligent-Instruments-Lab/rave-models", "birds_dawnchorus_b2048_r48000_z8.ts"),
+    "Water (Pond Brain IIL)": ("Intelligent-Instruments-Lab/rave-models", "water_pondbrain_b2048_r48000_z16.ts"),
+    "Marine Mammals (IIL)": ("Intelligent-Instruments-Lab/rave-models", "marinemammals_pondbrain_b2048_r48000_z20.ts"),
+    # Models from shuoyang-zheng/jaspers-rave-models
+    "Guitar Picking (Jasper Causal)": ("shuoyang-zheng/jaspers-rave-models", "guitar_picking_dm_b2048_r44100_z8_causal.ts"),
+    "Singing Voice (Jasper Non-Causal)": ("shuoyang-zheng/jaspers-rave-models", "gtsinger_b2048_r44100_z16_noncausal.ts"),
+    "Drums (Jasper AAM)": ("shuoyang-zheng/jaspers-rave-models", "aam_drum_b2048_r44100_z16_noncausal.ts"),
+    "Bass (Jasper AAM)": ("shuoyang-zheng/jaspers-rave-models", "aam_bass_b2048_r44100_z16_noncausal.ts"),
+    "Strings (Jasper AAM)": ("shuoyang-zheng/jaspers-rave-models", "aam_string_b2048_r44100_z16_noncausal.ts"),
+    "Speech (Jasper Causal)": ("shuoyang-zheng/jaspers-rave-models", "librispeech100_b2048_r44100_z8_causal.ts"),
+    "Brass/Sax (Jasper AAM)": ("shuoyang-zheng/jaspers-rave-models", "aam_brass_sax_b2048_r44100_z8_noncausal.ts"),
+    # Model from lancelotblanchard/rave_percussion
+    "Percussion (Lancelot)": ("lancelotblanchard/rave_percussion", "percussion.ts"),
 }
+# Already-loaded models keyed by display name; load_rave_model fills it on
+# first use so each model file is loaded at most once per process.
 MODEL_CACHE = {}
+print("🎛 RAVE Style Transfer - Starting up...")
+def load_rave_model(model_key):
+    """
+    Return the TorchScript model registered under model_key.
+
+    A model already present in MODEL_CACHE is returned as-is. Otherwise the
+    file listed in RAVE_MODELS is obtained with hf_hub_download, loaded on
+    CPU, switched to eval mode, stored in MODEL_CACHE and returned. Any
+    failure is printed and re-raised to the caller.
+    """
+    try:
+        return MODEL_CACHE[model_key]
+    except KeyError:
+        pass  # first request for this model: load it below
+    print(f"📥 Loading model: {model_key}...")
+    try:
+        hub_repo, hub_file = RAVE_MODELS[model_key]
+        local_path = hf_hub_download(repo_id=hub_repo, filename=hub_file)
+        scripted = torch.jit.load(local_path, map_location="cpu")
+        scripted.eval()
+        MODEL_CACHE[model_key] = scripted
+        print(f"✅ Loaded: {model_key}")
+        return scripted
+    except Exception as e:
+        print(f"❌ Error loading {model_key}: {str(e)}")
+        raise
+def apply_rave(audio_path, model_name):
+    """
+    Apply RAVE style transfer to an audio file.
+
+    Args:
+        audio_path: Path to the input audio file; falsy when nothing was
+            uploaded or recorded.
+        model_name: Key into RAVE_MODELS selecting the style model.
+
+    Returns:
+        A pair (audio, status). On success audio is (sample_rate, numpy_array)
+        for the Gradio audio output; on failure it is None. status is a
+        user-facing message in both cases. Errors are reported through
+        status rather than raised.
+    """
+    import re  # local import: only used to read the rate from the model file name
+
+    if not audio_path:
+        return None, "❌ Please upload an audio file."
+    try:
+        print(f"🎵 Processing audio: {os.path.basename(audio_path)} with {model_name}")
+        # Load and preprocess audio
+        waveform, sr = torchaudio.load(audio_path)
+        print(f"📊 Original: {waveform.shape}, {sr}Hz")
+        # Convert to mono if stereo
+        if waveform.shape[0] > 1:
+            print("🔄 Converting stereo to mono")
+            waveform = torch.mean(waveform, dim=0, keepdim=True)
+        # The model file names carry an "r<Hz>" tag (r48000, r44100, ...).
+        # Resample to that rate instead of a fixed 48 kHz, so the r44100
+        # models are no longer fed (and their output labelled as) 48 kHz audio.
+        # File names without the tag keep the previous 48 kHz default.
+        # NOTE(review): assumes the tag is the rate the model expects -
+        # confirm for third-party checkpoints.
+        _, model_file_name = RAVE_MODELS[model_name]
+        rate_tag = re.search(r"[_-]r(\d+)[_-]", model_file_name)
+        target_sr = int(rate_tag.group(1)) if rate_tag else 48000
+        if sr != target_sr:
+            print(f"🔄 Resampling from {sr}Hz to {target_sr}Hz")
+            waveform = torchaudio.functional.resample(waveform, sr, target_sr)
+            sr = target_sr
+        # Add batch dimension
+        waveform = waveform.unsqueeze(0)
+        # Load model and process
+        model = load_rave_model(model_name)
+        print("🤖 Applying RAVE transformation...")
+        with torch.no_grad():
+            z = model.encode(waveform)
+            processed = model.decode(z)
+        # Prepare output: a single squeeze() drops the batch dimension and any
+        # other singleton dimension before handing the samples to Gradio
+        arr = processed.squeeze().cpu().numpy()
+        print("✅ Transformation complete!")
+        return (sr, arr), "✅ Style transfer successful!"
+    except Exception as e:
+        # UI boundary: report the failure in the status box instead of raising
+        error_msg = f"❌ Error: {str(e)}"
+        print(error_msg)
+        return None, error_msg
+# --- Gradio UI ---
+print("🚀 Creating Gradio interface...")
+with gr.Blocks(theme=gr.themes.Soft(), title="RAVE Style Transfer") as demo:
+    gr.Markdown("# 🎛 RAVE Style Transfer Stem Remixer")
+    gr.Markdown("Transform your audio using AI-powered style transfer. Upload audio and choose an instrument style!")
+    # One row with two columns: inputs and the trigger button in the first,
+    # the transformed audio and the status text in the second.
     with gr.Row():
+        with gr.Column():
+            # type="filepath" hands apply_rave a path on disk, which it opens
+            # with torchaudio.load.
+            audio_input = gr.Audio(
+                type="filepath",
+                label="🎵 Upload Your Audio",
+                sources=["upload", "microphone"]
+            )
+            # Choices are the display names (keys) of RAVE_MODELS.
+            model_selector = gr.Dropdown(
+                choices=list(RAVE_MODELS.keys()),
+                label="🎸 Select Instrument Style",
+                value="Electric Guitar (IIL)",
+                interactive=True
+            )
+            process_btn = gr.Button("🔄 Apply RAVE Transform", variant="primary", size="lg")
+        with gr.Column():
+            # Receives the (sample_rate, array) pair that apply_rave returns
+            # on success, or None on failure.
+            output_audio = gr.Audio(
+                type="numpy",
+                label="🎧 Transformed Audio"
+            )
+            status_output = gr.Textbox(
+                label="📊 Status",
+                interactive=False,
+                value="Ready to transform audio..."
+            )
+    # apply_rave returns (audio, status); the two values fill the outputs
+    # list in the same order.
+    process_btn.click(
+        fn=apply_rave,
+        inputs=[audio_input, model_selector],
+        outputs=[output_audio, status_output]
+    )
+    gr.Markdown("---")
+    gr.Markdown(
+        "<p style='text-align: center; font-size: small;'>"
+        "Powered by RAVE (Realtime Audio Variational autoEncoder) | "
+        "Models from Intelligent Instruments Lab & Community"
+        "</p>"
+    )
+print("🌐 Launching demo...")
+# Start the server only when this file is executed as a script.
+if __name__ == "__main__":
+    demo.launch()