Spaces: ahk-d/music-style-transfer-with-RAVE

Sleeping

App Files Community

ahk-d committed on Aug 1

Commit

af310d3

verified ·

1 Parent(s): 7383a83

Update app.py

Browse files

Files changed (1) hide show

app.py +8 -32

app.py CHANGED Viewed

@@ -1,31 +1,10 @@
-import gradio as gr
-import torchaudio
-import torch
-import os
-from rave import RAVE  # Assuming rave.py or pip package is available
-from huggingface_hub import hf_hub_download
-# ✅ Available RAVE models (can expand dynamically from HF repo)
-RAVE_MODELS = {
-    "Guitar": "guitar_iil_b2048_r48000_z16.ts",
-    "Soprano Sax": "sax_soprano_franziskaschroeder_b2048_r48000_z20.ts",
-    "Organ (Archive)": "organ_archive_b2048_r48000_z16.ts",
-    "Organ (Bach)": "organ_bach_b2048_r48000_z16.ts",
-    "Voice Multivoice": "voice-multi-b2048-r48000-z11.ts",
-    "Birds Dawn Chorus": "birds_dawnchorus_b2048_r48000_z8.ts",
-    "Magnets": "magnets_b2048_r48000_z8.ts",
-    "Whale Songs": "humpbacks_pondbrain_b2048_r48000_z20.ts"
-}
-MODEL_CACHE = {}
 import gradio as gr
 import torchaudio
 import torch
 import numpy as np
 from huggingface_hub import hf_hub_download
-# ✅ Available RAVE models
 RAVE_MODELS = {
     "Guitar": "guitar_iil_b2048_r48000_z16.ts",
     "Soprano Sax": "sax_soprano_franziskaschroeder_b2048_r48000_z20.ts",
@@ -40,7 +19,7 @@ RAVE_MODELS = {
 MODEL_CACHE = {}
 def load_rave_model(model_name):
-    """Load a TorchScript RAVE model directly from Hugging Face."""
     if model_name in MODEL_CACHE:
         return MODEL_CACHE[model_name]
@@ -55,31 +34,29 @@ def load_rave_model(model_name):
     return model
 def apply_rave(audio, model_name):
-    """Apply selected RAVE style transfer model to uploaded audio."""
     model = load_rave_model(model_name)
-    # Convert numpy audio (from Gradio) to torch tensor
     audio_tensor = torch.tensor(audio[0]).unsqueeze(0)  # [1, samples]
     sr = audio[1]
-    # ✅ resample if needed
     if sr != 48000:
         audio_tensor = torchaudio.functional.resample(audio_tensor, sr, 48000)
         sr = 48000
     with torch.no_grad():
-        # ✅ pass audio through RAVE TorchScript (encode/decode)
-        # TorchScript models are usually structured like: model.encode(x) / model.decode(z)
         z = model.encode(audio_tensor)
         processed_audio = model.decode(z)
     return (processed_audio.squeeze().cpu().numpy(), sr)
-# 🎛 Gradio Interface
 with gr.Blocks() as demo:
     gr.Markdown("## 🎛 RAVE Style Transfer on Stems")
-    gr.Markdown("Upload audio, select a RAVE model, and get a transformed version.")
     with gr.Row():
         audio_input = gr.Audio(type="numpy", label="Upload Audio", sources=["upload", "microphone"])
@@ -88,7 +65,6 @@ with gr.Blocks() as demo:
     with gr.Row():
         output_audio = gr.Audio(type="numpy", label="Transformed Audio")
-    # API + UI trigger
     process_btn = gr.Button("Apply Style Transfer")
     process_btn.click(fn=apply_rave, inputs=[audio_input, model_selector], outputs=output_audio)

 import gradio as gr
 import torchaudio
 import torch
 import numpy as np
 from huggingface_hub import hf_hub_download
+# ✅ Map of model names to files on Hugging Face
 RAVE_MODELS = {
     "Guitar": "guitar_iil_b2048_r48000_z16.ts",
     "Soprano Sax": "sax_soprano_franziskaschroeder_b2048_r48000_z20.ts",
 MODEL_CACHE = {}
 def load_rave_model(model_name):
+    """Load TorchScript RAVE model from Hugging Face Hub."""
     if model_name in MODEL_CACHE:
         return MODEL_CACHE[model_name]
     return model
 def apply_rave(audio, model_name):
+    """Apply selected RAVE model to uploaded audio."""
     model = load_rave_model(model_name)
+    # Convert numpy audio to torch tensor
     audio_tensor = torch.tensor(audio[0]).unsqueeze(0)  # [1, samples]
     sr = audio[1]
+    # ✅ Resample if needed (most RAVE models expect 48kHz)
     if sr != 48000:
         audio_tensor = torchaudio.functional.resample(audio_tensor, sr, 48000)
         sr = 48000
     with torch.no_grad():
+        # ✅ TorchScript models have encode & decode methods
         z = model.encode(audio_tensor)
         processed_audio = model.decode(z)
     return (processed_audio.squeeze().cpu().numpy(), sr)
+# 🎛 Gradio UI
 with gr.Blocks() as demo:
     gr.Markdown("## 🎛 RAVE Style Transfer on Stems")
+    gr.Markdown("Upload audio, pick a RAVE model, and get a transformed version.")
     with gr.Row():
         audio_input = gr.Audio(type="numpy", label="Upload Audio", sources=["upload", "microphone"])
     with gr.Row():
         output_audio = gr.Audio(type="numpy", label="Transformed Audio")
     process_btn = gr.Button("Apply Style Transfer")
     process_btn.click(fn=apply_rave, inputs=[audio_input, model_selector], outputs=output_audio)