Update app.py
app.py CHANGED
@@ -37,22 +37,28 @@ def apply_rave(audio, model_name):
     """Apply selected RAVE model to uploaded audio."""
     model = load_rave_model(model_name)
 
-    #
-
-    sr = audio[1]
+    # ✅ Unpack properly
+    waveform, sr = audio  # waveform: np.array [samples, channels]
 
-    #
-    if
-
+    # ✅ Convert stereo -> mono if needed
+    if waveform.ndim > 1:
+        waveform = np.mean(waveform, axis=1)
+
+    # ✅ Convert numpy to torch tensor
+    audio_tensor = torch.tensor(waveform).unsqueeze(0)  # shape: [1, samples]
+
+    # ✅ Resample if needed
+    if int(sr) != 48000:
+        audio_tensor = torchaudio.functional.resample(audio_tensor, int(sr), 48000)
         sr = 48000
 
     with torch.no_grad():
-        # ✅ TorchScript models have encode & decode methods
         z = model.encode(audio_tensor)
         processed_audio = model.decode(z)
 
     return (processed_audio.squeeze().cpu().numpy(), sr)
 
+
 # Gradio UI
 with gr.Blocks() as demo:
     gr.Markdown("## RAVE Style Transfer on Stems")
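
For reference, here is how the patched apply_rave reads as a self-contained sketch, with the imports it relies on. It assumes, as the new comments state, that audio arrives as a (waveform, sample_rate) tuple with a NumPy waveform, and that load_rave_model (defined elsewhere in app.py) returns a TorchScript RAVE export whose encode/decode accept a mono [1, samples] tensor at 48 kHz; the explicit float32 cast is an addition in this sketch so resample and encode always receive floating-point input.

import numpy as np
import torch
import torchaudio

def apply_rave(audio, model_name):
    """Apply selected RAVE model to uploaded audio."""
    model = load_rave_model(model_name)  # assumed to return a TorchScript RAVE export

    # Unpack the (waveform, sample_rate) tuple; waveform: np.array [samples, channels]
    waveform, sr = audio

    # Convert stereo -> mono if needed
    if waveform.ndim > 1:
        waveform = np.mean(waveform, axis=1)

    # Convert numpy to a torch tensor of shape [1, samples]
    # (float32 cast added in this sketch so resample/encode get floating-point input)
    audio_tensor = torch.tensor(waveform, dtype=torch.float32).unsqueeze(0)

    # Resample to the 48 kHz rate the model expects, if needed
    if int(sr) != 48000:
        audio_tensor = torchaudio.functional.resample(audio_tensor, int(sr), 48000)
        sr = 48000

    with torch.no_grad():
        z = model.encode(audio_tensor)
        processed_audio = model.decode(z)

    return (processed_audio.squeeze().cpu().numpy(), sr)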
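
The hunk only calls load_rave_model, so its body is not shown here. A minimal version consistent with the "TorchScript models have encode & decode methods" comment removed above might look like the following; the models/<model_name>.ts path is a hypothetical layout, and the cache simply avoids re-loading the export on every request.

import functools
import torch

@functools.lru_cache(maxsize=None)
def load_rave_model(model_name):
    # Hypothetical layout: one TorchScript export per model under models/<model_name>.ts
    model = torch.jit.load(f"models/{model_name}.ts", map_location="cpu")
    model.eval()
    return model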
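
The diff ends at the top of the gr.Blocks() context, so the wiring below is only an illustrative sketch of how apply_rave could be hooked up: hypothetical model names in a dropdown, a numpy-typed audio input, and a button bound to the function. One thing worth checking against this patch: gr.Audio(type="numpy") exchanges audio as (sample_rate, data) tuples in both directions, so the unpack order inside apply_rave and the order of its return tuple should match that convention.

import gradio as gr

MODEL_NAMES = ["model_a", "model_b"]  # hypothetical names, not taken from the diff

with gr.Blocks() as demo:
    gr.Markdown("## RAVE Style Transfer on Stems")
    model_choice = gr.Dropdown(choices=MODEL_NAMES, value=MODEL_NAMES[0], label="RAVE model")
    audio_in = gr.Audio(type="numpy", label="Input stem")
    audio_out = gr.Audio(type="numpy", label="Processed stem")
    run_btn = gr.Button("Apply RAVE")
    # NOTE: gr.Audio(type="numpy") passes and expects (sample_rate, data) tuples
    run_btn.click(fn=apply_rave, inputs=[audio_in, model_choice], outputs=audio_out)

demo.launch()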