Update app.py
Browse files
app.py
CHANGED
|
@@ -2,7 +2,8 @@ import gradio as gr
|
|
| 2 |
import spaces
|
| 3 |
import yaml
|
| 4 |
import torch
|
| 5 |
-
import librosa
|
|
|
|
| 6 |
from diffusers import DDIMScheduler
|
| 7 |
from transformers import AutoProcessor, ClapModel
|
| 8 |
from model.udit import UDiT
|
|
@@ -98,7 +99,13 @@ def sample_diffusion(mixture, timbre, ddim_steps=50, eta=0, seed=2023, guidance_
|
|
| 98 |
@spaces.GPU
|
| 99 |
def tse(gt_file_input, text_input, num_infer_steps, eta, seed, guidance_scale, guidance_rescale):
|
| 100 |
with torch.no_grad():
|
| 101 |
-
mixture, _ = librosa.load(gt_file_input, sr=sample_rate)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 102 |
# Check the length of the audio in samples
|
| 103 |
current_length = len(mixture)
|
| 104 |
target_length = sample_rate * 10
|
|
|
|
| 2 |
import spaces
|
| 3 |
import yaml
|
| 4 |
import torch
|
| 5 |
+
# import librosa
|
| 6 |
+
import torchaudio
|
| 7 |
from diffusers import DDIMScheduler
|
| 8 |
from transformers import AutoProcessor, ClapModel
|
| 9 |
from model.udit import UDiT
|
|
|
|
| 99 |
@spaces.GPU
|
| 100 |
def tse(gt_file_input, text_input, num_infer_steps, eta, seed, guidance_scale, guidance_rescale):
|
| 101 |
with torch.no_grad():
|
| 102 |
+
# mixture, _ = librosa.load(gt_file_input, sr=sample_rate)
|
| 103 |
+
mixture, sr = torchaudio.load(gt_file_input)
|
| 104 |
+
if sr != sample_rate:
|
| 105 |
+
resampler = torchaudio.transforms.Resample(orig_freq=sr, new_freq=sample_rate)
|
| 106 |
+
mixture = resampler(mixture)
|
| 107 |
+
sr = sample_rate
|
| 108 |
+
|
| 109 |
# Check the length of the audio in samples
|
| 110 |
current_length = len(mixture)
|
| 111 |
target_length = sample_rate * 10
|