Spaces:
Runtime error
Runtime error
app.py and requirements.txt init
Browse files- app.py +68 -0
- requirements.txt +19 -0
app.py
ADDED
|
@@ -0,0 +1,68 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
|
| 2 |
+
import torch
|
| 3 |
+
import gradio as gr
|
| 4 |
+
from huggingface_hub import hf_hub_download
|
| 5 |
+
from train import init, inference_file
|
| 6 |
+
import tempfile
|
| 7 |
+
|
| 8 |
+
# ===== Basic config =====
# Run on GPU whenever torch can see one; otherwise fall back to CPU.
USE_CUDA = torch.cuda.is_available()
# Inference batch size, overridable through the BATCH_SIZE env var.
BATCH_SIZE = int(os.environ.get("BATCH_SIZE", "12"))

# Checkpoint location on the Hugging Face Hub, overridable via env vars.
REPO_ID = os.environ.get("MODEL_REPO_ID", "chenxie95/Language-Audio-Banquet-ckpt")
FILENAME = os.environ.get("MODEL_FILENAME", "ev-pre-aug.ckpt")

# ===== Download & load weights =====
# Fetch (and locally cache) the checkpoint, then build the inference
# system once at process startup so every request reuses it.
ckpt_path = hf_hub_download(repo_id=REPO_ID, filename=FILENAME)
system = init(ckpt_path, batch_size=BATCH_SIZE, use_cuda=USE_CUDA)
|
| 19 |
+
|
| 20 |
+
# ===== Inference =====
def inference(audio_path: str) -> str:
    """Enhance one noisy audio file and return the path of the result.

    Parameters
    ----------
    audio_path : str
        Filesystem path of the noisy input clip, as handed over by the
        Gradio ``Audio(type="filepath")`` component. May have any audio
        extension, not only ``.wav``.

    Returns
    -------
    str
        Path of the enhanced file, written to the system temp directory.
    """
    temp_dir = tempfile.gettempdir()
    # Use splitext instead of `name.replace('.wav', '_enhanced.wav')`:
    # the old form produced NO distinct name for non-.wav inputs
    # (microphone/webm, mp3, flac uploads) and matched '.wav' anywhere
    # in the filename. This keeps the original extension and always
    # appends '_enhanced'; extensionless inputs default to '.wav'.
    stem, ext = os.path.splitext(os.path.basename(audio_path))
    output_path = os.path.join(temp_dir, f"{stem}_enhanced{ext or '.wav'}")
    # NOTE(review): audio_path is passed twice — presumably
    # (system, input, output, text/reference source); confirm against
    # train.inference_file's signature.
    inference_file(system, audio_path, output_path, audio_path)
    return output_path
|
| 27 |
+
|
| 28 |
+
# ===== Gradio UI =====
with gr.Blocks() as demo:
    # Page header with usage instructions.
    gr.Markdown(
        """
# 🎧 DCCRN Speech Enhancement (Demo)
**How to use:** drag & drop a noisy audio clip (or upload / record) → click **Enhance** → listen & download the result.
**Sample audio:** click a sample below to auto-fill the input, then click **Enhance**.
"""
    )

    with gr.Row():
        noisy_input = gr.Audio(
            # The "upload" source accepts drag & drop out of the box.
            sources=["upload", "microphone"],
            type="filepath",
            label="Input: noisy speech (drag & drop or upload / record)",
        )
        enhanced_output = gr.Audio(
            label="Output: enhanced speech (downloadable)",
            show_download_button=True,
        )

    run_button = gr.Button("Enhance")

    # On-page sample clips (these files must exist in the repo).
    gr.Examples(
        examples=[
            ["examples/noisy_1.wav"],
            ["examples/noisy_2.wav"],
            ["examples/noisy_3.wav"],
        ],
        inputs=noisy_input,
        label="Sample audio",
        examples_per_page=3,
    )

    # Gradio >= 4.44 configures concurrency on the event listener itself;
    # a single worker keeps GPU memory use predictable.
    run_button.click(
        inference,
        inputs=noisy_input,
        outputs=enhanced_output,
        concurrency_limit=1,
    )

# Keep the request queue small to avoid OOM under load.
demo.queue(max_size=16)
demo.launch()
|
requirements.txt
ADDED
|
@@ -0,0 +1,19 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
einops==0.8.1
fire==0.7.0
# gradio and huggingface_hub are imported by app.py but were missing here,
# which makes the Space fail at import time.
gradio>=4.44.0
hear21passt==0.0.26
huggingface_hub
hyper_connections==0.2.1
laion_clap==1.1.7
librosa==0.9.2
museval==0.4.1
numpy==2.3.3
omegaconf==2.3.0
packaging==25.0
pandas==2.3.2
pytorch_lightning==2.1.0
rotary_embedding_torch==0.3.5
scikit_learn==1.7.2
# NOTE: '+cu126' local-version wheels are not on PyPI; pip needs
# --extra-index-url https://download.pytorch.org/whl/cu126 to resolve these.
torch==2.7.0+cu126
torch_audiomentations==0.12.0
torchaudio==2.7.0+cu126
torchmetrics==0.11.4
tqdm==4.66.4
|