Jihuai committed on
Commit
9f787c6
·
1 Parent(s): d572f56

app.py and requirements.txt init

Browse files
Files changed (2) hide show
  1. app.py +68 -0
  2. requirements.txt +19 -0
app.py ADDED
@@ -0,0 +1,68 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
"""Gradio demo app: download a speech-enhancement checkpoint and serve inference."""
import os
import torch
import gradio as gr
from huggingface_hub import hf_hub_download
from train import init, inference_file
import tempfile

# NOTE(review): `gradio` and `huggingface_hub` are imported here but are not
# pinned in requirements.txt — confirm they are installed by the Space runtime.

# ===== Basic config =====
# Use the GPU when one is visible to torch; otherwise fall back to CPU.
USE_CUDA = torch.cuda.is_available()
# Inference batch size, overridable via the BATCH_SIZE env var (default 12).
BATCH_SIZE = int(os.getenv("BATCH_SIZE", "12"))

# Read model repo and filename from environment variables
# (defaults point at the Banquet checkpoint repo on the Hub).
REPO_ID = os.getenv("MODEL_REPO_ID", "chenxie95/Language-Audio-Banquet-ckpt")
FILENAME = os.getenv("MODEL_FILENAME", "ev-pre-aug.ckpt")

# ===== Download & load weights =====
# hf_hub_download caches the file locally and returns its path; init() is the
# project-local loader from train.py that builds the inference system.
ckpt_path = hf_hub_download(repo_id=REPO_ID, filename=FILENAME)
system = init(ckpt_path, batch_size=BATCH_SIZE, use_cuda=USE_CUDA)
19
+
20
# ===== Inference =====
def inference(audio_path: str) -> str:
    """Enhance one audio file and return the path of the result.

    Parameters
    ----------
    audio_path : str
        Path to the noisy input audio, as supplied by the Gradio
        ``type="filepath"`` audio component.

    Returns
    -------
    str
        Path of the enhanced audio file written to the system temp directory.
    """
    temp_dir = tempfile.gettempdir()
    # Build "<stem>_enhanced<ext>" with splitext instead of
    # str.replace('.wav', ...): replace() rewrote *every* '.wav' occurrence
    # in the name and, for non-.wav uploads (e.g. some microphone formats),
    # left the output name identical to the input name.
    stem, ext = os.path.splitext(os.path.basename(audio_path))
    output_path = os.path.join(temp_dir, f"{stem}_enhanced{ext or '.wav'}")
    # NOTE(review): audio_path is passed twice — confirm the 4th argument of
    # inference_file() is intentionally the same file (e.g. used as the query
    # audio as well as the mixture).
    inference_file(system, audio_path, output_path, audio_path)
    return output_path
27
+
28
# ===== Gradio UI =====
with gr.Blocks() as demo:
    # Page header with usage instructions.
    # NOTE(review): the title says "DCCRN" but the checkpoint repo configured
    # above is "Language-Audio-Banquet-ckpt" — confirm the model name shown
    # to users is correct.
    gr.Markdown(
        """
        # 🎧 DCCRN Speech Enhancement (Demo)
        **How to use:** drag & drop a noisy audio clip (or upload / record) → click **Enhance** → listen & download the result.
        **Sample audio:** click a sample below to auto-fill the input, then click **Enhance**.
        """
    )

    with gr.Row():
        # type="filepath" delivers a path on disk to the handler, matching
        # inference()'s str parameter.
        inp = gr.Audio(
            sources=["upload", "microphone"],  # drag & drop supported by default
            type="filepath",
            label="Input: noisy speech (drag & drop or upload / record)"
        )
        out = gr.Audio(
            label="Output: enhanced speech (downloadable)",
            show_download_button=True
        )

    enhance_btn = gr.Button("Enhance")

    # On-page sample clips (make sure these files exist in the repo)
    gr.Examples(
        examples=[
            ["examples/noisy_1.wav"],
            ["examples/noisy_2.wav"],
            ["examples/noisy_3.wav"],
        ],
        inputs=inp,
        label="Sample audio",
        examples_per_page=3,
    )

    # Gradio ≥4.44: set concurrency on the event listener.
    # concurrency_limit=1 serializes requests so only one inference holds the
    # model (and GPU, if any) at a time.
    enhance_btn.click(inference, inputs=inp, outputs=out, concurrency_limit=1)

# Queue: keep a small queue to avoid OOM
demo.queue(max_size=16)
demo.launch()
requirements.txt ADDED
@@ -0,0 +1,19 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ einops==0.8.1
2
+ fire==0.7.0
3
+ hear21passt==0.0.26
4
+ hyper_connections==0.2.1
5
+ laion_clap==1.1.7
6
+ librosa==0.9.2
7
+ museval==0.4.1
8
+ numpy==2.3.3
9
+ omegaconf==2.3.0
10
+ packaging==25.0
11
+ pandas==2.3.2
12
+ pytorch_lightning==2.1.0
13
+ rotary_embedding_torch==0.3.5
14
+ scikit_learn==1.7.2
15
+ torch==2.7.0+cu126
16
+ torch_audiomentations==0.12.0
17
+ torchaudio==2.7.0+cu126
18
+ torchmetrics==0.11.4
19
+ tqdm==4.66.4