abdeljalilELmajjodi committed on
Commit
5db0355
·
verified ·
1 Parent(s): a529177

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +72 -0
app.py ADDED
@@ -0,0 +1,72 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import torchaudio
3
+ from transformers import pipeline
4
+
5
# Build the speech-recognition pipeline once, at import time, so every
# request reuses the same loaded Moul-Sout-100 model.
asr_pipeline = pipeline(
    task="automatic-speech-recognition",
    model="01Yassine/moul-sout-100",
)

# Move the forced decoder ids stored in the checkpoint's generation config to
# `input_ids`, then clear `forced_decoder_ids`.
# NOTE(review): presumably a workaround for transformers versions that reject
# `forced_decoder_ids` at generation time -- confirm it is still required.
_generation_config = asr_pipeline.model.generation_config
_generation_config.input_ids = _generation_config.forced_decoder_ids
_generation_config.forced_decoder_ids = None
11
+
12
+
13
def ensure_mono_16k(audio_path):
    """Convert an audio file to mono 16 kHz and write it to a new temp WAV.

    The audio is loaded with torchaudio, averaged across channels when it has
    more than one, and resampled when its sample rate is not 16000 Hz.

    Each call writes to its own uniquely named temporary file, so concurrent
    requests cannot overwrite each other's audio (a single fixed path would
    be shared by every request).

    Args:
        audio_path: Path of the audio file to convert.

    Returns:
        Path of the newly written WAV file. The caller owns this file and is
        responsible for deleting it once it is no longer needed.
    """
    import os
    import tempfile

    waveform, sr = torchaudio.load(audio_path)

    # Convert to mono if necessary (dimension 0 is treated as the channel axis).
    if waveform.shape[0] > 1:
        waveform = waveform.mean(dim=0, keepdim=True)

    # Resample to 16 kHz if necessary.
    if sr != 16000:
        waveform = torchaudio.transforms.Resample(sr, 16000)(waveform)
        sr = 16000

    # mkstemp gives a collision-free name; close the descriptor straight away
    # because torchaudio opens the path itself.
    fd, tmp_path = tempfile.mkstemp(prefix="processed_16k_", suffix=".wav")
    os.close(fd)
    try:
        torchaudio.save(tmp_path, waveform, sr)
    except Exception:
        # Do not leave an empty or partial file behind when saving fails.
        os.remove(tmp_path)
        raise
    return tmp_path


def transcribe(audio):
    """Transcribe an audio file with the module-level ASR pipeline.

    Args:
        audio: Path of the recorded or uploaded audio file, or ``None`` when
            no audio was provided.

    Returns:
        The transcription text, or a prompt asking for audio when ``audio``
        is ``None``.
    """
    import contextlib
    import os

    if audio is None:
        return "Please record or upload an audio file."

    # Process and transcribe.
    processed_audio = ensure_mono_16k(audio)
    try:
        return asr_pipeline(processed_audio)["text"]
    finally:
        # The converted file is only needed for this one request; remove it
        # even when transcription raises.
        with contextlib.suppress(FileNotFoundError):
            os.remove(processed_audio)
41
+
42
+
43
# Page title and Markdown blurb shown at the top of the demo. The emoji are
# written as real Unicode characters (microphone, Moroccan flag) rather than
# mis-decoded byte sequences.
title = "🎙️ Moul-Sout ASR 🇲🇦"
description = """
**Moul-Sout** model for Darija ASR 🇲🇦.
You can record or upload an audio sample (it will be automatically resampled to 16 kHz mono),
and view the transcription result below.
"""
49
+
50
# Assemble the Gradio UI: an audio input, a button, and a textbox that
# receives the transcription returned by `transcribe`.
# NOTE(review): the nesting below is reconstructed -- confirm that the button
# and the output textbox are meant to sit outside the `gr.Row()`.
with gr.Blocks(title=title) as demo:
    gr.Markdown(f"# {title}\n{description}")

    with gr.Row():
        audio_input = gr.Audio(
            sources=["microphone", "upload"],
            # "filepath" makes Gradio pass `transcribe` a path, not raw samples.
            type="filepath",
            label="🎤 Record or Upload Audio (auto 16 kHz mono)",
        )

    transcribe_btn = gr.Button("🚀 Transcribe")

    output_text = gr.Textbox(label="🟩 Transcription Output")

    transcribe_btn.click(
        fn=transcribe,
        inputs=[audio_input],
        outputs=[output_text],
    )

# Local launch
if __name__ == "__main__":
    demo.launch()