Spaces:

atlasia
/

MoulSot

Sleeping

App Files Files Community

abdeljalilELmajjodi committed on Oct 5

Commit

5db0355

verified ·

1 Parent(s): a529177

Create app.py

Browse files

Files changed (1) hide show

app.py +72 -0

app.py ADDED Viewed

	@@ -0,0 +1,72 @@

+import gradio as gr
+import torchaudio
+from transformers import pipeline
+# Build the speech-recognition pipeline once at import time, loading only the 01Yassine/moul-sout-100 checkpoint.
+asr_pipeline = pipeline("automatic-speech-recognition", model="01Yassine/moul-sout-100")
+# Copy the config's forced_decoder_ids onto an input_ids attribute, then clear forced_decoder_ids. NOTE(review): presumably a workaround for a generation-config error in newer transformers releases - confirm.
+asr_pipeline.model.generation_config.input_ids = asr_pipeline.model.generation_config.forced_decoder_ids
+asr_pipeline.model.generation_config.forced_decoder_ids = None
+def ensure_mono_16k(audio_path):
+    """Load audio, convert to mono + 16kHz, and save a temp version."""
+    waveform, sr = torchaudio.load(audio_path)
+    # Convert to mono if necessary
+    if waveform.shape[0] > 1:
+        waveform = waveform.mean(dim=0, keepdim=True)
+    # Resample to 16kHz if necessary
+    if sr != 16000:
+        resampler = torchaudio.transforms.Resample(sr, 16000)
+        waveform = resampler(waveform)
+        sr = 16000
+    tmp_path = "/tmp/processed_16k.wav"
+    torchaudio.save(tmp_path, waveform, sr)
+    return tmp_path
+def transcribe(audio):
+    if audio is None:
+        return "Please record or upload an audio file."
+    # Process and transcribe
+    processed_audio = ensure_mono_16k(audio)
+    result = asr_pipeline(processed_audio)["text"]
+    return result
+title = "🎙️ Moul-Sout ASR 🇲🇦"
+description = """
+**Moul-Sout** model for Darija ASR 🇲🇦.
+You can record or upload an audio sample (it will be automatically resampled to 16 kHz mono),
+and view the transcription result below.
+"""
+with gr.Blocks(title=title) as demo:
+    gr.Markdown(f"# {title}\n{description}")
+    with gr.Row():
+        audio_input = gr.Audio(
+            sources=["microphone", "upload"],
+            type="filepath",
+            label="🎤 Record or Upload Audio (auto 16 kHz mono)"
+        )
+    transcribe_btn = gr.Button("🚀 Transcribe")
+    output_text = gr.Textbox(label="🟩 Transcription Output")
+    transcribe_btn.click(
+        fn=transcribe,
+        inputs=[audio_input],
+        outputs=[output_text]
+    )
+# Start the Gradio app only when this file is executed directly as a script, not when it is imported.
+if __name__ == "__main__":
+    demo.launch()