Spaces:

01Yassine
/

MoulSot

Runtime error

App Files Files Community

root committed on Oct 4

Commit

9d3cda7

1 Parent(s): a5f1898

compare

Browse files

Files changed (1) hide show

app.py +39 -13

app.py CHANGED Viewed

@@ -1,4 +1,5 @@
 import gradio as gr
 from transformers import pipeline
 # Preload both models
@@ -7,33 +8,58 @@ models = {
     "moulsot_v0.2_1000": pipeline("automatic-speech-recognition", model="01Yassine/moulsot_v0.2_1000")
 }
-# Adjust generation config for both
 for m in models.values():
     m.model.generation_config.input_ids = m.model.generation_config.forced_decoder_ids
     m.model.generation_config.forced_decoder_ids = None
 def transcribe(audio, selected_model):
     if audio is None:
         return "Please record or upload an audio file.", "Please record or upload an audio file."
-    pipe = models[selected_model]
     other_model = [k for k in models if k != selected_model][0]
     # Run inference
-    result_selected = pipe(audio)["text"]
-    result_other = models[other_model](audio)["text"]
     return result_selected, result_other
 title = "🎙️ Moulsot Whisper ASR Comparison"
 description = """
-Compare two fine-tuned Whisper models for **Moroccan ASR**:
 - 🟩 **moulsot_v0.1_2500**
 - 🟦 **moulsot_v0.2_1000**
-You can **record** or **upload** an audio sample, then see transcriptions from both models side by side.
 """
 with gr.Blocks(title=title) as demo:
@@ -41,9 +67,9 @@ with gr.Blocks(title=title) as demo:
     with gr.Row():
         audio_input = gr.Audio(
-            sources=["microphone", "upload"],
-            type="filepath",
-            label="🎤 Record or Upload Audio"
         )
         model_choice = gr.Radio(
             ["moulsot_v0.1_2500", "moulsot_v0.2_1000"],
@@ -54,8 +80,8 @@ with gr.Blocks(title=title) as demo:
     transcribe_btn = gr.Button("🚀 Transcribe")
     with gr.Row():
-        output_selected = gr.Textbox(label="🟩 Model 1 Output")
-        output_other = gr.Textbox(label="🟦 Model 2 Output")
     transcribe_btn.click(
         fn=transcribe,
@@ -63,6 +89,6 @@ with gr.Blocks(title=title) as demo:
         outputs=[output_selected, output_other]
     )
-# For local testing
 if __name__ == "__main__":
     demo.launch()

 import gradio as gr
+import torchaudio
 from transformers import pipeline
 # Preload both models
     "moulsot_v0.2_1000": pipeline("automatic-speech-recognition", model="01Yassine/moulsot_v0.2_1000")
 }
+# Adjust generation configs for both: copy each model's forced_decoder_ids
+# onto the generation config's input_ids attribute, then clear
+# forced_decoder_ids. The copy must happen before the reset.
+# NOTE(review): presumably a workaround for transformers versions that
+# reject forced_decoder_ids on Whisper checkpoints — confirm.
 for m in models.values():
     m.model.generation_config.input_ids = m.model.generation_config.forced_decoder_ids
     m.model.generation_config.forced_decoder_ids = None
+def ensure_mono_16k(audio_path):
+    """Write a mono, 16 kHz WAV copy of ``audio_path`` and return its path.
+
+    The audio is loaded with torchaudio, averaged over channels when it has
+    more than one, and resampled when its sample rate is not 16000. The
+    result is saved to a newly created temporary file; the caller is
+    responsible for deleting that file once it is no longer needed.
+    """
+    import tempfile
+
+    target_sr = 16000
+    waveform, sr = torchaudio.load(audio_path)
+    # Convert to mono if necessary
+    if waveform.shape[0] > 1:
+        waveform = waveform.mean(dim=0, keepdim=True)
+    # Resample to 16kHz if necessary
+    if sr != target_sr:
+        waveform = torchaudio.transforms.Resample(sr, target_sr)(waveform)
+    # Use a unique file per call: one fixed path shared by every request can
+    # be overwritten by another request while a model is still reading it.
+    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp:
+        tmp_path = tmp.name
+    torchaudio.save(tmp_path, waveform, target_sr)
+    return tmp_path
 def transcribe(audio, selected_model):
+    """Transcribe ``audio`` with the selected model and with the other model.
+
+    Args:
+        audio: Path of the recorded/uploaded audio file, or ``None`` when
+            nothing was provided.
+        selected_model: Key of the chosen pipeline in ``models``.
+
+    Returns:
+        A ``(selected_text, other_text)`` tuple. When ``audio`` is ``None``
+        both entries are the same prompt asking for an audio file.
+    """
+    import contextlib
+    import os
+
     if audio is None:
         return "Please record or upload an audio file.", "Please record or upload an audio file."
+    # Selected + other model (looked up first so an unknown model name fails
+    # before any temporary file is created)
+    pipe_selected = models[selected_model]
     other_model = [k for k in models if k != selected_model][0]
+    pipe_other = models[other_model]
+    # Convert uploaded/recorded audio to mono 16kHz
+    processed_audio = ensure_mono_16k(audio)
+    try:
+        # Run inference
+        result_selected = pipe_selected(processed_audio)["text"]
+        result_other = pipe_other(processed_audio)["text"]
+    finally:
+        # Best-effort cleanup of the per-request temporary file so repeated
+        # requests do not fill the temp directory.
+        with contextlib.suppress(OSError):
+            os.remove(processed_audio)
     return result_selected, result_other
 title = "🎙️ Moulsot Whisper ASR Comparison"
 description = """
+Compare two fine-tuned Whisper models for **Arabic ASR**:
 - 🟩 **moulsot_v0.1_2500**
 - 🟦 **moulsot_v0.2_1000**
+You can **record** or **upload** an audio sample (automatically resampled to 16 kHz mono),
+then view transcriptions from both models side by side.
 """
 with gr.Blocks(title=title) as demo:
     with gr.Row():
         audio_input = gr.Audio(
+            sources=["microphone", "upload"],
+            type="filepath",
+            label="🎤 Record or Upload Audio (auto 16 kHz mono)"
         )
         model_choice = gr.Radio(
             ["moulsot_v0.1_2500", "moulsot_v0.2_1000"],
     transcribe_btn = gr.Button("🚀 Transcribe")
     with gr.Row():
+        output_selected = gr.Textbox(label="🟩 Selected Model Output")
+        output_other = gr.Textbox(label="🟦 Other Model Output")
     transcribe_btn.click(
         fn=transcribe,
         outputs=[output_selected, output_other]
     )
+# Local launch
 if __name__ == "__main__":
     demo.launch()