root committed on
Commit
9d3cda7
Β·
1 Parent(s): a5f1898
Files changed (1) hide show
  1. app.py +39 -13
app.py CHANGED
@@ -1,4 +1,5 @@
1
  import gradio as gr
 
2
  from transformers import pipeline
3
 
4
  # Preload both models
@@ -7,33 +8,58 @@ models = {
7
  "moulsot_v0.2_1000": pipeline("automatic-speech-recognition", model="01Yassine/moulsot_v0.2_1000")
8
  }
9
 
10
- # Adjust generation config for both
11
  for m in models.values():
12
  m.model.generation_config.input_ids = m.model.generation_config.forced_decoder_ids
13
  m.model.generation_config.forced_decoder_ids = None
14
 
15
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
16
  def transcribe(audio, selected_model):
17
  if audio is None:
18
  return "Please record or upload an audio file.", "Please record or upload an audio file."
 
 
 
19
 
20
- pipe = models[selected_model]
 
21
  other_model = [k for k in models if k != selected_model][0]
 
22
 
23
  # Run inference
24
- result_selected = pipe(audio)["text"]
25
- result_other = models[other_model](audio)["text"]
26
-
27
  return result_selected, result_other
28
 
29
 
30
  title = "πŸŽ™οΈ Moulsot Whisper ASR Comparison"
31
  description = """
32
- Compare two fine-tuned Whisper models for **Moroccan ASR**:
33
  - 🟩 **moulsot_v0.1_2500**
34
  - 🟦 **moulsot_v0.2_1000**
35
 
36
- You can **record** or **upload** an audio sample, then see transcriptions from both models side by side.
 
37
  """
38
 
39
  with gr.Blocks(title=title) as demo:
@@ -41,9 +67,9 @@ with gr.Blocks(title=title) as demo:
41
 
42
  with gr.Row():
43
  audio_input = gr.Audio(
44
- sources=["microphone", "upload"],
45
- type="filepath",
46
- label="🎀 Record or Upload Audio"
47
  )
48
  model_choice = gr.Radio(
49
  ["moulsot_v0.1_2500", "moulsot_v0.2_1000"],
@@ -54,8 +80,8 @@ with gr.Blocks(title=title) as demo:
54
  transcribe_btn = gr.Button("πŸš€ Transcribe")
55
 
56
  with gr.Row():
57
- output_selected = gr.Textbox(label="🟩 Model 1 Output")
58
- output_other = gr.Textbox(label="🟦 Model 2 Output")
59
 
60
  transcribe_btn.click(
61
  fn=transcribe,
@@ -63,6 +89,6 @@ with gr.Blocks(title=title) as demo:
63
  outputs=[output_selected, output_other]
64
  )
65
 
66
- # For local testing
67
  if __name__ == "__main__":
68
  demo.launch()
 
1
  import gradio as gr
2
+ import torchaudio
3
  from transformers import pipeline
4
 
5
  # Preload both models
 
8
  "moulsot_v0.2_1000": pipeline("automatic-speech-recognition", model="01Yassine/moulsot_v0.2_1000")
9
  }
10
 
11
# Move each pipeline's forced decoder ids onto generation_config.input_ids and
# clear forced_decoder_ids afterwards.
# NOTE(review): this looks like a workaround for newer `transformers` versions
# rejecting forced_decoder_ids at generate() time — confirm against the
# transformers version pinned for this Space.
for asr in models.values():
    cfg = asr.model.generation_config
    cfg.input_ids = cfg.forced_decoder_ids
    cfg.forced_decoder_ids = None
15
 
16
 
17
def ensure_mono_16k(audio_path):
    """Load an audio file and return the path of a mono, 16 kHz WAV copy.

    Whisper-style ASR pipelines expect 16 kHz mono input; microphone
    recordings and uploads may be stereo and/or at another sample rate,
    so normalize before inference.

    Args:
        audio_path: Path to the input audio file (any format torchaudio
            can read).

    Returns:
        Path to a temporary ``.wav`` file holding mono audio at 16 kHz.
    """
    import tempfile

    waveform, sr = torchaudio.load(audio_path)

    # Downmix to mono by averaging channels when there is more than one.
    if waveform.shape[0] > 1:
        waveform = waveform.mean(dim=0, keepdim=True)

    # Resample to 16 kHz when the source rate differs.
    if sr != 16000:
        waveform = torchaudio.transforms.Resample(sr, 16000)(waveform)
        sr = 16000

    # Write to a unique temp file: the previous hard-coded
    # "/tmp/processed_16k.wav" is clobbered by concurrent requests
    # (two users transcribing at once overwrite each other's audio)
    # and does not exist on non-POSIX systems.
    tmp = tempfile.NamedTemporaryFile(suffix=".wav", delete=False)
    tmp_path = tmp.name
    tmp.close()
    torchaudio.save(tmp_path, waveform, sr)
    return tmp_path
34
+
35
+
36
def transcribe(audio, selected_model):
    """Transcribe *audio* with the chosen model and with the other model.

    Returns a pair ``(selected_text, other_text)``. When no audio was
    provided, both slots carry the same user-facing prompt instead.
    """
    if audio is None:
        prompt = "Please record or upload an audio file."
        return prompt, prompt

    # Normalize the recording/upload to mono 16 kHz before inference.
    wav_path = ensure_mono_16k(audio)

    # There are exactly two models; find the one that was NOT selected.
    other_name = next(k for k in models if k != selected_model)

    # Run both pipelines on the same processed file.
    text_selected = models[selected_model](wav_path)["text"]
    text_other = models[other_name](wav_path)["text"]

    return text_selected, text_other
53
 
54
 
55
  title = "πŸŽ™οΈ Moulsot Whisper ASR Comparison"
56
  description = """
57
+ Compare two fine-tuned Whisper models for **Arabic ASR**:
58
  - 🟩 **moulsot_v0.1_2500**
59
  - 🟦 **moulsot_v0.2_1000**
60
 
61
+ You can **record** or **upload** an audio sample (automatically resampled to 16 kHz mono),
62
+ then view transcriptions from both models side by side.
63
  """
64
 
65
  with gr.Blocks(title=title) as demo:
 
67
 
68
  with gr.Row():
69
  audio_input = gr.Audio(
70
+ sources=["microphone", "upload"],
71
+ type="filepath",
72
+ label="🎀 Record or Upload Audio (auto 16 kHz mono)"
73
  )
74
  model_choice = gr.Radio(
75
  ["moulsot_v0.1_2500", "moulsot_v0.2_1000"],
 
80
  transcribe_btn = gr.Button("πŸš€ Transcribe")
81
 
82
  with gr.Row():
83
+ output_selected = gr.Textbox(label="🟩 Selected Model Output")
84
+ output_other = gr.Textbox(label="🟦 Other Model Output")
85
 
86
  transcribe_btn.click(
87
  fn=transcribe,
 
89
  outputs=[output_selected, output_other]
90
  )
91
 
92
+ # Local launch
93
  if __name__ == "__main__":
94
  demo.launch()