Spaces:
Sleeping
Sleeping
| import gradio as gr | |
| import numpy as np | |
| import torchaudio | |
| from transformers import pipeline | |
# Checkpoint id of the Nigerian Pidgin wav2vec2 model on the Hugging Face Hub.
_PIDGIN_ASR_CHECKPOINT = "asr-nigerian-pidgin/pidgin-wav2vec2-xlsr53"

# Build the speech-recognition pipeline once, at import time, so every
# request handled by the app reuses the same loaded model.
transcriber = pipeline(
    task="automatic-speech-recognition",
    model=_PIDGIN_ASR_CHECKPOINT,
)
| # Transcription function | |
| # def transcribe(audio): | |
| # if audio is None: | |
| # return "No audio provided." | |
| # sr, y = audio | |
| # # Convert to mono if stereo | |
| # if y.ndim > 1: | |
| # y = y.mean(axis=1) | |
| # y = y.astype(np.float32) | |
| # y /= np.max(np.abs(y)) | |
| # return transcriber({"sampling_rate": sr, "raw": y})["text"] | |
def transcribe(audio_filepath):
    """Transcribe an audio file with the module-level ASR pipeline.

    Args:
        audio_filepath: Path of the audio file to transcribe, or ``None``
            when no audio was supplied.

    Returns:
        The text produced by ``transcriber``, or an empty string when no
        audio was supplied or the file holds no samples.
    """
    if audio_filepath is None:
        return ""

    # load & preprocess
    waveform, sr = torchaudio.load(audio_filepath)
    if waveform.numel() == 0:
        # Nothing to transcribe; np.max below would raise on an empty array.
        return ""

    # Axis 0 is treated as the channel axis: average the channels to mono.
    if waveform.shape[0] > 1:
        waveform = waveform.mean(dim=0, keepdim=True)

    # Drop only the channel axis so a one-frame clip stays a 1-D signal
    # (a bare squeeze() would collapse it to a 0-d array).
    audio = waveform.squeeze(0).numpy().astype(np.float32)

    # Peak-normalise; the epsilon avoids dividing by zero on pure silence.
    audio /= np.max(np.abs(audio)) + 1e-9
    return transcriber({"sampling_rate": sr, "raw": audio})["text"]
# Define the Gradio UI components
# NOTE(review): the block nesting below was reconstructed from a copy whose
# indentation had been stripped, and the emoji were restored from mis-encoded
# bytes (the transcription and share icons are best guesses) — confirm both.
_INTRO_MARKDOWN = """Upload or record audio in Nigerian Pidgin to get transcription. This Demo uses the
Nigerian pidgin ASR checkpoint -[Pidgin-Wav2Vec2-XLSR53](https://huggingface.co/asr-nigerian-pidgin/pidgin-wav2vec2-xlsr53)
and 🤗 Transformers to transcribe audio files of max 30s length.
"""

with gr.Blocks() as demo:
    gr.Markdown("# 🗣️ Nigerian Pidgin ASR Demo")
    gr.Markdown(_INTRO_MARKDOWN)

    # Input side: audio widget plus the button that triggers transcription.
    with gr.Column():
        audio_in = gr.Audio(
            label="🎤 Record or upload your audio",
            type="filepath",  # the handler receives a path, not raw samples
            sources=["upload", "microphone"],
            interactive=True,
            min_length=1,
            # One above the 30s quoted in the description — presumably slack
            # for recordings that run slightly over; confirm.
            max_length=31,
        )
        with gr.Row():
            submit_btn = gr.Button("Submit")
            #clear_btn = gr.Button("Clear")

    # Output side: read-only transcription plus feedback/share buttons.
    with gr.Column():
        transcription_txt = gr.Textbox(
            label="📝 Transcription",
            interactive=False,
            show_label=True,
            show_copy_button=True,
        )
        with gr.Row():
            flag_btn = gr.Button("🚩 Flag this output as incorrect", size="sm")
            share_btn = gr.Button("🔗 Share", size="sm")

    # Button wiring:
    submit_btn.click(fn=transcribe, inputs=audio_in, outputs=transcription_txt)
    # Flagging replaces the textbox content with an acknowledgement message.
    flag_btn.click(fn=lambda: "Thank you for your feedback.", inputs=None, outputs=transcription_txt)
    # NOTE(review): share_btn has no click handler attached, so pressing it
    # currently does nothing.
    #clear_btn.click(fn=lambda: (None, ""), inputs=None, outputs=[audio_in, transcription_txt])

demo.launch(share=True)