import gradio as gr
import torch
from transformers import pipeline

title = "Transcribe speech in several languages"

# Run Whisper on GPU when one is available; the German wav2vec2 pipeline is not
# given a device, so it runs on CPU.
device = "cuda:0" if torch.cuda.is_available() else "cpu"

# German-only CTC model and the multilingual Whisper model.
asr_pipe_audio2Text_Ge = pipeline(task="automatic-speech-recognition", model="jonatasgrosman/wav2vec2-large-xlsr-53-german")
asr_pipe_whisper = pipeline(task="automatic-speech-recognition", model="openai/whisper-medium", device=device)
def transcribeFile(inputlang, audio_path: str) -> str:
    # Long audio is split into 10 s chunks with overlapping strides and batched for speed.
    if inputlang == "Auto Detect":
        transcription = asr_pipe_whisper(audio_path, chunk_length_s=10, stride_length_s=(4, 2), generate_kwargs={"task": "transcribe"}, batch_size=32)
    elif inputlang == "German":
        transcription = asr_pipe_audio2Text_Ge(audio_path, chunk_length_s=10, stride_length_s=(4, 2), batch_size=32)
    return transcription["text"]
def translateAudio(audio_path):
    # Whisper's "translate" task produces an English translation of the speech.
    translationOutput = asr_pipe_whisper(audio_path, max_new_tokens=256, generate_kwargs={"task": "translate"})
    return translationOutput
def transcribeFileMulti(inputlang, audio_path: str) -> str:
    # The radio choices are "Auto Detect" and "German": route German speech to the
    # dedicated wav2vec2 model and everything else to Whisper's language detection.
    if inputlang == "German":
        transcription = asr_pipe_audio2Text_Ge(audio_path)
    else:
        transcription = asr_pipe_whisper(audio_path)
    translation = translateAudio(audio_path)
    # Return the transcription followed by its English translation.
    return transcription["text"] + "\n" + translation["text"]
# Gradio 3.x components (in Gradio 4 the Audio parameter `source=` was renamed to `sources=[...]`).
app1 = gr.Interface(
    fn=transcribeFile,
    inputs=[gr.Radio(["Auto Detect", "German"], value="Auto Detect", label="Source Language",
                     info="Select the language of the speech you want to transcribe"),
            gr.Audio(source="upload", type="filepath", label="Upload audio file")],
    outputs="text",
    title=title,
)
app2 = gr.Interface(
    fn=transcribeFileMulti,
    inputs=[gr.Radio(["Auto Detect", "German"], value="Auto Detect", label="Source Language",
                     info="Select the language of the speech you want to transcribe"),
            gr.Audio(source="microphone", type="filepath")],
    outputs="text",
    title=title,
)
demo = gr.TabbedInterface([app1, app2], ["Audio File", "Microphone"])

if __name__ == "__main__":
    demo.launch()
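
For a quick check outside the Gradio UI, the two transcription helpers can be called directly. This is a minimal sketch, assuming the script above is saved as app.py; the file name sample_de.wav is hypothetical and stands in for any local audio file.

# smoke_test.py -- minimal sketch; "sample_de.wav" is a hypothetical local audio file.
from app import transcribeFile, transcribeFileMulti

# Transcribe with Whisper language auto-detection.
print(transcribeFile("Auto Detect", "sample_de.wav"))

# Transcribe with the German wav2vec2 model and append Whisper's English translation.
print(transcribeFileMulti("German", "sample_de.wav"))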