Spaces:
Runtime error
Runtime error
| import os | |
| import gradio as gr | |
| from uuid import uuid4 | |
| from pipelines.pipeline import InferencePipeline | |
# HTML header for the demo page: a centered <h1> with the project name and a
# one-paragraph tagline. Rendered as the first element of the UI through
# gr.HTML(TITLE).
| TITLE = """ | |
| <div style="text-align: center; max-width: 650px; margin: 0 auto;"> | |
| <div | |
| style=" | |
| display: inline-flex; | |
| align-items: center; | |
| gap: 0.8rem; | |
| font-size: 1.75rem; | |
| " | |
| > | |
| <h1 style="font-weight: 900; margin-bottom: 7px;"> | |
| Auto-AVSR: Audio-Visual Speech Recognition | |
| </h1> | |
| </div> | |
| <p style="margin-bottom: 10px; font-size: 94%"> | |
| Want to recognize content in a noisy environment?<br>Our Auto-AVSR models are here to transcribe your answers from audio or visual information! | |
| </p> | |
| </div> | |
| """ | |
# HTML shown below the Submit button through gr.HTML(ARTICLE): links to the
# training code, the paper and a Colab notebook, plus the non-commercial
# usage notice.
| ARTICLE = """ | |
| <div style="text-align: center; max-width: 650px; margin: 0 auto;"> | |
| <p> | |
| Want to look into models? You can find our [<a href="https://github.com/mpc001/auto_avsr">training code</a>] and [<a href="https://arxiv.org/abs/2303.14307">paper</a>]. | |
| </p> | |
| <p> | |
| The inference is performed on the CPU. You can also run on <a href="https://colab.research.google.com/drive/1jfb6e4xxhXHbmQf-nncdLno1u0b4j614?usp=sharing">Colab GPU</a> | |
| </p> | |
| <p> | |
| We share this demo only for non-commercial purposes. | |
| </p> | |
| </div> | |
| """ | |
# Custom stylesheet handed to gr.Blocks(css=CSS): underlined bold links, a
# generic spin animation, and styling for a "share" button container.
# NOTE(review): no component in the visible code sets elem_id to
# "col-container", "share-btn" or "share-btn-container", so these rules may
# be unused leftovers -- confirm before relying on or removing them.
| CSS = """ | |
| #col-container {margin-left: auto; margin-right: auto;} | |
| a {text-decoration-line: underline; font-weight: 600;} | |
| .animate-spin { | |
| animation: spin 1s linear infinite; | |
| } | |
| @keyframes spin { | |
| from { transform: rotate(0deg); } | |
| to { transform: rotate(360deg); } | |
| } | |
| #share-btn-container { | |
| display: flex; padding-left: 0.5rem !important; padding-right: 0.5rem !important; background-color: #000000; justify-content: center; align-items: center; border-radius: 9999px !important; width: 13rem; | |
| } | |
| #share-btn { | |
| all: initial; color: #ffffff;font-weight: 600; cursor:pointer; font-family: 'IBM Plex Sans', sans-serif; margin-left: 0.5rem !important; padding-top: 0.25rem !important; padding-bottom: 0.25rem !important; | |
| } | |
| #share-btn * { | |
| all: unset; | |
| } | |
| #share-btn-container div:nth-child(-n+2){ | |
| width: auto !important; | |
| min-height: 0px !important; | |
| } | |
| #share-btn-container .wrap { | |
| display: none !important; | |
| } | |
| """ | |
# Output options inserted between the input file and the destination path of
# the ffmpeg call in fn(): log errors only, overwrite the output (-y), 25 fps,
# yuv420p pixel format, MP4 container.
FFMPEG_COMMAND = "-loglevel error -y -r 25 -pix_fmt yuv420p -f mp4"

# Config file for each model name; the keys are the values fn() receives as
# `pipeline_type`.
_CONFIG_BY_MODEL = {
    "VSR(mediapipe)": "./configs/LRS3_V_WER19.1.ini",
    "ASR": "./configs/LRS3_A_WER1.0.ini",
    "AVSR(mediapipe)": "./configs/LRS3_AV_WER0.9.ini",
}

# All pipelines are built once at import time, with identical CPU /
# face-tracking / mediapipe settings, and looked up by name per request.
pipelines = {
    model_name: InferencePipeline(
        config_path,
        device="cpu",
        face_track=True,
        detector="mediapipe",
    )
    for model_name, config_path in _CONFIG_BY_MODEL.items()
}
def fn(pipeline_type, filename):
    """Transcribe an uploaded video with the selected recognition pipeline.

    The input is first re-encoded by ffmpeg (options from ``FFMPEG_COMMAND``)
    into a uniquely named MP4 under ``./tmp``. The selected pipeline then
    extracts landmarks from that copy, loads the data and runs inference.
    The temporary copy is removed before returning.

    Args:
        pipeline_type: Key into the module-level ``pipelines`` dict.
        filename: Path of the uploaded video file.

    Returns:
        Whatever ``model.infer`` of the selected pipeline returns (the
        transcript shown in the prediction box).

    Raises:
        ValueError: If ``pipeline_type`` is not a known pipeline or no video
            was supplied.
        subprocess.CalledProcessError: If ffmpeg exits with a non-zero status.
    """
    # Function-scope imports keep this block self-contained; the module
    # header does not import these names.
    import shlex
    import subprocess

    # The UI passes None when nothing was selected or uploaded; fail with a
    # clear message instead of an obscure KeyError / ffmpeg error later on.
    if pipeline_type not in pipelines:
        raise ValueError(
            f"Unknown model {pipeline_type!r}; choose one of {list(pipelines)}."
        )
    if not filename:
        raise ValueError("No input video was provided.")

    directory = "./tmp"
    # exist_ok avoids the check-then-create race between concurrent requests.
    os.makedirs(directory, exist_ok=True)
    dst_filename = os.path.join(directory, str(uuid4())[:8] + ".mp4")

    # Argument list with no shell: upload paths containing spaces or shell
    # metacharacters are passed to ffmpeg verbatim and cannot inject commands.
    command = [
        "ffmpeg",
        "-i",
        filename,
        *shlex.split(FFMPEG_COMMAND),
        dst_filename,
    ]
    try:
        # check=True surfaces a failed conversion immediately rather than
        # letting the pipeline run on a missing or partial file.
        subprocess.run(command, check=True)

        selected_pipeline_instance = pipelines[pipeline_type]
        landmarks = selected_pipeline_instance.process_landmarks(
            dst_filename, landmarks_filename=None
        )
        data = selected_pipeline_instance.dataloader.load_data(dst_filename, landmarks)
        return selected_pipeline_instance.model.infer(data)
    finally:
        # Best-effort cleanup so converted copies do not accumulate on disk;
        # the file may not exist if ffmpeg failed before writing it.
        try:
            os.remove(dst_filename)
        except OSError:
            pass
# Build the demo UI: header, model selector, video input, prediction box,
# submit button wired to fn(), and the footer text.
demo = gr.Blocks(css=CSS)
with demo:
    gr.HTML(TITLE)
    # gr.Dropdown replaces gr.inputs.Dropdown: the gr.inputs namespace is
    # deprecated in Gradio 3 and removed in Gradio 4.
    dropdown_list = gr.Dropdown(
        ["ASR", "VSR(mediapipe)", "AVSR(mediapipe)"],
        label="model",
    )
    video_file = gr.Video(label="INPUT VIDEO", include_audio=True)
    text = gr.Textbox(label="PREDICTION")
    # The former `.style(full_width=True)` call is dropped: `.style()` is
    # deprecated in Gradio 3 and removed in Gradio 4.
    # NOTE(review): confirm the default button width is acceptable.
    btn = gr.Button("Submit")
    btn.click(fn, inputs=[dropdown_list, video_file], outputs=text)
    gr.HTML(ARTICLE)

demo.launch()