import gradio as gr
import torch
from transformers import pipeline, AutoTokenizer, AutoModelForSequenceClassification

# Run the Hugging Face pipelines on GPU when one is available, otherwise on CPU.
device = 0 if torch.cuda.is_available() else -1

# Speech-to-text: Whisper large-v3 transcribes the uploaded WAV file.
asr_pipeline = pipeline("automatic-speech-recognition", model="openai/whisper-large-v3", device=device)
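# Note: whisper-large-v3 is a heavy checkpoint; a smaller one such as "openai/whisper-small"
# can be substituted above if memory or latency is a concern, at some cost in accuracy.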
|
# Grammatical acceptability: a RoBERTa-base classifier fine-tuned on CoLA.
cola_model = AutoModelForSequenceClassification.from_pretrained("textattack/roberta-base-CoLA")
cola_tokenizer = AutoTokenizer.from_pretrained("textattack/roberta-base-CoLA")
grammar_pipeline = pipeline("text-classification", model=cola_model, tokenizer=cola_tokenizer, device=device)
|
# Grammar correction: a T5-base model fine-tuned for grammatical error correction.
correction_pipeline = pipeline("text2text-generation", model="vennify/t5-base-grammar-correction", device=device)
|
def process_audio(audio_path):
    """Transcribe the uploaded audio, score its grammar, and suggest a correction."""
    # gr.Audio(type="filepath") passes the path of the uploaded file, so it can be
    # handed straight to the ASR pipeline without copying it to a temporary file.
    transcription = asr_pipeline(audio_path)["text"]

    # Score grammatical acceptability. This checkpoint reports LABEL_0 / LABEL_1;
    # LABEL_1 generally corresponds to the "acceptable" class.
    grammar_result = grammar_pipeline(transcription)[0]
    score_label = grammar_result["label"]
    score_confidence = grammar_result["score"]

    # The model card for vennify/t5-base-grammar-correction recommends prefixing the
    # input with "grammar: ".
    corrected_text = correction_pipeline("grammar: " + transcription, max_length=128)[0]["generated_text"]

    return transcription, f"{score_label} ({score_confidence:.2f})", corrected_text
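# Example (hypothetical path and values): process_audio("sample.wav") would return
# (the transcription, a score string like "LABEL_1 (0.93)", the corrected text).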
|
# Gradio UI: one audio upload as input, three text boxes as output.
interface = gr.Interface(
    fn=process_audio,
    inputs=gr.Audio(type="filepath", label="Upload your .wav file"),
    outputs=[
        gr.Textbox(label="Transcription"),
        gr.Textbox(label="Grammar Score"),
        gr.Textbox(label="Grammar Correction"),
    ],
    title="🎙️ Voice Grammar Scorer",
    description="Upload your voice (WAV file). This app transcribes it, scores grammar, and suggests corrections.",
)
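# Tip: passing share=True to interface.launch() below additionally creates a temporary
# public Gradio link, which is handy for testing the app from another device.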
|
if __name__ == "__main__":
    interface.launch()