# Source: Hugging Face file view of app.py ("Create app.py"),
# uploaded by mayankpuvvala, commit 53b9009 (verified), 1.85 kB.
import gradio as gr
import torch
from transformers import pipeline, AutoTokenizer, AutoModelForSequenceClassification
import tempfile
import torchaudio
# Speech-to-text stage: Whisper produces the transcript of the recording.
asr_pipeline = pipeline(
    task="automatic-speech-recognition",
    model="openai/whisper-large-v3",
)

# Grammar-scoring stage: tokenizer and classifier are loaded from the same
# CoLA checkpoint and wrapped in a text-classification pipeline.
cola_tokenizer = AutoTokenizer.from_pretrained("textattack/roberta-base-CoLA")
cola_model = AutoModelForSequenceClassification.from_pretrained(
    "textattack/roberta-base-CoLA"
)
grammar_pipeline = pipeline(
    task="text-classification",
    model=cola_model,
    tokenizer=cola_tokenizer,
)

# Grammar-correction stage: T5 model that rewrites the transcript.
correction_pipeline = pipeline(
    task="text2text-generation",
    model="vennify/t5-base-grammar-correction",
)
def process_audio(audio_file):
    """Transcribe a recording, score its grammar, and propose a correction.

    Args:
        audio_file: Path to the audio file (``str`` or ``os.PathLike``), a
            binary file-like object exposing ``read()``, or ``None`` when the
            user submitted nothing.

    Returns:
        A 3-tuple of strings: the transcription, the grammar label with its
        confidence formatted as ``"<label> (<score>)"``, and the corrected
        text. When there is no audio or no transcribed speech, the second
        element carries a short explanatory message instead.
    """
    import os  # function-scope import: only needed for path handling/cleanup

    if audio_file is None:
        return "", "No audio provided.", ""

    tmp_path = None
    try:
        if isinstance(audio_file, (str, os.PathLike)):
            # Already on disk: hand the path straight to the ASR pipeline.
            audio_path = os.fspath(audio_file)
        else:
            # File-like upload: spill it to a temporary .wav so the ASR
            # pipeline can open it by path.
            with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp:
                tmp_path = tmp.name
                tmp.write(audio_file.read())
            audio_path = tmp_path

        # Transcription
        transcription = asr_pipeline(audio_path)["text"]
    finally:
        # delete=False means nobody else removes the temp file; do it here so
        # repeated requests do not fill the disk.
        if tmp_path is not None:
            try:
                os.remove(tmp_path)
            except FileNotFoundError:
                pass  # already gone; nothing left to clean up

    if not transcription.strip():
        # Nothing was recognised, so there is nothing meaningful to score.
        return transcription, "No speech detected.", ""

    # Grammar scoring; truncate so long transcripts stay within the
    # classifier's maximum input length instead of raising.
    grammar_result = grammar_pipeline(transcription, truncation=True)[0]
    score_label = grammar_result["label"]
    score_confidence = grammar_result["score"]

    # Correction
    corrected_text = correction_pipeline(transcription, max_length=128)[0][
        "generated_text"
    ]

    return transcription, f"{score_label} ({score_confidence:.2f})", corrected_text


# Gradio Interface
interface = gr.Interface(
    fn=process_audio,
    # "filepath" hands the handler a path string; current Gradio releases
    # reject the legacy "file" value.
    inputs=gr.Audio(type="filepath", label="Upload your .wav file"),
    outputs=[
        gr.Textbox(label="Transcription"),
        gr.Textbox(label="Grammar Score"),
        gr.Textbox(label="Grammar Correction"),
    ],
    # Studio-microphone emoji written as escapes so the title survives
    # re-encoding of the source file.
    title="\U0001F399\uFE0F Voice Grammar Scorer",
    description="Upload your voice (WAV file). This app transcribes it, scores grammar, and suggests corrections.",
)
def main():
    """Start the Gradio server for the interface defined in this module."""
    interface.launch()


# Launch only when executed as a script, not when imported.
if __name__ == "__main__":
    main()