Cryptic
test
90bcc62
raw
history blame
2.46 kB
import os
import tempfile
import json
import librosa
import numpy as np
import soundfile as sf
import torch
import gradio as gr
from transformers import pipeline
# Load models globally to avoid reloading on every request
# transformers' pipeline() convention: device=0 selects the first CUDA GPU,
# device=-1 runs on CPU.
device = 0 if torch.cuda.is_available() else -1
models = {
    # chunk_length_s=30 enables Whisper's chunked long-form transcription,
    # so inputs longer than 30 s are processed in windows.
    'transcriber': pipeline("automatic-speech-recognition", model="openai/whisper-tiny.en", device=device, chunk_length_s=30),
    'summarizer': pipeline("summarization", model="sshleifer/distilbart-cnn-12-6", device=device)
}
def load_and_convert_audio(audio_path):
    """Load an audio file and write it out as a temporary 16 kHz WAV.

    Args:
        audio_path: Path to any audio format librosa can decode.

    Returns:
        str: Path to a temporary ``.wav`` file. The caller is responsible
        for deleting it.
    """
    # Resample to 16 kHz — the sample rate the Whisper models expect.
    audio_data, sample_rate = librosa.load(audio_path, sr=16000)
    audio_data = audio_data.astype(np.float32)
    # Use mkstemp + close instead of an open NamedTemporaryFile: writing to
    # the file by name while the creating handle is still open fails on
    # Windows (the file is opened exclusively there).
    fd, temp_wav_path = tempfile.mkstemp(suffix='.wav')
    os.close(fd)
    sf.write(temp_wav_path, audio_data, sample_rate, format='WAV')
    return temp_wav_path
def process_audio(audio_file):
    """Transcribe an uploaded audio file and summarize the transcript.

    Args:
        audio_file: Either a path string or a file-like object exposing a
            ``.name`` attribute (what Gradio's ``type="file"`` components
            provide).

    Returns:
        dict: ``{'transcription': str, 'summary': str}`` on success, or
        ``{'error': str}`` if any step fails.
    """
    results = {}
    # Initialize before the try: if load_and_convert_audio() raises, the
    # original code hit a NameError in `finally` (temp_wav_path unbound),
    # masking the real error.
    temp_wav_path = None
    try:
        # Accept both a plain path and a Gradio file object.
        audio_path = audio_file if isinstance(audio_file, str) else audio_file.name
        temp_wav_path = load_and_convert_audio(audio_path)

        # Transcription; return_timestamps lets the pipeline return either a
        # dict or a list of timestamped chunks depending on version/input.
        transcription = models['transcriber'](temp_wav_path, return_timestamps=True)
        if isinstance(transcription, dict):
            results['transcription'] = transcription['text']
        else:
            results['transcription'] = ' '.join(chunk['text'] for chunk in transcription)

        # Summarize in ~1000-word chunks to stay within the summarizer's
        # input limit, then stitch the partial summaries together.
        words = results['transcription'].split()
        chunk_size = 1000
        chunks = [' '.join(words[i:i + chunk_size]) for i in range(0, len(words), chunk_size)]
        summaries = [
            models['summarizer'](chunk, max_length=200, min_length=50, truncation=True)[0]['summary_text']
            for chunk in chunks
        ]
        results['summary'] = ' '.join(summaries)
    except Exception as e:
        # Top-level boundary for the web handler: report instead of crashing.
        return {'error': str(e)}
    finally:
        # Clean up the temp WAV even on failure; the guard handles the case
        # where conversion raised before a path was produced.
        if temp_wav_path and os.path.exists(temp_wav_path):
            os.unlink(temp_wav_path)
    return results
def gradio_interface(audio):
    """Adapter between the Gradio UI and the processing pipeline.

    Args:
        audio: The uploaded audio file handed over by the Gradio component.

    Returns:
        dict: The transcription/summary (or error) dict from process_audio.
    """
    result = process_audio(audio)
    return result
# Create Gradio interface
# NOTE(review): gr.inputs.* is the pre-3.x Gradio namespace, deprecated in
# 3.x and removed in 4.x; the modern equivalent is
# gr.Audio(sources=["upload"], type="filepath"). Switching would also change
# what gradio_interface receives (a path string instead of a file object) —
# confirm the pinned gradio version before migrating.
iface = gr.Interface(
    fn=gradio_interface,
    # type="file" hands gradio_interface a file object with a .name attribute.
    inputs=gr.inputs.Audio(source="upload", type="file", label="Upload Audio File"),
    # Single JSON pane rendering the transcription/summary (or error) dict.
    outputs=["json"],
    title="Audio Transcription and Summarization",
    description="Upload an audio file to get its transcription and summary."
)

# Launch the web server only when run as a script, not when imported.
if __name__ == "__main__":
    iface.launch()