import os
import tempfile

import gradio as gr
import librosa
import numpy as np
import soundfile as sf
import torch
from transformers import pipeline
# Load models globally to avoid reloading on every request
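# transformers pipelines take the device as an int: 0 selects the first GPU, -1 runs on CPU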
device = 0 if torch.cuda.is_available() else -1
models = {
    'transcriber': pipeline("automatic-speech-recognition", model="openai/whisper-tiny.en",
                            device=device, chunk_length_s=30),
    'summarizer': pipeline("summarization", model="sshleifer/distilbart-cnn-12-6", device=device)
}
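# chunk_length_s=30 makes the ASR pipeline split long recordings into 30-second
# windows and stitch the partial transcripts back together, so inputs longer than
# Whisper's native 30-second context still transcribe end to end.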
def load_and_convert_audio(audio_path):
    """Load audio with librosa and convert it to a 16 kHz WAV file."""
    audio_data, sample_rate = librosa.load(audio_path, sr=16000)  # Whisper expects 16 kHz
    audio_data = audio_data.astype(np.float32)
    # delete=False keeps the file around after the handle closes; the caller is
    # responsible for removing it (process_audio does so in its finally block)
    with tempfile.NamedTemporaryFile(delete=False, suffix='.wav') as temp_wav:
        sf.write(temp_wav.name, audio_data, sample_rate, format='WAV')
    return temp_wav.name
def process_audio(audio_file):
    """Process an audio file and return its transcription and summary."""
    results = {}
    temp_wav_path = None  # initialized up front so the finally block is safe if conversion fails
    try:
        temp_wav_path = load_and_convert_audio(audio_file)
        # Transcription
        transcription = models['transcriber'](temp_wav_path, return_timestamps=True)
        results['transcription'] = (transcription['text'] if isinstance(transcription, dict)
                                    else ' '.join(chunk['text'] for chunk in transcription))
        # Summarization: split the transcript into fixed-size word chunks and
        # summarize each one, since the summarizer cannot take arbitrarily long input
        text = results['transcription']
        words = text.split()
        chunk_size = 1000
        chunks = [' '.join(words[i:i + chunk_size]) for i in range(0, len(words), chunk_size)]
        summaries = [models['summarizer'](chunk, max_length=200, min_length=50, truncation=True)[0]['summary_text']
                     for chunk in chunks]
        results['summary'] = ' '.join(summaries)
    except Exception as e:
        return {'error': str(e)}  # surface the error message in the JSON output
    finally:
        if temp_wav_path and os.path.exists(temp_wav_path):
            os.unlink(temp_wav_path)
    return results
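# Note on chunk_size: distilbart-cnn-12-6 accepts at most 1024 tokens, and 1000
# words usually tokenize to more than that, so truncation=True silently drops the
# tail of each chunk. A smaller chunk_size (roughly 400-500 words) would keep more
# of the transcript in each summary at the cost of more summarizer calls.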
def gradio_interface(audio):
    """Gradio interface function"""
    return process_audio(audio)
# Create Gradio interface
iface = gr.Interface(
    fn=gradio_interface,
    # gr.inputs.Audio(source=..., type="file") is the removed pre-4.x API;
    # gr.Audio with type="filepath" passes the upload to the function as a path string
    inputs=gr.Audio(sources=["upload"], type="filepath", label="Upload Audio File"),
    outputs=gr.JSON(label="Results"),
    title="Audio Transcription and Summarization",
    description="Upload an audio file to get its transcription and summary."
)

if __name__ == "__main__":
    iface.launch()
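# A minimal sketch of a local smoke test without the UI, assuming this file is
# saved as app.py (the module name is hypothetical) and a sample.wav exists:
#
#     python -c "from app import process_audio; print(process_audio('sample.wav'))"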