import os
import tempfile

import gradio as gr
import librosa
import numpy as np
import soundfile as sf
import torch
from transformers import pipeline
# Load models globally so they are initialized once, not on every request
device = 0 if torch.cuda.is_available() else -1
models = {
    'transcriber': pipeline("automatic-speech-recognition", model="openai/whisper-tiny.en",
                            device=device, chunk_length_s=30),
    'summarizer': pipeline("summarization", model="sshleifer/distilbart-cnn-12-6", device=device),
}
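# chunk_length_s=30 enables the ASR pipeline's chunked long-form inference:
# audio longer than Whisper's 30-second context window is split into
# overlapping chunks and the chunk transcripts are stitched back together.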

def load_and_convert_audio(audio_path):
    """Load audio with librosa and write it out as a temporary 16 kHz WAV file."""
    audio_data, sample_rate = librosa.load(audio_path, sr=16000)  # Whisper expects 16 kHz input
    audio_data = audio_data.astype(np.float32)
    with tempfile.NamedTemporaryFile(delete=False, suffix='.wav') as temp_wav:
        sf.write(temp_wav.name, audio_data, sample_rate, format='WAV')
    return temp_wav.name

def process_audio(audio_file):
    """Process an uploaded audio file (given as a filesystem path) and return its transcription and summary."""
    if audio_file is None:
        return {'error': 'No audio file provided.'}
    results = {}
    temp_wav_path = None  # defined before the try so the finally block never sees an unbound name
    try:
        temp_wav_path = load_and_convert_audio(audio_file)

        # Transcription
        transcription = models['transcriber'](temp_wav_path, return_timestamps=True)
        results['transcription'] = (
            transcription['text']
            if isinstance(transcription, dict)
            else ' '.join(chunk['text'] for chunk in transcription)
        )

        # Summarization: split the transcript into ~1000-word chunks so each
        # fits within the summarizer's input limit, then summarize each chunk
        words = results['transcription'].split()
        chunk_size = 1000
        chunks = [' '.join(words[i:i + chunk_size]) for i in range(0, len(words), chunk_size)]
        summaries = [
            models['summarizer'](chunk, max_length=200, min_length=50, truncation=True)[0]['summary_text']
            for chunk in chunks
        ]
        results['summary'] = ' '.join(summaries)
    except Exception as e:
        return {'error': str(e)}  # surface the failure in the JSON output
    finally:
        if temp_wav_path and os.path.exists(temp_wav_path):
            os.unlink(temp_wav_path)
    return results
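
# A quick way to exercise the pipeline without the UI (a sketch; "sample.mp3"
# is a placeholder filename, not a file shipped with this Space):
#
#   result = process_audio("sample.mp3")
#   # -> {'transcription': '...', 'summary': '...'} or {'error': '...'}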

def gradio_interface(audio):
    """Gradio wrapper: `audio` arrives as a filesystem path (type="filepath")."""
    return process_audio(audio)


# Create the Gradio interface. The original gr.inputs.Audio(source="upload",
# type="file") API was removed in modern Gradio; gr.Audio with
# sources=["upload"] and type="filepath" is the current equivalent.
iface = gr.Interface(
    fn=gradio_interface,
    inputs=gr.Audio(sources=["upload"], type="filepath", label="Upload Audio File"),
    outputs=gr.JSON(label="Results"),
    title="Audio Transcription and Summarization",
    description="Upload an audio file to get its transcription and summary.",
)

if __name__ == "__main__":
    iface.launch()
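
# Suggested requirements.txt for this Space, inferred from the imports above
# (a sketch; pin versions to match your environment):
#
#   gradio
#   transformers
#   torch
#   librosa
#   soundfile
#   numpy
#
# The ASR pipeline decodes file paths via ffmpeg, so on Hugging Face Spaces
# a packages.txt containing "ffmpeg" is likely needed as well.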