Spaces:

aryankeesara
/

audiosummarymodel

Sleeping

App Files Community

Cryptic committed on Oct 23, 2024

Commit

eb91ddc

1 Parent(s): 90bcc62

test

Browse files

Files changed (2) hide show

app.py +48 -58
requirements.txt +3 -3

app.py CHANGED Viewed

@@ -1,70 +1,60 @@
-import os
 import tempfile
-import json
-import librosa
-import numpy as np
 import soundfile as sf
-import torch
-import gradio as gr
 from transformers import pipeline
-# Load models globally to avoid reloading on every request
-device = 0 if torch.cuda.is_available() else -1
-models = {
-    'transcriber': pipeline("automatic-speech-recognition", model="openai/whisper-tiny.en", device=device, chunk_length_s=30),
-    'summarizer': pipeline("summarization", model="sshleifer/distilbart-cnn-12-6", device=device)
-}
-def load_and_convert_audio(audio_path):
-    """Load audio using librosa and convert to WAV format"""
-    audio_data, sample_rate = librosa.load(audio_path, sr=16000)  # Whisper expects 16kHz
-    audio_data = audio_data.astype(np.float32)
-    with tempfile.NamedTemporaryFile(delete=False, suffix='.wav') as temp_wav:
-        sf.write(temp_wav.name, audio_data, sample_rate, format='WAV')
-        return temp_wav.name
-def process_audio(audio_file):
-    """Process audio file and return transcription and summary"""
-    results = {}
-    try:
-        temp_wav_path = load_and_convert_audio(audio_file.name)
-        # Transcription
-        transcription = models['transcriber'](temp_wav_path, return_timestamps=True)
-        results['transcription'] = transcription['text'] if isinstance(transcription, dict) else ' '.join([chunk['text'] for chunk in transcription])
-        # Summarization
-        text = results['transcription']
-        words = text.split()
-        chunk_size = 1000
-        chunks = [' '.join(words[i:i + chunk_size]) for i in range(0, len(words), chunk_size)]
-        summaries = [models['summarizer'](chunk, max_length=200, min_length=50, truncation=True)[0]['summary_text'] for chunk in chunks]
-        results['summary'] = ' '.join(summaries)
-    except Exception as e:
-        return {'error': str(e)}  # Return error message if something goes wrong
-    finally:
-        if os.path.exists(temp_wav_path):
-            os.unlink(temp_wav_path)
-    return results
-def gradio_interface(audio):
-    """Gradio interface function"""
-    return process_audio(audio)
-# Create Gradio interface
-iface = gr.Interface(
-    fn=gradio_interface,
-    inputs=gr.inputs.Audio(source="upload", type="file", label="Upload Audio File"),
-    outputs=["json"],
-    title="Audio Transcription and Summarization",
-    description="Upload an audio file to get its transcription and summary."
-)
-if __name__ == "__main__":
-    iface.launch()

+import streamlit as st
 import tempfile
 import soundfile as sf
 from transformers import pipeline
+# Generation cap (in tokens) handed to the summarizer as ``max_length``.
+MAX_SUMMARY_TOKENS = 1024
+# Scale factor applied to the (capped) word count before deriving the
+# minimum summary length; kept from the original heuristic.
+WORD_SCALE = 0.75
+# Characters that count as the end of a complete sentence.
+SENTENCE_ENDINGS = (".", "!", "?")
+
+
+@st.cache_resource
+def load_models():
+    """Build the three CPU pipelines once and reuse them across reruns.
+
+    Streamlit re-executes this script on every widget interaction; without
+    caching, all three models would be re-instantiated each time.
+
+    Returns:
+        A ``(transcriber, summarizer, question_generator)`` tuple.
+    """
+    return (
+        pipeline(
+            "automatic-speech-recognition",
+            model="openai/whisper-tiny.en",
+            device=-1,
+            # Whisper handles 30 s windows; chunking lets longer
+            # recordings be transcribed in full.
+            chunk_length_s=30,
+        ),
+        pipeline(
+            "summarization",
+            model="sshleifer/distilbart-cnn-12-6",
+            device=-1,
+        ),
+        pipeline(
+            "text2text-generation",
+            model="google/t5-efficient-tiny",
+            device=-1,
+        ),
+    )
+
+
+def trim_to_last_sentence(text):
+    """Drop a trailing sentence fragment from *text*, if there is one.
+
+    Text that already ends in ".", "!" or "?" is returned unchanged, and so
+    is text that contains none of those characters.
+    """
+    if text.endswith(SENTENCE_ENDINGS):
+        return text
+    cut = max(text.rfind(mark) for mark in SENTENCE_ENDINGS)
+    return text[:cut + 1] if cut != -1 else text
+
+
+# Load models
+transcriber, summarizer, question_generator = load_models()
+# Upload audio file
+uploaded_file = st.file_uploader("Upload Audio", type=["wav", "mp3"])
+if uploaded_file is not None:
+    try:
+        # The directory and the copy inside it are removed when the ``with``
+        # block exits, even on error, so uploads do not pile up on disk.
+        with tempfile.TemporaryDirectory() as temp_dir:
+            temp_audio_path = f"{temp_dir}/uploaded_audio"
+            with open(temp_audio_path, "wb") as temp_audio_file:
+                temp_audio_file.write(uploaded_file.getbuffer())
+            # Fail early on unreadable audio without decoding all of it.
+            sf.info(temp_audio_path)
+            # Transcribing audio
+            lecture_text = transcriber(temp_audio_path)["text"]
+        if not lecture_text.strip():
+            # Nothing to summarise; avoid feeding an empty prompt onward.
+            st.warning("No speech could be transcribed from this file.")
+        else:
+            # Minimum summary length grows with the transcript, up to a cap.
+            num_words = len(lecture_text.split())
+            scaled_words = int(min(num_words, MAX_SUMMARY_TOKENS) * WORD_SCALE)
+            # Summarization; ``truncation=True`` clips over-long transcripts
+            # to what the model accepts.
+            summary = summarizer(
+                lecture_text,
+                max_length=MAX_SUMMARY_TOKENS,
+                min_length=int(scaled_words * 0.1),
+                truncation=True,
+            )
+            # Clean up the summary text
+            summary_text = trim_to_last_sentence(summary[0]["summary_text"])
+            # Questions Generation
+            context = f"Based on the following lecture summary: {summary_text}, generate some relevant practice questions."
+            questions = question_generator(context, max_new_tokens=50)
+            # Output
+            st.write("\nSummary:\n", summary_text)
+            for question in questions:
+                st.write(question["generated_text"])  # Output the generated questions
+    except Exception as e:
+        # Top-level UI boundary: surface any failure to the user instead of
+        # letting the script crash.
+        st.error(f"Error during processing: {e}")

requirements.txt CHANGED Viewed

@@ -1,6 +1,6 @@
-gradio
 torch
 soundfile
-transformers
 numpy
-flask

+streamlit
+transformers
 torch
 soundfile
 numpy
+librosa