Spaces:

Kr08
/

ASR

Build error

App Files Community

Kr08 committed on Aug 25, 2024

Commit

ae8fbd2

verified ·

1 Parent(s): e269658

updated app.py

Browse files

Files changed (1) hide show

app.py +21 -80

app.py CHANGED Viewed

@@ -1,88 +1,23 @@
-import torch
-import pickle
-import whisper
 import streamlit as st
-import torchaudio as ta
-import numpy as np
 from io import BytesIO
-from transformers import WhisperProcessor, WhisperForConditionalGeneration
-# Set up device and dtype
-device = "cuda:0" if torch.cuda.is_available() else "cpu"
-torch_dtype = torch.float16 if device == "cuda:0" else torch.float32
-SAMPLING_RATE = 16000
-CHUNK_LENGTH_S = 20  # 30 seconds per chunk
-# Load Whisper model and processor
-processor = WhisperProcessor.from_pretrained("openai/whisper-small")
-model = WhisperForConditionalGeneration.from_pretrained("openai/whisper-small").to(device)
 # Title of the app
-st.title("Audio Player with Live Transcription")
-# Sidebar for file uploader and submit button
-st.sidebar.header("Upload Audio Files")
-uploaded_files = st.sidebar.file_uploader("Choose audio files", type=["mp3", "wav"], accept_multiple_files=True)
-submit_button = st.sidebar.button("Submit")
-# Session state to hold data
-if 'audio_files' not in st.session_state:
-    st.session_state.audio_files = []
-    st.session_state.transcriptions = {}
-    st.session_state.translations = {}
-    st.session_state.detected_languages = []
-    st.session_state.waveforms = []
-def detect_language(audio_file):
-    whisper_model = whisper.load_model("small")
-    trimmed_audio = whisper.pad_or_trim(audio_file.squeeze())
-    mel = whisper.log_mel_spectrogram(trimmed_audio).to(whisper_model.device)
-    _, probs = whisper_model.detect_language(mel)
-    detected_lang = max(probs[0], key=probs[0].get)
-    print(f"Detected language: {detected_lang}")
-    return detected_lang
-def process_long_audio(waveform, sampling_rate, task="transcribe", language=None):
-    input_length = waveform.shape[1]
-    chunk_length = int(CHUNK_LENGTH_S * sampling_rate)
-    chunks = [waveform[:, i:i + chunk_length] for i in range(0, input_length, chunk_length)]
-    results = []
-    for chunk in chunks:
-        # import pdb;pdb.set_trace()
-        input_features = processor(chunk[0], sampling_rate=sampling_rate, return_tensors="pt").input_features.to(device)
-        with torch.no_grad():
-            if task == "translate":
-                forced_decoder_ids = processor.get_decoder_prompt_ids(language=language, task="translate")
-                generated_ids = model.generate(input_features, forced_decoder_ids=forced_decoder_ids)
-            else:
-                generated_ids = model.generate(input_features)
-        transcription = processor.batch_decode(generated_ids, skip_special_tokens=True)
-        results.extend(transcription)
-    return " ".join(results)
-# Process uploaded files
-if submit_button and uploaded_files is not None:
-    st.session_state.audio_files = uploaded_files
-    st.session_state.detected_languages = []
-    st.session_state.waveforms = []
-    for uploaded_file in uploaded_files:
-        waveform, sampling_rate = ta.load(BytesIO(uploaded_file.read()))
-        if sampling_rate != SAMPLING_RATE:
-            waveform = ta.functional.resample(waveform, orig_freq=sampling_rate, new_freq=SAMPLING_RATE)
-        st.session_state.waveforms.append(waveform)
-        detected_language = detect_language(waveform)
-        st.session_state.detected_languages.append(detected_language)
 # Display uploaded files and options
 if 'audio_files' in st.session_state and st.session_state.audio_files:
@@ -91,7 +26,7 @@ if 'audio_files' in st.session_state and st.session_state.audio_files:
         with col1:
             st.write(f"**File name**: {uploaded_file.name}")
-            st.audio(BytesIO(uploaded_file.read()), format=uploaded_file.type)
             st.write(f"**Detected Language**: {st.session_state.detected_languages[i]}")
         with col2:
@@ -103,6 +38,10 @@ if 'audio_files' in st.session_state and st.session_state.audio_files:
             if st.session_state.transcriptions.get(i):
                 st.write("**Transcription**:")
                 st.write(st.session_state.transcriptions[i])
             if st.button(f"Translate {uploaded_file.name}"):
                 with st.spinner("Translating..."):
@@ -116,4 +55,6 @@ if 'audio_files' in st.session_state and st.session_state.audio_files:
             if st.session_state.translations.get(i):
                 st.write("**Translation**:")
-                st.write(st.session_state.translations[i])

 import streamlit as st
+import pickle
 from io import BytesIO
+import pyperclip
+from audio_processing import detect_language, process_long_audio, load_and_resample_audio
+from model_utils import load_models
+from config import SAMPLING_RATE
+from llm_utils import generate_answer, summarize_transcript
+# Load models at startup
+load_models()
 # Title of the app
+st.title("Audio Player with Live Transcription and Q&A")
+# ... (previous code remains the same)
+def copy_to_clipboard(text):
+    pyperclip.copy(text)
+    st.success("Copied to clipboard!")
 # Display uploaded files and options
 if 'audio_files' in st.session_state and st.session_state.audio_files:
         with col1:
             st.write(f"**File name**: {uploaded_file.name}")
+            st.audio(uploaded_file, format=uploaded_file.type)
             st.write(f"**Detected Language**: {st.session_state.detected_languages[i]}")
         with col2:
             if st.session_state.transcriptions.get(i):
                 st.write("**Transcription**:")
                 st.write(st.session_state.transcriptions[i])
+                if st.button("Copy Transcription", key=f"copy_transcription_{i}"):
+                    copy_to_clipboard(st.session_state.transcriptions[i])
+                # ... (summarization and Q&A code remains the same)
             if st.button(f"Translate {uploaded_file.name}"):
                 with st.spinner("Translating..."):
             if st.session_state.translations.get(i):
                 st.write("**Translation**:")
+                st.write(st.session_state.translations[i])
+                if st.button("Copy Translation", key=f"copy_translation_{i}"):
+                    copy_to_clipboard(st.session_state.translations[i])