Spaces:

Issamohammed
/

Transcriber

Running

App Files Community

Issamohammed committed on Apr 17

Commit

37f7d1f

verified ·

1 Parent(s): be25d7c

Update app.py

Browse files

Files changed (1) hide show

app.py +115 -45

app.py CHANGED Viewed

@@ -1,48 +1,69 @@
 import os
 import torch
 import gradio as gr
 from pydub import AudioSegment
 from transformers import pipeline, AutoModelForSpeechSeq2Seq, AutoProcessor
 from pathlib import Path
 from tempfile import NamedTemporaryFile
 from datetime import timedelta
 import time
 # Configuration
 MODEL_ID = "KBLab/kb-whisper-large"
 CHUNK_DURATION_MS = 10000
 DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
 TORCH_DTYPE = torch.float16 if torch.cuda.is_available() else torch.float32
 # Initialize model and pipeline
 def initialize_pipeline():
-    model = AutoModelForSpeechSeq2Seq.from_pretrained(
-        MODEL_ID,
-        torch_dtype=TORCH_DTYPE,
-        low_cpu_mem_usage=True
-    ).to(DEVICE)
-    processor = AutoProcessor.from_pretrained(MODEL_ID)
-    return pipeline(
-        "automatic-speech-recognition",
-        model=model,
-        tokenizer=processor.tokenizer,
-        feature_extractor=processor.feature_extractor,
-        device=DEVICE,
-        torch_dtype=TORCH_DTYPE,
-        model_kwargs={"use_flash_attention_2": torch.cuda.is_available()}
-    )
 # Convert audio if needed
 def convert_to_wav(audio_path: str) -> str:
-    ext = str(Path(audio_path).suffix).lower()
-    if ext != ".wav":
-        audio = AudioSegment.from_file(audio_path)
-        wav_path = str(Path(audio_path).with_suffix(".converted.wav"))
-        audio.export(wav_path, format="wav")
-        return wav_path
-    return audio_path
 # Split audio into chunks
 def split_audio(audio_path: str) -> list:
@@ -50,8 +71,12 @@ def split_audio(audio_path: str) -> list:
         audio = AudioSegment.from_file(audio_path)
         if len(audio) == 0:
             raise ValueError("Audio file is empty or invalid.")
-        return [audio[i:i + CHUNK_DURATION_MS] around(i, len(audio), CHUNK_DURATION_MS) for i in range(0, len(audio), CHUNK_DURATION_MS)]
     except Exception as e:
         raise ValueError(f"Failed to process audio: {str(e)}")
 # Helper to compute chunk start time
@@ -62,9 +87,10 @@ def get_chunk_time(index: int, chunk_duration_ms: int) -> str:
 # Transcribe audio with progress and timestamps
 def transcribe(audio_path: str, include_timestamps: bool = False, progress=gr.Progress()):
     try:
-        if not audio_path:
-            return "No audio file provided.", None
         # Convert to WAV if needed
         wav_path = convert_to_wav(audio_path)
@@ -73,43 +99,83 @@ def transcribe(audio_path: str, include_timestamps: bool = False, progress=gr.Pr
         total_chunks = len(chunks)
         transcript = []
         timestamped_transcript = []
         for i, chunk in enumerate(chunks):
             try:
                 with NamedTemporaryFile(suffix=".wav", delete=False) as temp_file:
                     chunk.export(temp_file.name, format="wav")
                     result = PIPELINE(temp_file.name,
                                    generate_kwargs={"task": "transcribe", "language": "sv"})
                     text = result["text"].strip()
-                    transcript.append(text)
-                    if include_timestamps:
-                        timestamp = get_chunk_time(i, CHUNK_DURATION_MS)
-                        timestamped_transcript.append(f"[{timestamp}] {text}")
             finally:
-                if os.path.exists(temp_file.name):
-                    os.remove(temp_file.name)
             progress((i + 1) / total_chunks)
             yield " ".join(transcript), None
         # Clean up converted file if created
         if wav_path != audio_path and os.path.exists(wav_path):
-            os.remove(wav_path)
         # Prepare final transcript and downloadable file
         final_transcript = " ".join(transcript)
         download_content = "\n".join(timestamped_transcript) if include_timestamps else final_transcript
-        with NamedTemporaryFile(suffix=".txt", delete=False, mode='w', encoding='utf-8') as temp_file:
-            temp_file.write(download_content)
-            download_path = temp_file.name
         return final_transcript, download_path
     except Exception as e:
-        return f"Error during transcription: {str(e)}", None
 # Initialize pipeline globally
-PIPELINE = initialize_pipeline()
 # Gradio Interface with Blocks
 def create_interface():
@@ -136,4 +202,8 @@ def create_interface():
     return demo
 if __name__ == "__main__":
-    create_interface().launch()

 import os
 import torch
 import gradio as gr
+import logging
 from pydub import AudioSegment
+from pydub.exceptions import CouldntDecodeError
 from transformers import pipeline, AutoModelForSpeechSeq2Seq, AutoProcessor
 from pathlib import Path
 from tempfile import NamedTemporaryFile
 from datetime import timedelta
 import time
+# Setup logging
 # Records are emitted at INFO level and above, prefixed with time and level;
 # `logger` is the module-level logger used by the functions below.
+logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s")
+logger = logging.getLogger(__name__)
 # Configuration
 # Model identifier passed to from_pretrained() in initialize_pipeline().
 MODEL_ID = "KBLab/kb-whisper-large"
 # Length of each audio slice produced by split_audio(), in milliseconds (10 s).
 CHUNK_DURATION_MS = 10000
 # Run on the GPU when CUDA is available, otherwise on the CPU.
 DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
 # float16 when CUDA is available, float32 otherwise.
 TORCH_DTYPE = torch.float16 if torch.cuda.is_available() else torch.float32
 # Lower-case file suffixes accepted by convert_to_wav().
+SUPPORTED_FORMATS = {".wav", ".mp3", ".m4a"}
 # Initialize model and pipeline
 def initialize_pipeline():
     """Load the model named by MODEL_ID and build the speech-recognition pipeline.

     The model is loaded with TORCH_DTYPE, moved to DEVICE, and combined with
     the tokenizer and feature extractor of its processor.

     Returns:
         The object returned by ``transformers.pipeline`` for the
         "automatic-speech-recognition" task.

     Raises:
         RuntimeError: If any loading step fails. The original exception is
             attached as ``__cause__`` so the root cause stays visible.
     """
     try:
         model = AutoModelForSpeechSeq2Seq.from_pretrained(
             MODEL_ID,
             torch_dtype=TORCH_DTYPE,
             low_cpu_mem_usage=True,
         ).to(DEVICE)
         processor = AutoProcessor.from_pretrained(MODEL_ID)
         # NOTE(review): the model is already instantiated above; confirm that
         # model_kwargs still has an effect when a model object is passed.
         return pipeline(
             "automatic-speech-recognition",
             model=model,
             tokenizer=processor.tokenizer,
             feature_extractor=processor.feature_extractor,
             device=DEVICE,
             torch_dtype=TORCH_DTYPE,
             model_kwargs={"use_flash_attention_2": torch.cuda.is_available()},
         )
     except Exception as e:
         # logger.exception records the traceback as well as the message.
         logger.exception(f"Failed to initialize pipeline: {str(e)}")
         raise RuntimeError("Unable to load transcription model. Please check your network connection or model ID.") from e
 # Convert audio if needed
 def convert_to_wav(audio_path: str) -> str:
     """Return the path of a WAV version of *audio_path*, converting if needed.

     Files whose suffix is already ``.wav`` are returned unchanged. Other
     supported formats are decoded with pydub and exported next to the input
     as ``<name>.converted.wav``.

     Args:
         audio_path: Path to the input audio file.

     Returns:
         ``audio_path`` itself for WAV input, otherwise the path of the newly
         written WAV file.

     Raises:
         ValueError: If the suffix is not in SUPPORTED_FORMATS, or if decoding
             or exporting the audio fails.
     """
     ext = Path(audio_path).suffix.lower()
     # Validate before the try block so this ValueError is not caught by the
     # generic handler below and replaced with a vague message.
     if ext not in SUPPORTED_FORMATS:
         # sorted() keeps the message stable; set iteration order is arbitrary.
         raise ValueError(f"Unsupported audio format: {ext}. Supported formats: {', '.join(sorted(SUPPORTED_FORMATS))}")
     if ext == ".wav":
         return audio_path
     try:
         audio = AudioSegment.from_file(audio_path)
         wav_path = str(Path(audio_path).with_suffix(".converted.wav"))
         audio.export(wav_path, format="wav")
     except CouldntDecodeError as e:
         logger.error(f"Failed to decode audio file: {audio_path}")
         raise ValueError("Audio file is corrupted or in an unsupported format.") from e
     except OSError as e:
         logger.error(f"OS error during audio conversion: {str(e)}")
         raise ValueError("Failed to process audio file due to a system error.") from e
     except Exception as e:
         logger.error(f"Unexpected error during audio conversion: {str(e)}")
         raise ValueError("An unexpected error occurred while converting the audio.") from e
     return wav_path
 # Split audio into chunks
 def split_audio(audio_path: str) -> list:
         audio = AudioSegment.from_file(audio_path)
         if len(audio) == 0:
             raise ValueError("Audio file is empty or invalid.")
+        return [audio[i:i + CHUNK_DURATION_MS] for i in range(0, len(audio), CHUNK_DURATION_MS)]
+    except CouldntDecodeError:
+        logger.error(f"Failed to decode audio for splitting: {audio_path}")
+        raise ValueError("Audio file is corrupted or in an unsupported format.")
     except Exception as e:
+        logger.error(f"Failed to split audio: {str(e)}")
         raise ValueError(f"Failed to process audio: {str(e)}")
 # Helper to compute chunk start time
 # Transcribe audio with progress and timestamps
 def transcribe(audio_path: str, include_timestamps: bool = False, progress=gr.Progress()):
     try:
+        if not audio_path or not os.path.exists(audio_path):
+            logger.warning("Invalid or missing audio file path.")
+            return "Please upload a valid audio file.", None
         # Convert to WAV if needed
         wav_path = convert_to_wav(audio_path)
         total_chunks = len(chunks)
         transcript = []
         timestamped_transcript = []
+        failed_chunks = 0
         for i, chunk in enumerate(chunks):
+            temp_file_path = None
             try:
                 with NamedTemporaryFile(suffix=".wav", delete=False) as temp_file:
+                    temp_file_path = temp_file.name
                     chunk.export(temp_file.name, format="wav")
                     result = PIPELINE(temp_file.name,
                                    generate_kwargs={"task": "transcribe", "language": "sv"})
                     text = result["text"].strip()
+                    if text:  # Only append non-empty transcriptions
+                        transcript.append(text)
+                        if include_timestamps:
+                            timestamp = get_chunk_time(i, CHUNK_DURATION_MS)
+                            timestamped_transcript.append(f"[{timestamp}] {text}")
+            except RuntimeError as e:
+                logger.warning(f"Failed to transcribe chunk {i+1}/{total_chunks}: {str(e)}")
+                failed_chunks += 1
+                transcript.append("[Transcription failed for this segment]")
+                if include_timestamps:
+                    timestamp = get_chunk_time(i, CHUNK_DURATION_MS)
+                    timestamped_transcript.append(f"[{timestamp}] [Transcription failed]")
+            except Exception as e:
+                logger.error(f"Unexpected error in chunk {i+1}/{total_chunks}: {str(e)}")
+                failed_chunks += 1
+                transcript.append("[Transcription failed for this segment]")
+                if include_timestamps:
+                    timestamp = get_chunk_time(i, CHUNK_DURATION_MS)
+                    timestamped_transcript.append(f"[{timestamp}] [Transcription failed]")
             finally:
+                if temp_file_path and os.path.exists(temp_file_path):
+                    try:
+                        os.remove(temp_file_path)
+                    except OSError as e:
+                        logger.warning(f"Failed to delete temporary file {temp_file_path}: {str(e)}")
             progress((i + 1) / total_chunks)
             yield " ".join(transcript), None
         # Clean up converted file if created
         if wav_path != audio_path and os.path.exists(wav_path):
+            try:
+                os.remove(wav_path)
+            except OSError as e:
+                logger.warning(f"Failed to delete converted WAV file {wav_path}: {str(e)}")
         # Prepare final transcript and downloadable file
         final_transcript = " ".join(transcript)
+        if failed_chunks > 0:
+            final_transcript = f"Warning: {failed_chunks}/{total_chunks} chunks failed to transcribe.\n{final_transcript}"
         download_content = "\n".join(timestamped_transcript) if include_timestamps else final_transcript
+        download_path = None
+        try:
+            with NamedTemporaryFile(suffix=".txt", delete=False, mode='w', encoding='utf-8') as temp_file:
+                temp_file.write(download_content)
+                download_path = temp_file.name
+        except OSError as e:
+            logger.error(f"Failed to create downloadable transcript: {str(e)}")
+            final_transcript = f"{final_transcript}\nNote: Could not generate downloadable transcript due to a file error."
         return final_transcript, download_path
+    except ValueError as e:
+        logger.error(f"Value error during transcription: {str(e)}")
+        return str(e), None
     except Exception as e:
+        logger.error(f"Unexpected error during transcription: {str(e)}")
+        return f"An unexpected error occurred: {str(e)}. Please try again or contact support.", None
 # Initialize pipeline globally
 # This runs at import time and binds the single PIPELINE object that
 # transcribe() calls for every chunk. A RuntimeError from
 # initialize_pipeline() is logged at CRITICAL level and re-raised, so the
 # module fails to load instead of continuing without a model.
+try:
+    PIPELINE = initialize_pipeline()
+except RuntimeError as e:
+    logger.critical(f"Pipeline initialization failed: {str(e)}")
+    raise
 # Gradio Interface with Blocks
 def create_interface():
     return demo
 if __name__ == "__main__":
     # Script entry point: build the Gradio UI and start serving it.
     try:
         create_interface().launch()
     except Exception as e:
         # exc_info=True keeps the traceback in the log for diagnosis.
         logger.critical(f"Failed to launch Gradio interface: {str(e)}", exc_info=True)
         print("Error: Could not start the application. Please check the logs for details.")
         # Exit with a non-zero status so the caller can tell the start failed.
         raise SystemExit(1) from e