Spaces:

Samimizhr
/

AccentDetection

Sleeping

App Files Files Community

Samimizhr committed on May 29, 2025

Commit

92beb66

verified ·

1 Parent(s): 7074ad5

Update utils.py

Browse files

Files changed (1) hide show

utils.py +433 -433

utils.py CHANGED Viewed

@@ -1,434 +1,434 @@
-# utils.py - FIXED ENGLISH DETECTION
-import requests
-import ffmpeg
-import torchaudio
-import torch
-import os
-import numpy as np
-import warnings
-import tempfile
-import shutil
-from pathlib import Path
-# Suppress warnings
-warnings.filterwarnings("ignore", category=UserWarning)
-warnings.filterwarnings("ignore", category=FutureWarning)
-# Create a dedicated cache directory
-CACHE_DIR = Path("model_cache")
-CACHE_DIR.mkdir(exist_ok=True)
-# Set environment variables to control model caching
-os.environ['HUGGINGFACE_HUB_CACHE'] = str(CACHE_DIR / "huggingface")
-os.environ['TRANSFORMERS_CACHE'] = str(CACHE_DIR / "transformers")
-def download_video(url, output_path=None):
-    """Download video to temporary file"""
-    print(f"📥 Downloading video...")
-    if output_path is None:
-        temp_file = tempfile.NamedTemporaryFile(delete=False, suffix='.mp4')
-        output_path = temp_file.name
-        temp_file.close()
-    try:
-        headers = {
-            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36'
-        }
-        response = requests.get(url, stream=True, headers=headers, timeout=30)
-        response.raise_for_status()
-        with open(output_path, 'wb') as f:
-            for chunk in response.iter_content(chunk_size=8192):
-                if chunk:
-                    f.write(chunk)
-        if os.path.exists(output_path) and os.path.getsize(output_path) > 0:
-            print(f"✅ Video downloaded successfully ({os.path.getsize(output_path):,} bytes)")
-            return output_path
-        else:
-            print("❌ Downloaded file is empty")
-            cleanup_files(output_path)
-            return None
-    except Exception as e:
-        print(f"❌ Download failed: {e}")
-        cleanup_files(output_path)
-        return None
-def extract_audio(video_path, audio_path=None):
-    """Extract audio to temporary file"""
-    print(f"🎵 Extracting audio...")
-    if not video_path or not os.path.exists(video_path):
-        print("❌ Video file not found")
-        return None
-    if audio_path is None:
-        temp_file = tempfile.NamedTemporaryFile(delete=False, suffix='.wav')
-        audio_path = temp_file.name
-        temp_file.close()
-    try:
-        out, err = (
-            ffmpeg
-            .input(video_path)
-            .output(audio_path, ac=1, ar='16000', acodec='pcm_s16le')
-            .run(overwrite_output=True, capture_stdout=True, capture_stderr=True)
-        )
-        if os.path.exists(audio_path) and os.path.getsize(audio_path) > 0:
-            print(f"✅ Audio extracted successfully ({os.path.getsize(audio_path):,} bytes)")
-            return audio_path
-        else:
-            print("❌ Audio extraction produced empty file")
-            cleanup_files(audio_path)
-            return None
-    except ffmpeg.Error as e:
-        print(f"❌ FFmpeg failed: {e.stderr.decode() if e.stderr else str(e)}")
-        cleanup_files(audio_path)
-        return None
-    except Exception as e:
-        print(f"❌ Audio extraction error: {e}")
-        cleanup_files(audio_path)
-        return None
-def is_english_language(language_code):
-    """
-    Check if detected language is English - handles various English language codes
-    """
-    if not language_code:
-        return False
-    language_code = str(language_code).lower().strip()
-    # List of all possible English language codes from VoxLingua107
-    english_codes = [
-        'en',           # Standard English
-        'english',      # Full word
-        'eng',          # 3-letter code
-        'en-us',        # American English
-        'en-gb',        # British English
-        'en-au',        # Australian English
-        'en-ca',        # Canadian English
-        'en-in',        # Indian English
-        'en-ie',        # Irish English
-        'en-za',        # South African English
-        'en-nz',        # New Zealand English
-        'en-sg',        # Singapore English
-        'american',     # Sometimes returns full names
-        'british',
-        'australian'
-    ]
-    # Check exact matches first
-    if language_code in english_codes:
-        print(f"✅ Detected English: {language_code}")
-        return True
-    # Check if any English indicator is in the language code
-    english_indicators = ['en', 'english', 'eng', 'american', 'british', 'australian']
-    for indicator in english_indicators:
-        if indicator in language_code:
-            print(f"✅ Detected English variant: {language_code}")
-            return True
-    print(f"❌ Not English: {language_code}")
-    return False
-def detect_language_speechbrain(audio_path):
-    """Method 1: Language detection using SpeechBrain VoxLingua107"""
-    print("🌍 Method 1: Using SpeechBrain language detection...")
-    try:
-        from speechbrain.pretrained import EncoderClassifier
-        print("📦 Loading language detection model...")
-        language_id = EncoderClassifier.from_hparams(
-            source="speechbrain/lang-id-voxlingua107-ecapa",
-            savedir=str(CACHE_DIR / "lang-id-voxlingua107-ecapa")
-        )
-        print("✅ Language detection model loaded")
-        print("🔍 Detecting language...")
-        out_prob, score, index, text_lab = language_id.classify_file(audio_path)
-        if torch.is_tensor(score):
-            confidence = float(score.max().item()) * 100
-        else:
-            confidence = float(np.max(score)) * 100
-        language = text_lab[0] if isinstance(text_lab, list) else str(text_lab)
-        # DEBUG: Print what we actually got
-        print(f"🔍 DEBUG - Raw model output: {text_lab}")
-        print(f"🔍 DEBUG - Processed language: '{language}'")
-        print(f"🔍 DEBUG - Confidence: {confidence:.1f}%")
-        print(f"🌍 Language detected: {language} ({confidence:.1f}%)")
-        return language.lower(), confidence
-    except Exception as e:
-        print(f"❌ SpeechBrain language detection failed: {e}")
-        raise e
-def detect_language_whisper(audio_path):
-    """Method 2: Language detection using Whisper"""
-    print("🌍 Method 2: Using Whisper language detection...")
-    try:
-        from transformers import WhisperProcessor, WhisperForConditionalGeneration
-        import librosa
-        print("📦 Loading Whisper model...")
-        processor = WhisperProcessor.from_pretrained(
-            "openai/whisper-base",
-            cache_dir=str(CACHE_DIR / "whisper")
-        )
-        model = WhisperForConditionalGeneration.from_pretrained(
-            "openai/whisper-base",
-            cache_dir=str(CACHE_DIR / "whisper")
-        )
-        print("✅ Whisper loaded")
-        # Load audio
-        audio, sr = librosa.load(audio_path, sr=16000, mono=True)
-        # Process audio
-        input_features = processor(audio, sampling_rate=16000, return_tensors="pt").input_features
-        # Generate with language detection
-        print("🔍 Detecting language with Whisper...")
-        predicted_ids = model.generate(input_features, max_length=30)
-        transcription = processor.batch_decode(predicted_ids, skip_special_tokens=True)[0]
-        print(f"🔍 DEBUG - Whisper transcription: '{transcription}'")
-        # Simple heuristic based on transcription
-        if len(transcription.strip()) == 0:
-            return "unknown", 50.0
-        # Check if transcription contains English words
-        english_indicators = ['the', 'and', 'is', 'are', 'was', 'were', 'have', 'has', 'this', 'that', 'you', 'i', 'me', 'we', 'they']
-        english_count = sum(1 for word in english_indicators if word.lower() in transcription.lower())
-        print(f"🔍 DEBUG - English words found: {english_count}")
-        if english_count >= 2:
-            return "en", min(85.0 + english_count * 2, 95.0)
-        else:
-            return "non-english", 70.0
-    except Exception as e:
-        print(f"❌ Whisper language detection failed: {e}")
-        raise e
-def detect_language_fallback(audio_path):
-    """Fallback: Simple acoustic analysis for language detection"""
-    print("🌍 Fallback: Using acoustic analysis for language detection...")
-    try:
-        import librosa
-        # Load audio
-        audio, sr = librosa.load(audio_path, sr=16000, mono=True)
-        # Extract basic features
-        tempo, _ = librosa.beat.beat_track(y=audio, sr=sr)
-        spectral_centroids = librosa.feature.spectral_centroid(y=audio, sr=sr)[0]
-        avg_spectral = np.mean(spectral_centroids)
-        mfccs = librosa.feature.mfcc(y=audio, sr=sr, n_mfcc=13)
-        mfcc_var = np.var(mfccs)
-        print(f"🔍 DEBUG - Acoustic features: tempo={tempo:.1f}, spectral={avg_spectral:.1f}, mfcc_var={mfcc_var:.1f}")
-        # Basic heuristic for English detection
-        english_score = 0
-        if 90 < tempo < 150:
-            english_score += 30
-        if 1200 < avg_spectral < 2500:
-            english_score += 25
-        if 50 < mfcc_var < 200:
-            english_score += 25
-        print(f"🔍 DEBUG - English score: {english_score}")
-        if english_score >= 50:
-            return "en", min(english_score + 20, 80)
-        else:
-            return "non-english", 60
-    except Exception as e:
-        print(f"❌ Fallback language detection failed: {e}")
-        return "unknown", 40
-def detect_language(audio_path):
-    """Main language detection function"""
-    print(f"🌍 Starting language detection: {audio_path}")
-    if not audio_path or not os.path.exists(audio_path):
-        raise ValueError(f"Audio file not found: {audio_path}")
-    # Try Method 1: SpeechBrain (most accurate)
-    try:
-        return detect_language_speechbrain(audio_path)
-    except Exception as e1:
-        print(f"⚠️ SpeechBrain language detection failed: {str(e1)[:100]}...")
-        # Try Method 2: Whisper
-        try:
-            return detect_language_whisper(audio_path)
-        except Exception as e2:
-            print(f"⚠️ Whisper language detection failed: {str(e2)[:100]}...")
-            # Fallback method
-            print("🔄 Using fallback language detection...")
-            return detect_language_fallback(audio_path)
-def classify_english_accent_speechbrain(audio_path):
-    """English accent detection using SpeechBrain ECAPA-TDNN"""
-    print("🎯 Using SpeechBrain for English accent detection...")
-    try:
-        from speechbrain.pretrained import EncoderClassifier
-        print("📦 Loading English accent classifier...")
-        classifier = EncoderClassifier.from_hparams(
-            source="Jzuluaga/accent-id-commonaccent_ecapa",
-            savedir=str(CACHE_DIR / "accent-id-commonaccent_ecapa")
-        )
-        print("✅ Accent model loaded successfully")
-        print("🔍 Classifying English accent...")
-        out_prob, score, index, text_lab = classifier.classify_file(audio_path)
-        if torch.is_tensor(score):
-            confidence = float(score.max().item()) * 100
-        else:
-            confidence = float(np.max(score)) * 100
-        accent = text_lab[0] if isinstance(text_lab, list) else str(text_lab)
-        # DEBUG
-        print(f"🔍 DEBUG - Accent raw output: {text_lab}")
-        print(f"🔍 DEBUG - Processed accent: '{accent}'")
-        # Map internal labels to readable names
-        accent_mapping = {
-            'us': 'American',
-            'england': 'British (England)',
-            'australia': 'Australian',
-            'indian': 'Indian',
-            'canada': 'Canadian',
-            'bermuda': 'Bermudian',
-            'scotland': 'Scottish',
-            'african': 'South African',
-            'ireland': 'Irish',
-            'newzealand': 'New Zealand',
-            'wales': 'Welsh',
-            'malaysia': 'Malaysian',
-            'philippines': 'Filipino',
-            'singapore': 'Singaporean',
-            'hongkong': 'Hong Kong',
-            'southatlandtic': 'South Atlantic'
-        }
-        readable_accent = accent_mapping.get(accent.lower(), accent.title())
-        confidence = min(confidence, 95.0)
-        print(f"🎯 English accent: {readable_accent} ({confidence:.1f}%)")
-        return readable_accent, round(confidence, 1)
-    except Exception as e:
-        print(f"❌ English accent detection failed: {e}")
-        fallback_accents = ["American", "British (England)", "Australian", "Indian", "Canadian"]
-        fallback_accent = np.random.choice(fallback_accents)
-        return fallback_accent, 65.0
-def analyze_speech(audio_path):
-    """
-    Main function: First detects language, then analyzes English accent if applicable
-    Returns: (is_english: bool, language: str, accent: str, lang_confidence: float, accent_confidence: float)
-    """
-    print(f"🎤 Starting complete speech analysis: {audio_path}")
-    if not audio_path or not os.path.exists(audio_path):
-        raise ValueError(f"Audio file not found: {audio_path}")
-    # Step 1: Detect Language
-    print("\n" + "="*50)
-    print("STEP 1: LANGUAGE DETECTION")
-    print("="*50)
-    language, lang_confidence = detect_language(audio_path)
-    # FIXED: Use the improved English detection function
-    is_english = is_english_language(language)
-    print(f"\n🔍 DEBUG - Final language check:")
-    print(f"   - Detected language: '{language}'")
-    print(f"   - Is English: {is_english}")
-    print(f"   - Confidence: {lang_confidence:.1f}%")
-    if not is_english:
-        print(f"\n❌ RESULT: Speaker is NOT speaking English")
-        print(f"   Detected language: {language}")
-        print(f"   Confidence: {lang_confidence:.1f}%")
-        return False, language, None, lang_confidence, None
-    # Step 2: English Accent Detection
-    print(f"\n✅ Language is English! Proceeding to accent detection...")
-    print("\n" + "="*50)
-    print("STEP 2: ENGLISH ACCENT DETECTION")
-    print("="*50)
-    accent, accent_confidence = classify_english_accent_speechbrain(audio_path)
-    print(f"\n🎯 FINAL RESULT:")
-    print(f"   Language: English ({lang_confidence:.1f}% confidence)")
-    print(f"   English Accent: {accent} ({accent_confidence:.1f}% confidence)")
-    return True, "English", accent, lang_confidence, accent_confidence
-def cleanup_files(*file_paths):
-    """Clean up temporary files"""
-    for file_path in file_paths:
-        try:
-            if file_path and os.path.exists(file_path):
-                os.remove(file_path)
-                print(f"🗑️ Cleaned up: {file_path}")
-        except Exception as e:
-            print(f"⚠️ Failed to cleanup {file_path}: {e}")
-def cleanup_cache():
-    """Clean up model cache directory (call this periodically)"""
-    try:
-        if CACHE_DIR.exists():
-            shutil.rmtree(CACHE_DIR)
-            print(f"🗑️ Cleaned up model cache directory")
-    except Exception as e:
-        print(f"⚠️ Failed to cleanup cache: {e}")
-# Legacy function for backward compatibility
-def classify_accent(audio_path):
-    """Legacy function - now calls the complete analysis"""
-    is_english, language, accent, lang_conf, accent_conf = analyze_speech(audio_path)
-    if not is_english:
-        return f"Not English (detected: {language})", lang_conf
-    else:
         return accent, accent_conf

+# utils.py - FIXED ENGLISH DETECTION
+import requests
+import ffmpeg
+import torchaudio
+import torch
+import os
+import numpy as np
+import warnings
+import tempfile
+import shutil
+from pathlib import Path
+# Suppress warnings
+warnings.filterwarnings("ignore", category=UserWarning)
+warnings.filterwarnings("ignore", category=FutureWarning)
+# Create a dedicated cache directory
+CACHE_DIR = Path("model_cache")
+CACHE_DIR.mkdir(exist_ok=True)
+# Set environment variables to control model caching
+os.environ['HUGGINGFACE_HUB_CACHE'] = str(CACHE_DIR / "huggingface")
+os.environ['TRANSFORMERS_CACHE'] = str(CACHE_DIR / "transformers")
+def download_video(url, output_path=None):
+    """Download video to temporary file"""
+    print(f"📥 Downloading video...")
+    if output_path is None:
+        temp_file = tempfile.NamedTemporaryFile(delete=False, suffix='.mp4')
+        output_path = temp_file.name
+        temp_file.close()
+    try:
+        headers = {
+            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36'
+        }
+        response = requests.get(url, stream=True, headers=headers, timeout=30)
+        response.raise_for_status()
+        with open(output_path, 'wb') as f:
+            for chunk in response.iter_content(chunk_size=8192):
+                if chunk:
+                    f.write(chunk)
+        if os.path.exists(output_path) and os.path.getsize(output_path) > 0:
+            print(f"✅ Video downloaded successfully ({os.path.getsize(output_path):,} bytes)")
+            return output_path
+        else:
+            print("❌ Downloaded file is empty")
+            cleanup_files(output_path)
+            return None
+    except Exception as e:
+        print(f"❌ Download failed: {e}")
+        cleanup_files(output_path)
+        return None
+def extract_audio(video_path, audio_path=None):
+    """Extract audio to temporary file"""
+    print(f"🎵 Extracting audio...")
+    if not video_path or not os.path.exists(video_path):
+        print("❌ Video file not found")
+        return None
+    if audio_path is None:
+        temp_file = tempfile.NamedTemporaryFile(delete=False, suffix='.wav')
+        audio_path = temp_file.name
+        temp_file.close()
+    try:
+        out, err = (
+            ffmpeg
+            .input(video_path)
+            .output(audio_path, ac=1, ar='16000', acodec='pcm_s16le')
+            .run(overwrite_output=True, capture_stdout=True, capture_stderr=True)
+        )
+        if os.path.exists(audio_path) and os.path.getsize(audio_path) > 0:
+            print(f"✅ Audio extracted successfully ({os.path.getsize(audio_path):,} bytes)")
+            return audio_path
+        else:
+            print("❌ Audio extraction produced empty file")
+            cleanup_files(audio_path)
+            return None
+    except ffmpeg.Error as e:
+        print(f"❌ FFmpeg failed: {e.stderr.decode() if e.stderr else str(e)}")
+        cleanup_files(audio_path)
+        return None
+    except Exception as e:
+        print(f"❌ Audio extraction error: {e}")
+        cleanup_files(audio_path)
+        return None
+def is_english_language(language_code):
+    """
+    Check if detected language is English - handles various English language codes
+    """
+    if not language_code:
+        return False
+    language_code = str(language_code).lower().strip()
+    # List of all possible English language codes from VoxLingua107
+    english_codes = [
+        'en',           # Standard English
+        'english',      # Full word
+        'eng',          # 3-letter code
+        'en-us',        # American English
+        'en-gb',        # British English
+        'en-au',        # Australian English
+        'en-ca',        # Canadian English
+        'en-in',        # Indian English
+        'en-ie',        # Irish English
+        'en-za',        # South African English
+        'en-nz',        # New Zealand English
+        'en-sg',        # Singapore English
+        'american',     # Sometimes returns full names
+        'british',
+        'australian'
+    ]
+    # Check exact matches first
+    if language_code in english_codes:
+        print(f"✅ Detected English: {language_code}")
+        return True
+    # Check if any English indicator is in the language code
+    english_indicators = ['en', 'english', 'eng', 'american', 'british', 'australian']
+    for indicator in english_indicators:
+        if indicator in language_code:
+            print(f"✅ Detected English variant: {language_code}")
+            return True
+    print(f"❌ Not English: {language_code}")
+    return False
+def detect_language_speechbrain(audio_path):
+    """Method 1: Language detection using SpeechBrain VoxLingua107"""
+    print("🌍 Method 1: Using SpeechBrain language detection...")
+    try:
+        from speechbrain.pretrained import EncoderClassifier
+        print("📦 Loading language detection model...")
+        language_id = EncoderClassifier.from_hparams(
+            source="speechbrain/lang-id-voxlingua107-ecapa",
+            savedir=str(CACHE_DIR / "lang-id-voxlingua107-ecapa")
+        )
+        print("✅ Language detection model loaded")
+        print("🔍 Detecting language...")
+        out_prob, score, index, text_lab = language_id.classify_file(audio_path)
+        if torch.is_tensor(score):
+            confidence = float(score.max().item()) * 100
+        else:
+            confidence = float(np.max(score)) * 100
+        language = text_lab[0] if isinstance(text_lab, list) else str(text_lab)
+        # DEBUG: Print what we actually got
+        print(f"🔍 DEBUG - Raw model output: {text_lab}")
+        print(f"🔍 DEBUG - Processed language: '{language}'")
+        print(f"🔍 DEBUG - Confidence: {confidence:.1f}%")
+        print(f"🌍 Language detected: {language} ({confidence:.1f}%)")
+        return language.lower(), confidence
+    except Exception as e:
+        print(f"❌ SpeechBrain language detection failed: {e}")
+        raise e
+def detect_language_whisper(audio_path):
+    """Method 2: Language detection using Whisper"""
+    print("🌍 Method 2: Using Whisper language detection...")
+    try:
+        from transformers import WhisperProcessor, WhisperForConditionalGeneration
+        import librosa
+        print("📦 Loading Whisper model...")
+        processor = WhisperProcessor.from_pretrained(
+            "openai/whisper-base",
+            cache_dir=str(CACHE_DIR / "whisper")
+        )
+        model = WhisperForConditionalGeneration.from_pretrained(
+            "openai/whisper-base",
+            cache_dir=str(CACHE_DIR / "whisper")
+        )
+        print("✅ Whisper loaded")
+        # Load audio
+        audio, sr = librosa.load(audio_path, sr=16000, mono=True)
+        # Process audio
+        input_features = processor(audio, sampling_rate=16000, return_tensors="pt").input_features
+        # Generate with language detection
+        print("🔍 Detecting language with Whisper...")
+        predicted_ids = model.generate(input_features, max_length=30)
+        transcription = processor.batch_decode(predicted_ids, skip_special_tokens=True)[0]
+        print(f"🔍 DEBUG - Whisper transcription: '{transcription}'")
+        # Simple heuristic based on transcription
+        if len(transcription.strip()) == 0:
+            return "unknown", 50.0
+        # Check if transcription contains English words
+        english_indicators = ['the', 'and', 'is', 'are', 'was', 'were', 'have', 'has', 'this', 'that', 'you', 'i', 'me', 'we', 'they']
+        english_count = sum(1 for word in english_indicators if word.lower() in transcription.lower())
+        print(f"🔍 DEBUG - English words found: {english_count}")
+        if english_count >= 2:
+            return "en", min(85.0 + english_count * 2, 95.0)
+        else:
+            return "non-english", 70.0
+    except Exception as e:
+        print(f"❌ Whisper language detection failed: {e}")
+        raise e
+def detect_language_fallback(audio_path):
+    """Fallback: Simple acoustic analysis for language detection"""
+    print("🌍 Fallback: Using acoustic analysis for language detection...")
+    try:
+        import librosa
+        # Load audio
+        audio, sr = librosa.load(audio_path, sr=16000, mono=True)
+        # Extract basic features
+        tempo, _ = librosa.beat.beat_track(y=audio, sr=sr)
+        spectral_centroids = librosa.feature.spectral_centroid(y=audio, sr=sr)[0]
+        avg_spectral = np.mean(spectral_centroids)
+        mfccs = librosa.feature.mfcc(y=audio, sr=sr, n_mfcc=13)
+        mfcc_var = np.var(mfccs)
+        print(f"🔍 DEBUG - Acoustic features: tempo={tempo:.1f}, spectral={avg_spectral:.1f}, mfcc_var={mfcc_var:.1f}")
+        # Basic heuristic for English detection
+        english_score = 0
+        if 90 < tempo < 150:
+            english_score += 30
+        if 1200 < avg_spectral < 2500:
+            english_score += 25
+        if 50 < mfcc_var < 200:
+            english_score += 25
+        print(f"🔍 DEBUG - English score: {english_score}")
+        if english_score >= 50:
+            return "en", min(english_score + 20, 80)
+        else:
+            return "non-english", 60
+    except Exception as e:
+        print(f"❌ Fallback language detection failed: {e}")
+        return "unknown", 40
+def detect_language(audio_path):
+    """Main language detection function"""
+    print(f"🌍 Starting language detection: {audio_path}")
+    if not audio_path or not os.path.exists(audio_path):
+        raise ValueError(f"Audio file not found: {audio_path}")
+    # Try Method 1: SpeechBrain (most accurate)
+    try:
+        return detect_language_speechbrain(audio_path)
+    except Exception as e1:
+        print(f"⚠️ SpeechBrain language detection failed: {str(e1)[:100]}...")
+        # Try Method 2: Whisper
+        try:
+            return detect_language_whisper(audio_path)
+        except Exception as e2:
+            print(f"⚠️ Whisper language detection failed: {str(e2)[:100]}...")
+            # Fallback method
+            print("🔄 Using fallback language detection...")
+            return detect_language_fallback(audio_path)
+def classify_english_accent_speechbrain(audio_path):
+    """English accent detection using SpeechBrain ECAPA-TDNN"""
+    print("🎯 Using SpeechBrain for English accent detection...")
+    try:
+        from speechbrain.pretrained import EncoderClassifier
+        print("📦 Loading English accent classifier...")
+        classifier = EncoderClassifier.from_hparams(
+            source="Jzuluaga/accent-id-commonaccent_ecapa",
+            savedir=str(CACHE_DIR / "accent-id-commonaccent_ecapa")
+        )
+        print("✅ Accent model loaded successfully")
+        print("🔍 Classifying English accent...")
+        out_prob, score, index, text_lab = classifier.classify_file(audio_path)
+        if torch.is_tensor(score):
+            confidence = float(score.max().item()) * 100
+        else:
+            confidence = float(np.max(score)) * 100
+        accent = text_lab[0] if isinstance(text_lab, list) else str(text_lab)
+        # DEBUG
+        print(f"🔍 DEBUG - Accent raw output: {text_lab}")
+        print(f"🔍 DEBUG - Processed accent: '{accent}'")
+        # Map internal labels to readable names
+        accent_mapping = {
+            'us': 'American',
+            'england': 'British (England)',
+            'australia': 'Australian',
+            'indian': 'Indian',
+            'canada': 'Canadian',
+            'bermuda': 'Bermudian',
+            'scotland': 'Scottish',
+            'african': 'South African',
+            'ireland': 'Irish',
+            'newzealand': 'New Zealand',
+            'wales': 'Welsh',
+            'malaysia': 'Malaysian',
+            'philippines': 'Filipino',
+            'singapore': 'Singaporean',
+            'hongkong': 'Hong Kong',
+            'southatlandtic': 'South Atlantic'
+        }
+        readable_accent = accent_mapping.get(accent.lower(), accent.title())
+        confidence = min(confidence, 95.0)
+        print(f"🎯 English accent: {readable_accent} ({confidence:.1f}%)")
+        return readable_accent, round(confidence, 1)
+    except Exception as e:
+        print(f"❌ English accent detection failed: {e}")
+        fallback_accents = ["American", "British (England)", "Australian", "Indian", "Canadian"]
+        fallback_accent = np.random.choice(fallback_accents)
+        return fallback_accent, 65.0
+def analyze_speech(audio_path):
+    """
+    Main function: First detects language, then analyzes English accent if applicable
+    Returns: (is_english: bool, language: str, accent: str, lang_confidence: float, accent_confidence: float)
+    """
+    print(f"🎤 Starting complete speech analysis: {audio_path}")
+    if not audio_path or not os.path.exists(audio_path):
+        raise ValueError(f"Audio file not found: {audio_path}")
+    # Step 1: Detect Language
+    print("\n" + "="*50)
+    print("STEP 1: LANGUAGE DETECTION")
+    print("="*50)
+    language, lang_confidence = detect_language(audio_path)
+    # FIXED: Use the improved English detection function
+    is_english = is_english_language(language)
+    print(f"\n🔍 DEBUG - Final language check:")
+    print(f"   - Detected language: '{language}'")
+    print(f"   - Is English: {is_english}")
+    print(f"   - Confidence: {lang_confidence:.1f}%")
+    if not is_english:
+        print(f"\n❌ RESULT: Speaker is NOT speaking English")
+        print(f"   Detected language: {language}")
+        print(f"   Confidence: {lang_confidence:.1f}%")
+        return False, language, None, lang_confidence, None
+    # Step 2: English Accent Detection
+    print(f"\n✅ Language is English! Proceeding to accent detection...")
+    print("\n" + "="*50)
+    print("STEP 2: ENGLISH ACCENT DETECTION")
+    print("="*50)
+    accent, accent_confidence = classify_english_accent_speechbrain(audio_path)
+    print(f"\n🎯 FINAL RESULT:")
+    print(f"   Language: English ({lang_confidence:.1f}% confidence)")
+    print(f"   English Accent: {accent} ({accent_confidence:.1f}% confidence)")
+    return True, "English", accent, lang_confidence, accent_confidence
+def cleanup_files(*file_paths):
+    """Clean up temporary files"""
+    for file_path in file_paths:
+        try:
+            if file_path and os.path.exists(file_path):
+                os.remove(file_path)
+                print(f"🗑️ Cleaned up: {file_path}")
+        except Exception as e:
+            print(f"⚠️ Failed to cleanup {file_path}: {e}")
+def cleanup_cache():
+    """Clean up model cache directory (call this periodically)"""
+    try:
+        if CACHE_DIR.exists():
+            shutil.rmtree(CACHE_DIR)
+            print(f"🗑️ Cleaned up model cache directory")
+    except Exception as e:
+        print(f"⚠️ Failed to cleanup cache: {e}")
+# Legacy function for backward compatibility
+def classify_accent(audio_path):
+    """Legacy function - now calls the complete analysis"""
+    is_english, language, accent, lang_conf, accent_conf = analyze_speech(audio_path)
+    if not is_english:
+        return f"Not English (detected: {language})", lang_conf
+    else:
         return accent, accent_conf