Spaces:

EdgarDataScientist
/

REM_WASTE_INTERVIEW

Sleeping

App Files Files Community

EdgarDataScientist committed on May 30

Commit

8cdbd03

verified ·

1 Parent(s): d752d17

Update app.py

Browse files

Files changed (1) hide show

app.py +74 -124

app.py CHANGED Viewed

@@ -8,150 +8,100 @@ import torchaudio
 import torch
 import ffmpeg
-# Load SpeechBrain
 try:
     from speechbrain.inference import EncoderClassifier
-    speechbrain_classifier = EncoderClassifier.from_hparams(
         source="speechbrain/lang-id-commonlanguage_ecapa",
         savedir="pretrained_models/lang-id-commonlanguage_ecapa"
     )
-    SPEECHBRAIN_LOADED = True
 except Exception as e:
-    st.warning(f"Could not load SpeechBrain model: {e}. Using simulation.")
-    SPEECHBRAIN_LOADED = False
-class AccentAnalyzer:
-    def __init__(self):
-        self.accent_profiles = {
-            "American": {"features": ["rhotic", "flapped_t", "cot_caught_merger"]},
-            "British": {"features": ["non_rhotic", "t_glottalization", "trap_bath_split"]},
-            "Australian": {"features": ["non_rhotic", "flat_a", "high_rising_terminal"]},
-            "Canadian": {"features": ["rhotic", "canadian_raising", "eh_tag"]},
-            "Indian": {"features": ["retroflex_consonants", "monophthongization", "syllable_timing"]},
-            "Irish": {"features": ["dental_fricatives", "alveolar_l", "soft_consonants"]},
-            "Scottish": {"features": ["rolled_r", "monophthongs", "glottal_stops"]},
-            "South African": {"features": ["non_rhotic", "kit_split", "kw_hw_distinction"]}
-        }
-        self.accent_data = self._simulate_profiles()
-    def _simulate_profiles(self):
-        all_features = set(f for p in self.accent_profiles.values() for f in p["features"])
-        data = {}
-        for name, profile in self.accent_profiles.items():
-            data[name] = {
-                "primary_features": profile["features"],
-                "feature_probabilities": {
-                    f: random.uniform(0.7, 0.9) if f in profile["features"] else random.uniform(0.1, 0.4)
-                    for f in all_features
-                }
-            }
-        return data
-    def _simulate_accent_classification(self, audio_path):
-        all_features = {f for p in self.accent_profiles.values() for f in p["features"]}
-        detected = {f: random.uniform(0.1, 0.9) for f in all_features}
-        scores = {}
-        for accent, data in self.accent_data.items():
-            score = sum(
-                detected[f] * data["feature_probabilities"][f] * (3.0 if f in data["primary_features"] else 1.0)
-                for f in all_features
-            )
-            scores[accent] = score
-        top = max(scores, key=scores.get)
-        conf = (scores[top] / max(scores.values())) * 100
-        return {
-            "accent_type": top,
-            "confidence": conf,
-            "explanation": f"Detected **{top}** accent with {conf:.1f}% confidence.",
-            "all_scores": scores
-        }
-    def analyze_accent(self, audio_path):
-        if not SPEECHBRAIN_LOADED:
-            return self._simulate_accent_classification(audio_path)
-        try:
-            signal, sr = torchaudio.load(audio_path)
-            duration = signal.shape[1] / sr
-            if duration < 1.0:
-                raise ValueError("Audio too short to analyze.")
-            if signal.shape[0] > 1:
-                signal = signal.mean(dim=0, keepdim=True)
-            if sr != 16000:
-                signal = torchaudio.transforms.Resample(sr, 16000)(signal)
-            signal = signal.unsqueeze(0)  # [1, 1, time]
-            pred = speechbrain_classifier.classify_batch(signal)
-            probs = pred[0].squeeze(0).tolist()
-            labels = pred[1][0]
-            scores = {speechbrain_classifier.hparams.label_encoder.ind2lab[i]: p * 100 for i, p in enumerate(probs)}
-            if labels[0] == 'en':
-                result = self._simulate_accent_classification(audio_path)
-                result["all_scores"] = scores
-                return result
-            return {
-                "accent_type": labels[0],
-                "confidence": max(probs) * 100,
-                "explanation": f"Detected language: **{labels[0]}** ({max(probs)*100:.1f}%)",
-                "all_scores": scores
-            }
-        except Exception as e:
-            st.warning(f"Fallback to simulation: {e}")
-            return self._simulate_accent_classification(audio_path)
-def download_and_extract_audio(url_or_path, is_upload=False):
     temp_dir = tempfile.mkdtemp()
-    video_path = os.path.join(temp_dir, "video.mp4")
     audio_path = os.path.join(temp_dir, "audio.wav")
     if is_upload:
         with open(video_path, "wb") as f:
-            f.write(url_or_path.read())
     else:
-        with requests.get(url_or_path, stream=True) as r:
             r.raise_for_status()
             with open(video_path, 'wb') as f:
                 for chunk in r.iter_content(chunk_size=8192):
                     f.write(chunk)
-    (
-        ffmpeg
-        .input(video_path)
-        .output(audio_path, ar=16000, ac=1, format='wav')
-        .run(quiet=True, overwrite_output=True)
-    )
     return audio_path
-# --- Streamlit App ---
-st.set_page_config(page_title="Accent Analyzer", layout="wide")
-st.title("🗣️ English Accent or Language Analyzer")
-st.markdown("Upload a video/audio file or provide a direct `.mp4` or `.wav` URL:")
-url = st.text_input("🔗 Enter Direct MP4/WAV URL:")
-uploaded_file = st.file_uploader("📁 Or upload a file (MP4/WAV)", type=["mp4", "wav"])
-if st.button("Analyze"):
-    if not url and not uploaded_file:
-        st.error("Please enter a valid URL or upload a file.")
     else:
-        try:
-            with st.spinner("Processing audio..."):
-                audio_path = download_and_extract_audio(uploaded_file if uploaded_file else url, is_upload=bool(uploaded_file))
-                analyzer = AccentAnalyzer()
-                results = analyzer.analyze_accent(audio_path)
-            st.success(results["explanation"])
-            labels, values = zip(*results["all_scores"].items())
-            fig, ax = plt.subplots()
-            ax.bar(labels, values, color='skyblue')
-            ax.set_ylabel('Confidence (%)')
-            ax.set_title('Accent/Language Confidence')
-            plt.xticks(rotation=45)
-            st.pyplot(fig)
-        except Exception as e:
-            st.error(f"Failed to analyze: {e}")

 import torch
 import ffmpeg
+# Try loading the SpeechBrain language-ID classifier at import time.
+# On success `classifier` holds the model and SB_READY is True; if the import
+# or the model load raises, SB_READY is False and the UI below uses the
+# simulated classifier instead.
+# NOTE(review): the st.warning in the except branch runs before the
+# st.set_page_config call further down the script — confirm the installed
+# Streamlit version permits that ordering.
 try:
     from speechbrain.inference import EncoderClassifier
+    classifier = EncoderClassifier.from_hparams(
         source="speechbrain/lang-id-commonlanguage_ecapa",
         savedir="pretrained_models/lang-id-commonlanguage_ecapa"
     )
+    SB_READY = True
 except Exception as e:
+    # NOTE(review): the caught exception `e` is not included in the message,
+    # so the cause of the load failure is not shown to the user.
+    st.warning(" SpeechBrain model load failed. Falling back to simulation.")
+    SB_READY = False
+# Accent profiles for English detection.
+# Maps each accent label the simulator can return to a list of phonetic
+# feature tags. In the code visible here only the keys are read (by
+# simulate_accent_classification); the feature lists themselves are not
+# consumed anywhere.
+accent_profiles = {
+    "American": ["rhotic", "flapped_t", "cot_caught_merger"],
+    "British": ["non_rhotic", "t_glottalization", "trap_bath_split"],
+    "Australian": ["non_rhotic", "flat_a", "high_rising_terminal"],
+    "Canadian": ["rhotic", "canadian_raising", "eh_tag"],
+    "Indian": ["retroflex_consonants", "monophthongization", "syllable_timing"]
+}
+def simulate_accent_classification():
+    """Return a randomly generated accent result (no audio is inspected).
+
+    Returns:
+        A dict with keys ``"accent"`` (a random key of ``accent_profiles``),
+        ``"confidence"`` (a random value in [75, 98], rounded to two
+        decimals) and ``"summary"`` (a human-readable sentence).
+    """
+    # Draw the accent first, then the score, so the RNG is consumed in a
+    # fixed order.
+    chosen = random.choice(list(accent_profiles))
+    score = random.uniform(75, 98)
+    summary = f"Simulated detection: {chosen} accent with {score:.2f}% confidence."
+    return dict(accent=chosen, confidence=round(score, 2), summary=summary)
+def _is_english_label(label):
+    """Return True when a language-ID label denotes English.
+
+    Accepts the bare code ("en"), the full language name ("English") and the
+    "en: English" form, all case-insensitively, so the check works whichever
+    label convention the loaded model uses.
+    """
+    normalized = str(label).strip().lower()
+    return normalized in ("en", "english") or normalized.startswith("en:")
+
+
+def real_accent_classification(audio_path):
+    """Detect the spoken language of an audio file; simulate the accent if English.
+
+    Args:
+        audio_path: Path to an audio file readable by ``torchaudio.load``.
+
+    Returns:
+        A dict with keys ``"accent"``, ``"confidence"`` and ``"summary"``.
+        Non-English audio yields ``accent == "Non-English"`` with the top
+        language's score; English audio yields the result of
+        ``simulate_accent_classification`` with the base language appended
+        to the summary. If language detection itself fails, a warning is
+        shown and the simulated result is returned.
+    """
+    try:
+        signal, sr = torchaudio.load(audio_path)
+        # Down-mix multi-channel audio to mono; keepdim keeps it 2-D.
+        if signal.shape[0] > 1:
+            signal = signal.mean(dim=0, keepdim=True)
+        if sr != 16000:
+            signal = torchaudio.transforms.Resample(sr, 16000)(signal)
+        # The mono [1, time] tensor is already in the [batch, time] layout
+        # that classify_batch takes; an extra unsqueeze(0) would turn it into
+        # [1, 1, time] and be read as a one-sample waveform.
+        pred = classifier.classify_batch(signal)
+        probs = pred[0].squeeze(0).tolist()
+        ind2lab = classifier.hparams.label_encoder.ind2lab
+        # NOTE(review): scores are scaled by 100 and displayed as a
+        # percentage — confirm pred[0] is a probability for this model.
+        lang_scores = {ind2lab[i]: p * 100 for i, p in enumerate(probs)}
+        top_lang = max(lang_scores, key=lang_scores.get)
+    except Exception as e:
+        # Best-effort fallback: keep the app usable, but tell the user that
+        # the real model was not used instead of hiding the failure.
+        st.warning(f"Language detection failed ({e}). Falling back to simulation.")
+        return simulate_accent_classification()
+    if not _is_english_label(top_lang):
+        return {
+            "accent": "Non-English",
+            # Rounded like the simulated path so the UI shows a tidy value.
+            "confidence": round(lang_scores[top_lang], 2),
+            "summary": f"Detected language: {top_lang}",
+        }
+    # Simulate accent if English
+    result = simulate_accent_classification()
+    result["summary"] += " (Base language: English)"
+    return result
+def extract_audio(url_or_file, is_upload=False):
+    """Save the input media to a temp directory and extract 16 kHz mono WAV audio.
+
+    Args:
+        url_or_file: A URL string to download, or (when ``is_upload`` is
+            true) a file-like object whose ``read()`` returns the media bytes.
+        is_upload: True when ``url_or_file`` is an uploaded file object.
+
+    Returns:
+        Path to the extracted ``audio.wav`` inside a fresh temp directory.
+        The caller owns that directory and the file in it.
+
+    Raises:
+        requests.RequestException: If the download fails or times out.
+        RuntimeError: If ffmpeg cannot convert the input.
+    """
     temp_dir = tempfile.mkdtemp()
+    video_path = os.path.join(temp_dir, "input_video.mp4")
     audio_path = os.path.join(temp_dir, "audio.wav")
     if is_upload:
         with open(video_path, "wb") as f:
+            f.write(url_or_file.read())
     else:
+        # Without a timeout an unresponsive server would hang the app forever.
+        with requests.get(url_or_file, stream=True, timeout=30) as r:
             r.raise_for_status()
             with open(video_path, 'wb') as f:
                 for chunk in r.iter_content(chunk_size=8192):
                     f.write(chunk)
+    try:
+        ffmpeg.input(video_path).output(audio_path, ar=16000, ac=1).run(overwrite_output=True, quiet=True)
+    except ffmpeg.Error as err:
+        # quiet=True captures stderr; surface its tail so the user sees why
+        # the conversion failed instead of a generic ffmpeg error message.
+        detail = (err.stderr or b"").decode("utf-8", errors="replace").strip()
+        raise RuntimeError(f"ffmpeg could not extract audio: {detail[-500:] or err}") from err
+    finally:
+        # The downloaded/uploaded source is only needed for the conversion.
+        if os.path.exists(video_path):
+            os.remove(video_path)
     return audio_path
+# --- Streamlit UI ---
+# Page setup, inputs (URL or upload), and the Analyze action that extracts
+# audio, runs the real classifier when SB_READY is true (otherwise the
+# simulator), and renders the result.
+# NOTE(review): when the SpeechBrain load fails, st.warning has already run
+# before this st.set_page_config call — confirm the installed Streamlit
+# version permits that ordering.
+st.set_page_config(page_title="English Accent Analyzer", layout="centered")
+st.title("🗣️ English Accent Analyzer")
+st.markdown("### 🎯 Objective:\nUpload or link a video/audio of a speaker. We’ll detect if they're speaking English and simulate the accent.")
+url_input = st.text_input("🔗 Paste public Loom or direct MP4/WAV link:")
+uploaded_file = st.file_uploader("📁 Or upload a video/audio file", type=["mp4", "wav"])
+if st.button(" Analyze"):
+    if not url_input and not uploaded_file:
+        st.error("Please provide a valid URL or upload a file.")
     else:
+        with st.spinner("Analyzing..."):
+            try:
+                # An uploaded file takes precedence over the URL when both are given.
+                audio_path = extract_audio(uploaded_file if uploaded_file else url_input, is_upload=bool(uploaded_file))
+                result = real_accent_classification(audio_path) if SB_READY else simulate_accent_classification()
+                st.success(f"🎧 Detected Accent: **{result['accent']}**")
+                # Fixed two-decimal formatting: the non-English path returns a
+                # raw float score that would otherwise print at full precision.
+                st.metric("Confidence", f"{result['confidence']:.2f}%")
+                st.markdown(f"📝 {result['summary']}")
+            except Exception as e:
+                st.error(f"❌ Error during analysis: {e}")