Spaces:

Diggz10
/

emotiondetector1

Running

App Files Community

Diggz10 committed on Jul 26

Commit

0ea75df

verified ·

1 Parent(s): b6e3c2e

Update app.py

Browse files

Files changed (1) hide show

app.py +27 -24

app.py CHANGED Viewed

@@ -1,21 +1,19 @@
 import gradio as gr
 from transformers import pipeline
-import librosa
-import numpy as np
 import soundfile as sf
 import os
 # --- Model Loading ---
-# We'll use the pipeline abstraction from transformers for simplicity.
-# This model is specifically designed for audio classification (emotion detection).
-# It will automatically handle the loading of the model and its preprocessor.
-classifier = pipeline("audio-classification", model="mrm8488/Emotion-detection-from-audio-files")
-# --- Emotion Labels Mapping (Optional, for clearer output) ---
-# The model outputs raw labels, we can define a more readable mapping if needed
-# For this specific model, the labels are already pretty clear.
-# Example labels from the model's page: 'anger', 'disgust', 'fear', 'happiness', 'neutral', 'sadness', 'surprise'
 # --- Prediction Function ---
 def predict_emotion(audio_file):
@@ -23,16 +21,17 @@ def predict_emotion(audio_file):
     Predicts emotions from an audio file.
     Args:
-        audio_file (str or np.ndarray): Path to the audio file or a numpy array
-                                        (if using microphone input directly).
-                                        Gradio's Audio component usually provides
-                                        a file path for file uploads or a tuple
-                                        (samplerate, audio_array) for microphone.
     Returns:
         dict: A dictionary of emotion labels and their probabilities.
     """
     if audio_file is None:
-        return {"error": "No audio input provided."}
     # Gradio's Audio component can return a path to a temp file for file uploads,
     # or a tuple (samplerate, numpy_array) for microphone input.
@@ -47,14 +46,14 @@ def predict_emotion(audio_file):
         sf.write(temp_audio_path, audio_array, sample_rate)
         audio_path = temp_audio_path
     else:
-        return {"error": "Invalid audio input format."}
     try:
         # Perform inference
-        results = classifier(audio_path)
         # Process results into a dictionary for better display
-        emotion_scores = {item['label']: item['score'] for item in results}
         return emotion_scores
     except Exception as e:
@@ -69,10 +68,14 @@ def predict_emotion(audio_file):
 # Define the Gradio interface
 iface = gr.Interface(
     fn=predict_emotion,
-    inputs=gr.Audio(type="filepath", label="Upload Audio or Record with Microphone", sources=["microphone", "file"]),
-    outputs=gr.Label(num_top_classes=7, label="Emotion Probabilities"), # Adjust num_top_classes based on model's output labels
     title="AI Audio Emotion Detector",
-    description="Upload an audio file or record your voice to detect emotions like anger, disgust, fear, happiness, neutral, sadness, and surprise."
 )
 # Launch the Gradio app

 import gradio as gr
 from transformers import pipeline
 import soundfile as sf
 import os
 # --- Model Loading ---
+# We switched to 'superb/wav2vec2-base-superb-er' as it's a well-established and public model for emotion recognition.
+# This should resolve the download issues encountered previously.
+try:
+    classifier = pipeline("audio-classification", model="superb/wav2vec2-base-superb-er")
+except Exception as e:
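+    # If the model fails to load, set classifier to None so that predict_emotion can
+    # report the failure in the Gradio interface instead of the Space crashing at startup.
+    # NOTE(review): error_fn is not referenced anywhere in the visible code, and `e` is
+    # unbound once this except block exits, so calling error_fn would raise NameError.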
+    def error_fn(audio_file):
+        return {"error": f"Failed to load the model. Please check the logs. Error: {str(e)}"}
+    classifier = None
 # --- Prediction Function ---
 def predict_emotion(audio_file):
     Predicts emotions from an audio file.
     Args:
+        audio_file (str or tuple): Path to the audio file (from upload) or a tuple
+                                   (samplerate, audio_array) from microphone input.
     Returns:
         dict: A dictionary of emotion labels and their probabilities.
     """
+    # Handle case where the model failed to load
+    if classifier is None:
+        return {"error": "The AI model could not be loaded. The application cannot start."}
     if audio_file is None:
+        return {"error": "No audio input provided. Please upload a file or record."}
     # Gradio's Audio component can return a path to a temp file for file uploads,
     # or a tuple (samplerate, numpy_array) for microphone input.
         sf.write(temp_audio_path, audio_array, sample_rate)
         audio_path = temp_audio_path
     else:
+        return {"error": f"Invalid audio input format: {type(audio_file)}"}
     try:
         # Perform inference
+        results = classifier(audio_path, top_k=5) # top_k ensures we get all relevant emotion scores
         # Process results into a dictionary for better display
+        emotion_scores = {item['label']: round(item['score'], 3) for item in results}
         return emotion_scores
     except Exception as e:
 # Define the Gradio interface
 iface = gr.Interface(
     fn=predict_emotion,
+    inputs=gr.Audio(sources=["microphone", "file"], type="filepath", label="Upload Audio or Record with Microphone"),
+    outputs=gr.Label(num_top_classes=5, label="Emotion Probabilities"), # NOTE(review): the superb ER model card lists four labels (neu, hap, ang, sad) and no 'no-emotion' class — confirm
     title="AI Audio Emotion Detector",
+    description="Upload an audio file or record your voice to detect emotions. This model is trained to recognize 'anger', 'happiness', 'neutral', 'sadness', and 'no-emotion'.",
+    examples=[
+        # You can add example audio files to your Hugging Face Space and reference them here.
+        # For now, we'll leave this empty.
+    ]
 )
 # Launch the Gradio app