Spaces:

Diggz10
/

emotiondetector1

Running

App Files Files Community

Diggz10 committed on Jul 26

Commit

5cde27f

verified ·

1 Parent(s): 4c23f39

Update app.py

Browse files

Files changed (1) hide show

app.py +50 -9

app.py CHANGED Viewed

@@ -2,6 +2,8 @@ import gradio as gr
 from transformers import pipeline
 import soundfile as sf
 import os
 # --- Model Loading ---
 try:
@@ -13,33 +15,72 @@ except Exception as e:
 # --- Prediction Function ---
 def predict_emotion(audio_file):
-    if classifier is None: return {"error": "The AI model could not be loaded."}
-    if audio_file is None: return {"error": "No audio input provided."}
-    if isinstance(audio_file, str): audio_path = audio_file
     elif isinstance(audio_file, tuple):
         sample_rate, audio_array = audio_file
         temp_audio_path = "temp_audio_from_mic.wav"
         sf.write(temp_audio_path, audio_array, sample_rate)
         audio_path = temp_audio_path
-    else: return {"error": f"Invalid audio input format: {type(audio_file)}"}
     try:
         results = classifier(audio_path, top_k=5)
         return {item['label']: round(item['score'], 3) for item in results}
-    except Exception as e: return {"error": f"An error occurred during prediction: {str(e)}"}
     finally:
-        if 'temp_audio_path' in locals() and os.path.exists(temp_audio_path): os.remove(temp_audio_path)
 # --- Gradio Interface ---
 iface = gr.Interface(
     fn=predict_emotion,
     inputs=gr.Audio(sources=["microphone", "upload"], type="filepath", label="Upload Audio or Record with Microphone"),
     outputs=gr.Label(num_top_classes=5, label="Emotion Probabilities"),
     title="AI Audio Emotion Detector",
     description="Upload an audio file or record your voice to detect emotions.",
-    # THIS LINE IS CRITICAL - WE ARE CREATING AN EXPLICIT API ENDPOINT
-    api_name="predict"
 )
-# Launch the Gradio app with explicit server settings
 if __name__ == "__main__":
     iface.queue().launch(server_name="0.0.0.0", share=True)

 from transformers import pipeline
 import soundfile as sf
 import os
+import base64
+import tempfile
 # --- Model Loading ---
 try:
 # --- Prediction Function ---
 def predict_emotion(audio_file):
     """Classify the emotion in an audio clip.

     Args:
         audio_file: Either a path to an audio file (``str``) or a
             ``(sample_rate, audio_array)`` tuple. A tuple is written to a
             temporary WAV file before being passed to the classifier.

     Returns:
         dict: On success, a mapping of each predicted label to its score
         rounded to 3 decimals (up to 5 entries, from ``top_k=5``). On
         failure, a dict with a single ``"error"`` key describing the problem.

     Note:
         Relies on the module-level ``classifier`` created in the model-loading
         section of this file (not visible in this hunk).
     """
     if classifier is None:
         return {"error": "The AI model could not be loaded."}
     if audio_file is None:
         return {"error": "No audio input provided."}
     if not isinstance(audio_file, (str, tuple)):
         return {"error": f"Invalid audio input format: {type(audio_file)}"}

     # Stays None unless this call creates a temp file, so the cleanup below
     # never touches a file supplied by the caller.
     temp_audio_path = None
     try:
         if isinstance(audio_file, str):
             audio_path = audio_file
         else:
             sample_rate, audio_array = audio_file
             # A unique temp file per call: a fixed file name would let
             # concurrent requests overwrite or delete each other's audio.
             with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as temp_file:
                 temp_audio_path = temp_file.name
             sf.write(temp_audio_path, audio_array, sample_rate)
             audio_path = temp_audio_path

         results = classifier(audio_path, top_k=5)
         return {item['label']: round(item['score'], 3) for item in results}
     except Exception as e:
         # Boundary handler: report the failure in the same error-dict shape
         # used by the validation checks above.
         return {"error": f"An error occurred during prediction: {str(e)}"}
     finally:
         if temp_audio_path is not None and os.path.exists(temp_audio_path):
             os.remove(temp_audio_path)
+# --- API Function for Base64 Input ---
def predict_emotion_api(data):
    """Classify the emotion in base64-encoded audio.

    Args:
        data: The audio file contents encoded as base64, in any form accepted
            by ``base64.b64decode`` (e.g. ``str`` or ``bytes``). The decoded
            bytes are written to a temporary ``.wav`` file.
            NOTE(review): the earlier docstring described the input as
            ``{"data": "base64_encoded_audio_string"}``; this function decodes
            ``data`` directly, so that presumably refers to the request
            envelope rather than this argument — confirm against the caller.

    Returns:
        dict: On success, a mapping of each predicted label to its score
        rounded to 3 decimals (up to 5 entries, from ``top_k=5``). On
        failure, a dict with a single ``"error"`` key describing the problem.

    Note:
        Relies on the module-level ``classifier`` created in the model-loading
        section of this file (not visible in this hunk).
    """
    if classifier is None:
        return {"error": "The AI model could not be loaded."}
    if data is None:
        return {"error": "No audio input provided."}

    temp_audio_path = None
    try:
        # Decode base64 audio data
        audio_data = base64.b64decode(data)

        # delete=False so the file can be reopened by path after the handle is
        # closed; removal is handled in the finally block below.
        with tempfile.NamedTemporaryFile(delete=False, suffix='.wav') as temp_file:
            # Record the path before writing so a failed write is still cleaned up.
            temp_audio_path = temp_file.name
            temp_file.write(audio_data)

        # Predict emotion
        results = classifier(temp_audio_path, top_k=5)
        return {item['label']: round(item['score'], 3) for item in results}
    except Exception as e:
        return {"error": f"An error occurred during prediction: {str(e)}"}
    finally:
        # Always remove the temp file, including when decoding or the
        # classifier raised (previously the file leaked on failure).
        if temp_audio_path is not None and os.path.exists(temp_audio_path):
            os.unlink(temp_audio_path)
 # --- Gradio Interface ---
+# Main interface for the web UI: wires predict_emotion to an audio input (microphone or upload, type="filepath") and a label output limited to the top 5 classes
 iface = gr.Interface(
     fn=predict_emotion,
     inputs=gr.Audio(sources=["microphone", "upload"], type="filepath", label="Upload Audio or Record with Microphone"),
     outputs=gr.Label(num_top_classes=5, label="Emotion Probabilities"),
     title="AI Audio Emotion Detector",
     description="Upload an audio file or record your voice to detect emotions.",
+    api_name="predict"  # Names the API endpoint "predict"; NOTE(review): the exact URL route depends on the Gradio version — confirm
 )
+# Launch the Gradio app
 if __name__ == "__main__":
     # Turn on the request queue first, then start the server on all
     # network interfaces with a share link requested.
     queued_app = iface.queue()
     queued_app.launch(server_name="0.0.0.0", share=True)