Lum4yx committed
Commit 68728a0 · verified · 1 Parent(s): 48c3d6a

Update app.py

Files changed (1)
  1. app.py +28 -67
app.py CHANGED
@@ -2,11 +2,9 @@ import gradio as gr
 from textblob import TextBlob
 from transformers import AutoModelForSpeechSeq2Seq, AutoProcessor, pipeline
 import torch
-import base64
 import numpy as np
-import ffmpeg
 import os
-import glob # Imported to find example files
+import glob
 
 # 1. Set up device and data type for optimized performance
 device = "cuda:0" if torch.cuda.is_available() else "cpu"
@@ -41,7 +39,6 @@ def sentiment_analysis(text: str) -> dict:
     """
     blob = TextBlob(text)
     sentiment = blob.sentiment
-
     return {
         "transcript": text,
         "polarity": round(sentiment.polarity, 2),
@@ -49,66 +46,35 @@ def sentiment_analysis(text: str) -> dict:
         "assessment": "positive" if sentiment.polarity > 0 else "negative" if sentiment.polarity < 0 else "neutral"
     }
 
-def process_audio(audio_path: str) -> dict:
+# NEW: Simplified main function to process audio from a NumPy array
+def analyze_audio(audio: tuple) -> dict:
     """
-    Processes an audio file from a local path, transcribes it, and analyzes its sentiment.
+    Processes audio data from a NumPy array, transcribes it, and analyzes its sentiment.
+    Gradio provides the audio as a tuple (sample_rate, data).
     """
-    if not audio_path or not os.path.exists(audio_path):
-        return {"error": "Invalid or non-existent file path provided."}
-
-    try:
-        out, _ = (
-            ffmpeg
-            .input(audio_path)
-            .output('pipe:1', format='s16le', ac=1, ar=16000)
-            .run(capture_stdout=True, capture_stderr=True)
-        )
-        audio_np = np.frombuffer(out, np.int16).astype(np.float32) / 32768.0
-        transcription_result = pipe(audio_np)
-        transcript_text = transcription_result["text"]
-    except Exception as e:
-        return {"error": f"Failed to process audio file: {str(e)}"}
+    if audio is None:
+        return {"error": "No audio provided. Please upload, record, or select an example."}
 
-    return sentiment_analysis(transcript_text)
-
-def process_base64_audio(base64_data_uri: str) -> dict:
-    """
-    Decodes a Base64 audio data URI, processes it in-memory, transcribes it, and analyzes its sentiment.
-    """
-    if not isinstance(base64_data_uri, str) or "base64," not in base64_data_uri:
-        return {"error": "Invalid or empty Base64 data URI provided."}
+    # Unpack the audio tuple
+    sample_rate, audio_data = audio
+
+    # Convert the audio data to the format the model expects (float32)
+    audio_float32 = audio_data.astype(np.float32) / 32768.0
 
     try:
-        _, encoded_data = base64_data_uri.split(',', 1)
-        audio_data = base64.b64decode(encoded_data)
-        out, _ = (
-            ffmpeg
-            .input('pipe:0')
-            .output('pipe:1', format='s16le', ac=1, ar=16000)
-            .run(input=audio_data, capture_stdout=True, capture_stderr=True)
-        )
-        audio_np = np.frombuffer(out, np.int16).astype(np.float32) / 32768.0
-        transcription_result = pipe(audio_np)
-        transcript_text = transcription_result["text"]
+        # Transcribe the audio
+        transcription_result = pipe(audio_float32)
+        transcript_text = transcription_result["text"].strip()
+
+        if not transcript_text:
+            return {"error": "Transcription failed or audio was silent."}
+
     except Exception as e:
-        return {"error": f"Failed to process Base64 audio: {str(e)}"}
+        return {"error": f"Failed to transcribe audio: {str(e)}"}
 
+    # Perform sentiment analysis on the transcript
     return sentiment_analysis(transcript_text)
 
-def analyze_audio_input(audio_input: str) -> dict:
-    """
-    Router function to handle both file paths and Base64 strings.
-    This allows the Gradio UI to use file uploads and the API to use Base64.
-    """
-    # Check if the input is a valid file path provided by the Gradio component
-    if audio_input and os.path.exists(audio_input):
-        return process_audio(audio_input)
-    # Otherwise, assume it's a Base64 string from an API call
-    elif isinstance(audio_input, str):
-        return process_base64_audio(audio_input)
-    else:
-        return {"error": f"Invalid input type: {type(audio_input)}"}
-
 
 # --- Code to find and load examples ---
 examples_dir = "examples"
@@ -127,26 +93,21 @@ examples_list = [[file] for file in example_files]
 
 # Create the Gradio interface
 demo = gr.Interface(
-    fn=analyze_audio_input, # Point to the main router function
-    inputs=gr.Audio(type="filepath", label="Upload Audio File or Record"),
+    fn=analyze_audio,  # CHANGED: Point to the new, simplified function
+    inputs=gr.Audio(type="numpy", label="Upload Audio File or Record"),  # CHANGED: type="numpy"
     outputs=gr.JSON(label="Analysis Result"),
     title="🎙️ Audio Sentiment Analysis (Whisper Small)",
-    description="""
-    Analyze the sentiment of spoken words.
-    **UI**: Upload an audio file, record directly, or click an example.
-    **API**: The endpoint also accepts a Base64 encoded audio data URI as input.
-    """,
+    description="Analyze the sentiment of spoken words. Upload an audio file, record directly, or click an example below.",
     examples=examples_list,
     article="""
    ### How it Works
-    This tool uses a speech-to-text model (`openai/whisper-small`) to transcribe audio, then TextBlob analyzes the text sentiment.
-    The server can handle both local file paths (from the UI) and Base64 strings (from API calls).
+    This tool uses OpenAI's **Whisper Small** model to transcribe audio into text.
+    Then, **TextBlob** is used to perform sentiment analysis on the resulting transcript.
+    By using `type="numpy"`, the interface directly processes audio data, making it more reliable.
    """,
    theme='huggingface'
)
 
-# Launch the interface and MCP server
+# Launch the interface
 if __name__ == "__main__":
-    # Ensure ffmpeg is installed on your system.
-    # pip install gradio textblob "transformers[torch]" accelerate safetensors ffmpeg-python numpy
     demo.launch(mcp_server=True)
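Note on the new input format: `gr.Audio(type="numpy")` hands `analyze_audio` a `(sample_rate, data)` tuple, typically with int16 samples, which is why the commit divides by 32768.0. Below is a minimal sketch of that input shape; the generated test tone and the commented-out `pipe(...)` call are illustrative assumptions, not part of the commit, and the dict form shown is one way the transformers ASR pipeline can be told the actual sampling rate instead of receiving a bare array.

```python
import numpy as np

# What gr.Audio(type="numpy") passes to analyze_audio(): (sample_rate, samples),
# where samples is usually an int16 NumPy array.
sample_rate = 48000
t = np.linspace(0, 1.0, sample_rate, endpoint=False)
tone = (0.3 * np.sin(2 * np.pi * 440.0 * t) * 32767).astype(np.int16)  # 1 s, 440 Hz test tone
audio_input = (sample_rate, tone)

# The commit's conversion: scale int16 samples into the [-1.0, 1.0] float32 range.
sr, samples = audio_input
audio_float32 = samples.astype(np.float32) / 32768.0

# Passing only the array leaves the pipeline to assume its default sampling rate;
# the ASR pipeline also accepts a dict that carries the true rate, e.g.:
# result = pipe({"raw": audio_float32, "sampling_rate": sr})
```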
 
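The sentiment step itself is untouched by this commit. For reference, here is a standalone sketch of how TextBlob produces the polarity score that `sentiment_analysis` maps to positive/negative/neutral; the sample sentence is made up, and the subjectivity field is TextBlob's second score rather than something taken from the diff.

```python
from textblob import TextBlob

text = "I really enjoyed the demo, it worked great."  # made-up transcript
sentiment = TextBlob(text).sentiment

print({
    "transcript": text,
    "polarity": round(sentiment.polarity, 2),          # -1.0 (negative) to 1.0 (positive)
    "subjectivity": round(sentiment.subjectivity, 2),  # 0.0 (objective) to 1.0 (subjective)
    "assessment": "positive" if sentiment.polarity > 0
    else "negative" if sentiment.polarity < 0
    else "neutral",
})
```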