Spaces:

Lum4yx
/

mcp-sentiment

Sleeping

App Files Community

Lum4yx committed on Sep 19

Commit

ae9ed1e

verified ·

1 Parent(s): 476d8d2

Update app.py

Browse files

Files changed (1) hide show

app.py +106 -15

app.py CHANGED Viewed

@@ -1,34 +1,125 @@
 import gradio as gr
 from textblob import TextBlob
-def sentiment_analysis(text: str) -> dict:
-    """
-    Analyze the sentiment of the given text.
-    Args:
-        text (str): The text to analyze
-    Returns:
-        dict: A dictionary containing polarity, subjectivity, and assessment
     """
     blob = TextBlob(text)
     sentiment = blob.sentiment
     return {
-        "polarity": round(sentiment.polarity, 2),  # -1 (negative) to 1 (positive)
-        "subjectivity": round(sentiment.subjectivity, 2),  # 0 (objective) to 1 (subjective)
         "assessment": "positive" if sentiment.polarity > 0 else "negative" if sentiment.polarity < 0 else "neutral"
     }
-# Create the Gradio interface
 demo = gr.Interface(
-    fn=sentiment_analysis,
-    inputs=gr.Textbox(placeholder="Enter text to analyze..."),
-    outputs=gr.JSON(),
-    title="Text Sentiment Analysis",
-    description="Analyze the sentiment of text using TextBlob"
 )
 # Launch the interface and MCP server
 if __name__ == "__main__":
     demo.launch(mcp_server=True)

 import gradio as gr
 from textblob import TextBlob
+from transformers import AutoModelForSpeechSeq2Seq, AutoProcessor, pipeline
+import torch
+import base64
+import numpy as np
+import ffmpeg
+# 1. Set up device and data type for optimized performance
+device = "cuda:0" if torch.cuda.is_available() else "cpu"
+torch_dtype = torch.float16 if torch.cuda.is_available() else torch.float32
+# 2. Define the model ID for the large Whisper model
+model_id = "openai/whisper-large-v3-turbo"
+# 3. Load the model from pretrained weights
+model = AutoModelForSpeechSeq2Seq.from_pretrained(
+    model_id, torch_dtype=torch_dtype, low_cpu_mem_usage=True, use_safetensors=True
+)
+model.to(device)
+# 4. Load the processor which includes the feature extractor and tokenizer
+processor = AutoProcessor.from_pretrained(model_id)
+# 5. Create the ASR pipeline with the loaded components
+pipe = pipeline(
+    "automatic-speech-recognition",
+    model=model,
+    tokenizer=processor.tokenizer,
+    feature_extractor=processor.feature_extractor,
+    max_new_tokens=128,
+    torch_dtype=torch_dtype,
+    device=device,
+)
+def sentiment_analysis(text: str) -> dict:
+    """
+    Analyze the sentiment of the given text and include the text itself as "transcript".
     """
     blob = TextBlob(text)
     sentiment = blob.sentiment
     return {
+        "transcript": text,
+        "polarity": round(sentiment.polarity, 2),
+        "subjectivity": round(sentiment.subjectivity, 2),
         "assessment": "positive" if sentiment.polarity > 0 else "negative" if sentiment.polarity < 0 else "neutral"
     }
+def process_base64_audio(base64_data_uri: str) -> dict:
+    """
+    Decodes a Base64 audio data URI, processes it in-memory,
+    transcribes it using a Hugging Face Whisper pipeline, and then analyzes its sentiment.
+    Args:
+        base64_data_uri (str): A string in data URI format (e.g., "data:audio/wav;base64,UklGRi...").
+    Returns:
+        dict: The sentiment analysis result or an error message.
+    """
+    if not base64_data_uri or "base64," not in base64_data_uri:
+        return {"error": "Invalid or empty Base64 data URI provided."}
+    try:
+        # Parse the data URI to extract the Base64 encoded data
+        _, encoded_data = base64_data_uri.split(',', 1)
+        # Decode the Base64 string into binary audio data
+        audio_data = base64.b64decode(encoded_data)
+        # Use ffmpeg to convert the in-memory audio data to a raw PCM buffer.
+        # The pipeline expects a 16kHz mono audio stream.
+        out, _ = (
+            ffmpeg
+            .input('pipe:0')
+            .output('pipe:1', format='s16le', ac=1, ar=16000)
+            .run(input=audio_data, capture_stdout=True, capture_stderr=True)
+        )
+        # Convert the raw PCM buffer to a NumPy array of 32-bit floats.
+        audio_np = np.frombuffer(out, np.int16).astype(np.float32) / 32768.0
+        # Transcribe the audio from the NumPy array using the HF pipeline
+        transcription_result = pipe(audio_np)
+        transcript_text = transcription_result["text"]
+    except Exception as e:
+        # Capture potential errors from ffmpeg or the model
+        return {"error": f"Failed to process audio: {str(e)}"}
+    # Perform sentiment analysis on the transcribed text
+    return sentiment_analysis(transcript_text)
+# Create the Gradio interface with the Hugging Face theme
 demo = gr.Interface(
+    fn=process_base64_audio,
+    # The input remains a Textbox to accept the raw Base64 string from the API client
+    inputs=gr.Textbox(lines=5, placeholder="Paste your Base64 encoded audio data URI here...", label="Base64 Audio Input"),
+    outputs=gr.JSON(label="Analysis Result"),
+    title="🎙️ Audio Sentiment Analysis (Whisper Large v3)",
+    description="""
+    Analyze the sentiment of spoken words.
+    This tool accepts a **Base64 encoded audio data URI**, transcribes the audio in-memory using the `openai/whisper-large-v3` model,
+    and performs sentiment analysis on the text with TextBlob.
+    """,
+    examples=[
+        ["data:audio/wav;base64,UklGRiQ...<placeholder_for_a_short_positive_clip>"],
+        ["data:audio/wav;base64,UklGRiQ...<placeholder_for_a_short_negative_clip>"]
+    ],
+    article="""
+    ### How to get a Base64 Audio URI?
+    You can use an online converter or a script (like the provided `test_client.py`) to convert a short audio file (e.g., .wav or .mp3) into a Base64 data URI.
+    The format must be `data:audio/[format];base64,[encoded_string]`.
+    """,
+    theme='huggingface' # This applies the new theme
 )
 # Launch the interface and MCP server
 if __name__ == "__main__":
+    # You will need to have ffmpeg installed on your system for this to work.
+    # You also need to install the required python packages. This model is large and requires significant resources.
+    # pip install gradio textblob "transformers[torch]" accelerate safetensors ffmpeg-python numpy
     demo.launch(mcp_server=True)