Spaces:

tahamueed23
/

Sentiment-Analyzer

Sleeping

App Files Community

tahamueed23 committed on Oct 3

Commit

3840bbb

verified ·

1 Parent(s): c12e35c

Update app.py

Browse files

Files changed (1) hide show

app.py +44 -65

app.py CHANGED Viewed

@@ -1,71 +1,50 @@
 import gradio as gr
-from transformers import pipeline
 import pandas as pd
-import os
-import re
-# --- models (keep yours) ---
-english_model = pipeline("sentiment-analysis", model="siebert/sentiment-roberta-large-english")
-urdu_model = pipeline("sentiment-analysis", model="tahamueed23/fine_tuned_cardiffnlp_urdu_and_roman-urdu")
-roman_urdu_model = pipeline("sentiment-analysis", model="tahamueed23/fine_tuned_cardiffnlp_urdu_and_roman-urdu")
-# --- CSV setup (UTF-8 with BOM) ---
-SAVE_FILE = "sentiment_logs.csv"
-if not os.path.exists(SAVE_FILE):
-    df_init = pd.DataFrame(columns=["Sentence", "Language", "Sentiment", "Confidence"])
-    df_init.to_csv(SAVE_FILE, index=False, encoding="utf-8-sig")   # write BOM + UTF-8
-# --- helper functions (keep yours) ---
-def detect_language(text):
-    urdu_chars = set("ابتثجحخدذرزسشصضطظعغفقکلمنوہیءآؤئۀ")
-    if any(ch in urdu_chars for ch in text):
-        return "Urdu"
-    roman_urdu_pattern = r"\b(hai|kia|kyun|nahi|bohot|acha|galat|sahi|parhai|ustad|pyar|dil|insaan)\b"
-    if re.search(roman_urdu_pattern, text.lower()):
-        return "Roman Urdu"
-    return "English"
-def normalize_label(label):
-    label = label.lower()
-    if "positive" in label:
-        return "Positive"
-    elif "negative" in label:
-        return "Negative"
     else:
-        return "Neutral"
-def sentiment_with_tips(sentiment):
-    tips = {
-        "Positive": "😊 Great! Keep it up.",
-        "Negative": "😞 Looks negative. Consider constructive changes.",
-        "Neutral": "😐 Neutral — neither strongly positive nor negative."
-    }
-    return tips.get(sentiment, "")
-# --- main analyze function (writes UTF-8 rows safely) ---
-def analyze_sentiment(text, lang_hint):
-    if not text or not text.strip():
-        return "⚠️ Please enter a sentence.", "", "", SAVE_FILE
-    lang = lang_hint if lang_hint != "Auto Detect" else detect_language(text)
-    # Choose model
-    if lang == "English":
-        result = english_model(text)[0]
-    elif lang == "Urdu":
-        result = urdu_model(text)[0]
-    else:
-        result = roman_urdu_model(text)[0]
-    sentiment = normalize_label(result["label"])
     score = round(result["score"], 3)
-    explanation = sentiment_with_tips(sentiment)
-    # Prepare row and append using utf-8-sig (Excel-friendly)
-    new_row = pd.DataFrame([[text, lang, sentiment, score]],
-                           columns=["Sentence", "Language", "Sentiment", "Confidence"])
-    # Append: header only if file missing (safe even if file exists)
-    new_row.to_csv(SAVE_FILE, mode="a", index=False, header=not os.path.exists(SAVE_FILE), encoding="utf-8-sig")
-    return sentiment, str(score), explanation, SAVE_FILE

 import gradio as gr
 import pandas as pd
+from transformers import pipeline, AutoTokenizer, AutoModelForSequenceClassification
+from langdetect import detect
+# Load models
+models = {
+    "en": pipeline("sentiment-analysis", model="siebert/sentiment-roberta-large-english"),
+    "ur": pipeline("sentiment-analysis", model="tahamueed23/fine_tuned_cardiffnlp_urdu_and_roman-urdu"),  # replace with your trained Urdu model
+    "roman-ur": pipeline("sentiment-analysis", model="tahamueed23/fine_tuned_cardiffnlp_urdu_and_roman-urdu")  # replace with your trained Roman Urdu model
+}
+CSV_FILE = "sentiment_results.csv"
+def analyze_text(text):
+    try:
+        lang = detect(text)
+    except:
+        lang = "en"
+    if lang.startswith("ur"):
+        model = models["ur"]
+    elif any(word.isascii() for word in text):  # crude roman urdu check
+        model = models["roman-ur"]
     else:
+        model = models["en"]
+    result = model(text)[0]
+    sentiment = result["label"]
     score = round(result["score"], 3)
+    # Save to CSV
+    df = pd.DataFrame([[text, lang, sentiment, score]],
+                      columns=["Sentence", "Language", "Sentiment", "Confidence"])
+    try:
+        old = pd.read_csv(CSV_FILE, encoding="utf-8-sig")
+        df = pd.concat([old, df], ignore_index=True)
+    except:
+        pass
+    df.to_csv(CSV_FILE, index=False, encoding="utf-8-sig")
+    return f"Language: {lang} | Sentiment: {sentiment} ({score})"
+demo = gr.Interface(fn=analyze_text,
+                    inputs="text",
+                    outputs="text",
+                    title="Multilingual Sentiment Analysis")
+if __name__ == "__main__":
+    demo.launch()