Spaces:
Sleeping
Sleeping
| import gradio as gr | |
| from transformers import pipeline | |
| import pandas as pd | |
| import os | |
| import re | |
| from filelock import FileLock | |
| # ----------------------------- | |
| # Load Transformer Models | |
| # ----------------------------- | |
# English sentiment classifier.
english_model = pipeline(
    "sentiment-analysis",
    model="siebert/sentiment-roberta-large-english",
)

# Classifier used for Urdu-script input.
urdu_model = pipeline(
    "sentiment-analysis",
    model="tahamueed23/fine_tuned_cardiffnlp_urdu_and_roman-urdu",
)

# Roman Urdu uses exactly the same checkpoint as `urdu_model`, so reuse the
# already-loaded pipeline instead of loading a second copy of the weights.
# The name is kept so existing callers continue to work.
roman_urdu_model = urdu_model
| # ----------------------------- | |
| # CSV Setup | |
| # ----------------------------- | |
# Path of the CSV log and of the lock file that accompanies it.
SAVE_FILE = "sentiment_logs.csv"
LOCK_FILE = f"{SAVE_FILE}.lock"

# Create a header-only CSV when no log file exists yet.
if not os.path.exists(SAVE_FILE):
    _empty_log = pd.DataFrame(
        columns=["Sentence", "Language", "Sentiment", "Confidence"]
    )
    _empty_log.to_csv(SAVE_FILE, index=False, encoding="utf-8-sig")
| # ----------------------------- | |
| # Improved Language Detection | |
| # ----------------------------- | |
def detect_language(text):
    """Classify *text* as "Urdu", "Roman Urdu", or "English".

    Text containing any character in the U+0600-U+06FF range is "Urdu".
    Otherwise the lower-cased text is matched against two whole-word
    keyword patterns; a hit yields "Roman Urdu". Anything else is
    reported as "English".
    """
    if re.search(r"[\u0600-\u06FF]", text) is not None:
        return "Urdu"

    keyword_patterns = (
        r"\b(hai|hain|tha|thi|parhta|parhai|acha|bura|bohot|zabardast)\b",
        r"\b(sir|madam|ustad|class|parh|samajh)\b",
    )
    lowered = text.lower()
    if any(re.search(pattern, lowered) for pattern in keyword_patterns):
        return "Roman Urdu"
    return "English"
| # ----------------------------- | |
| # Roman Urdu Normalization | |
| # ----------------------------- | |
def normalize_roman_urdu(text):
    """Lower-case *text* and expand common Roman Urdu shorthand.

    The standalone words "hy" and "h" become "hai", and the standalone
    words "nhi" and "nai" become "nahi". Matching is done on whole words
    only, so letters inside other words (e.g. the "h" in "acha" or
    "nahi") are left untouched.

    Returns the normalized string.
    """
    text = text.lower()
    # Word boundaries are essential: a plain str.replace("h", "hai") would
    # rewrite every letter "h" in the sentence.
    text = re.sub(r"\b(?:hy|h)\b", "hai", text)
    text = re.sub(r"\b(?:nhi|nai)\b", "nahi", text)
    return text
| # ----------------------------- | |
| # Normalize Labels | |
| # ----------------------------- | |
def normalize_label(label):
    """Map a raw model label onto "Positive", "Negative", or "Neutral".

    The comparison is case-insensitive and substring based: positive
    markers are checked first, then negative ones; any label matching
    neither is reported as "Neutral".
    """
    lowered = label.lower()
    marker_groups = (
        (("pos", "positive"), "Positive"),
        (("neg", "negative"), "Negative"),
    )
    for markers, canonical in marker_groups:
        if any(marker in lowered for marker in markers):
            return canonical
    return "Neutral"
| # ----------------------------- | |
| # Polarity Explanation | |
| # ----------------------------- | |
def polarity_explanation(text, sentiment):
    """Return a fixed one-line explanation for *sentiment*.

    *text* is accepted but not used. Sentiments other than "Positive",
    "Negative", or "Neutral" yield an empty string.
    """
    canned = dict(
        Positive="Contains praise words or positive evaluation.",
        Negative="Contains criticism or negative expressions.",
        Neutral="Factual statement or balanced observation.",
    )
    try:
        return canned[sentiment]
    except KeyError:
        return ""
| # ----------------------------- | |
| # Ensemble Roman Urdu + Urdu | |
| # ----------------------------- | |
def ensemble_roman_urdu(text):
    """Pick one prediction for *text* from the Roman Urdu and Urdu models.

    Both models are run and the first result of each is compared. When
    their normalized labels agree, the prediction with the higher score
    wins. When they disagree, the Roman Urdu score is multiplied by 1.25
    before the comparison. Ties go to the Roman Urdu prediction.

    Returns the chosen prediction dict unchanged.
    """
    roman_pred = roman_urdu_model(text)[0]
    urdu_pred = urdu_model(text)[0]

    roman_label = normalize_label(roman_pred["label"])
    urdu_label = normalize_label(urdu_pred["label"])

    roman_strength = roman_pred["score"]
    if roman_label != urdu_label:
        # Favor the Roman Urdu model when the two models disagree.
        roman_strength = roman_strength * 1.25

    if roman_strength >= urdu_pred["score"]:
        return roman_pred
    return urdu_pred
| # ----------------------------- | |
| # Adjust sentiment if low intensity | |
| # ----------------------------- | |
def adjust_for_neutral(text, sentiment, score):
    """Downgrade a low-confidence polar sentiment to "Neutral".

    A "Positive" or "Negative" sentiment whose score is below 0.7 is
    replaced by "Neutral"; everything else passes through. The score is
    returned unchanged in both cases, and *text* is not used.

    Returns a ``(sentiment, score)`` tuple.
    """
    is_polar = sentiment in ("Positive", "Negative")
    label = "Neutral" if is_polar and score < 0.7 else sentiment
    return label, score
| # ----------------------------- | |
| # Main Analysis Function | |
| # ----------------------------- | |
def _append_log_row(row):
    """Append one ``[sentence, language, sentiment, confidence]`` row to the CSV log."""
    columns = ["Sentence", "Language", "Sentiment", "Confidence"]
    with FileLock(LOCK_FILE):
        # Only a brand-new (missing or zero-byte) file needs the header line.
        needs_header = (
            not os.path.exists(SAVE_FILE) or os.path.getsize(SAVE_FILE) == 0
        )
        # Appending avoids re-reading and rewriting the entire log on every
        # request; the cost of logging no longer grows with the log size.
        pd.DataFrame([row], columns=columns).to_csv(
            SAVE_FILE,
            mode="a",
            header=needs_header,
            index=False,
            encoding="utf-8-sig",
        )


def analyze_sentiment(text, lang_hint):
    """Classify the sentiment of *text* and record the result in the CSV log.

    Args:
        text: The sentence to analyze. ``None``, empty, or whitespace-only
            input is rejected with a warning message and is not logged.
        lang_hint: "English", "Urdu", "Roman Urdu", or "Auto Detect". A
            missing hint (``None`` or empty) is treated as "Auto Detect".

    Returns:
        A 4-tuple ``(sentiment, confidence, explanation, log_path)`` where
        ``confidence`` is the rounded score as a string and ``log_path`` is
        ``SAVE_FILE``. For rejected input the tuple is
        ``(warning, "", "", SAVE_FILE)``.
    """
    if not text or not text.strip():
        return "⚠️ Please enter a sentence.", "", "", SAVE_FILE

    if lang_hint in (None, "", "Auto Detect"):
        lang = detect_language(text)
    else:
        lang = lang_hint

    if lang == "English":
        result = english_model(text)[0]
    elif lang == "Urdu":
        result = urdu_model(text)[0]
    else:
        # Any other language value is handled as Roman Urdu: normalize the
        # spelling first, then let the two-model ensemble decide.
        text = normalize_roman_urdu(text)
        result = ensemble_roman_urdu(text)

    sentiment = normalize_label(result["label"])
    score = round(float(result["score"]), 3)
    sentiment, score = adjust_for_neutral(text, sentiment, score)
    explanation = polarity_explanation(text, sentiment)

    # The logged sentence is the text that was actually analyzed (i.e. the
    # normalized form for Roman Urdu input).
    _append_log_row([text, lang, sentiment, score])

    return sentiment, str(score), explanation, SAVE_FILE
| # ----------------------------- | |
| # Show Logs | |
| # ----------------------------- | |
def show_logs():
    """Return the saved sentiment log as a DataFrame.

    The read happens under the same ``FileLock(LOCK_FILE)`` that
    ``analyze_sentiment`` holds while writing, so a log that is in the
    middle of being written is never read. A missing or zero-byte log
    yields an empty frame with the standard columns.
    """
    columns = ["Sentence", "Language", "Sentiment", "Confidence"]
    with FileLock(LOCK_FILE):
        if not os.path.exists(SAVE_FILE) or os.path.getsize(SAVE_FILE) == 0:
            return pd.DataFrame(columns=columns)
        try:
            return pd.read_csv(SAVE_FILE, encoding="utf-8-sig")
        except pd.errors.EmptyDataError:
            # File holds no parseable header/data (e.g. only a BOM).
            return pd.DataFrame(columns=columns)
| # ----------------------------- | |
| # Gradio UI | |
| # ----------------------------- | |
# Build the Gradio interface: inputs on the left, results on the right,
# and the saved-log table underneath.
with gr.Blocks() as demo:
    gr.Markdown(
        "## 🌍 Multilingual Sentiment Analysis (English • Urdu • Roman Urdu)\n"
        "Detect **Positive**, **Negative**, or **Neutral** tone with confidence score.\n\n"
        "🪶 Improved Roman Urdu normalization + ensemble + polarity explanation.\n"
    )

    with gr.Row():
        # Input column: sentence, language selector, and the two actions.
        with gr.Column():
            user_text = gr.Textbox(
                label="✍️ Enter text",
                placeholder="Type English, Urdu, or Roman Urdu...",
            )
            lang_dropdown = gr.Dropdown(
                choices=["Auto Detect", "English", "Urdu", "Roman Urdu"],
                value="Auto Detect",
                label="🌐 Language",
            )
            btn_analyze = gr.Button(value="🔍 Analyze Sentiment")
            btn_show = gr.Button(value="📂 Show Saved Logs")

        # Output column: analysis results plus the downloadable log file.
        with gr.Column():
            out_sent = gr.Textbox(label="Sentiment")
            out_conf = gr.Textbox(label="Confidence (0–1)")
            out_exp = gr.Textbox(label="Polarity Explanation")
            out_file = gr.File(label="⬇️ Download Logs (.csv)", type="filepath")

    logs_df = gr.Dataframe(
        headers=["Sentence", "Language", "Sentiment", "Confidence"],
        label="🧾 Sentiment Logs",
        interactive=False,
    )

    # Wire the buttons to their handlers.
    btn_analyze.click(
        fn=analyze_sentiment,
        inputs=[user_text, lang_dropdown],
        outputs=[out_sent, out_conf, out_exp, out_file],
    )
    btn_show.click(fn=show_logs, outputs=[logs_df])

# Start the app only when the file is executed as a script.
if __name__ == "__main__":
    demo.launch()