# app.py -- last commit 0780c88 ("Update app.py") by tahamueed23, verified.
import gradio as gr
from transformers import pipeline
import pandas as pd
import os
import re
from filelock import FileLock
# -----------------------------
# Load Transformer Models
# -----------------------------
# The pipelines are built at import time, so the models are already in memory
# when the first request arrives.
english_model = pipeline(
    "sentiment-analysis",
    model="siebert/sentiment-roberta-large-english",
)
urdu_model = pipeline(
    "sentiment-analysis",
    model="tahamueed23/fine_tuned_cardiffnlp_urdu_and_roman-urdu",
)
# Urdu and Roman Urdu use the very same checkpoint, so reuse the pipeline that
# is already loaded instead of keeping a second copy of the weights in memory.
# Replace this alias with its own pipeline(...) call if a dedicated Roman Urdu
# checkpoint is ever introduced.
roman_urdu_model = urdu_model
# -----------------------------
# CSV Setup
# -----------------------------
# Path of the prediction log and of the lock file that guards writes to it.
SAVE_FILE = "sentiment_logs.csv"
LOCK_FILE = f"{SAVE_FILE}.lock"

# On first start there is no log yet: seed it with a header-only CSV so that
# later reads always find the expected columns.
if not os.path.exists(SAVE_FILE):
    pd.DataFrame(
        columns=["Sentence", "Language", "Sentiment", "Confidence"]
    ).to_csv(SAVE_FILE, index=False, encoding="utf-8-sig")
# -----------------------------
# Improved Language Detection
# -----------------------------
def detect_language(text):
    """Guess whether *text* is Urdu, Roman Urdu, or English.

    Returns "Urdu" as soon as any character from the U+0600-U+06FF block is
    present. Otherwise the lower-cased text is scanned for a small list of
    whole-word Roman Urdu markers and "Roman Urdu" is returned on the first
    hit. Everything else is reported as "English".
    """
    if re.search(r"[\u0600-\u06FF]", text) is not None:
        return "Urdu"

    lowered = text.lower()
    roman_markers = (
        r"\b(hai|hain|tha|thi|parhta|parhai|acha|bura|bohot|zabardast)\b",
        r"\b(sir|madam|ustad|class|parh|samajh)\b",
    )
    if any(re.search(marker, lowered) for marker in roman_markers):
        return "Roman Urdu"
    return "English"
# -----------------------------
# Roman Urdu Normalization
# -----------------------------
def normalize_roman_urdu(text):
    """Lower-case Roman Urdu text and expand common shorthand spellings.

    The standalone words "hy" and "h" become "hai", and the standalone words
    "nhi" and "nai" become "nahi". All other words are left untouched.

    Args:
        text: The Roman Urdu sentence to normalize.

    Returns:
        The lower-cased sentence with the shorthand words expanded.
    """
    text = text.lower()
    # Substitute whole words only. A plain str.replace("h", "hai") would
    # rewrite every letter "h" ("acha" -> "achaia", "nahi" -> "nahaii") and
    # corrupt the sentence before it reaches the model.
    text = re.sub(r"\b(?:hy|h)\b", "hai", text)
    text = re.sub(r"\b(?:nhi|nai)\b", "nahi", text)
    return text
# -----------------------------
# Normalize Labels
# -----------------------------
def normalize_label(label):
    """Map a raw model label onto "Positive", "Negative", or "Neutral".

    Matching is case-insensitive and substring based: a label containing
    "pos" is Positive, otherwise one containing "neg" is Negative, and any
    other label falls back to Neutral.
    """
    lowered = label.lower()
    # Order matters: "pos" is tested before "neg", as in a plain if/elif.
    for marker, canonical in (("pos", "Positive"), ("neg", "Negative")):
        if marker in lowered:
            return canonical
    return "Neutral"
# -----------------------------
# Polarity Explanation
# -----------------------------
def polarity_explanation(text, sentiment):
    """Return the canned one-line explanation for a sentiment label.

    *text* is accepted for interface compatibility but is not inspected.
    Labels other than "Positive", "Negative", and "Neutral" yield an empty
    string.
    """
    reasons = {
        "Positive": "Contains praise words or positive evaluation.",
        "Negative": "Contains criticism or negative expressions.",
        "Neutral": "Factual statement or balanced observation.",
    }
    try:
        return reasons[sentiment]
    except KeyError:
        return ""
# -----------------------------
# Ensemble Roman Urdu + Urdu
# -----------------------------
def ensemble_roman_urdu(text):
    """Pick between the Roman Urdu and Urdu pipelines' top predictions.

    Both pipelines score *text*. When their normalized labels agree, the
    prediction with the higher raw score wins. When they disagree, the Roman
    Urdu score is multiplied by 1.25 before the comparison. Ties go to the
    Roman Urdu prediction. The returned dict is the winning pipeline's
    unmodified result (its "score" is never the boosted value).
    """
    roman_pred = roman_urdu_model(text)[0]
    urdu_pred = urdu_model(text)[0]

    labels_agree = (
        normalize_label(roman_pred["label"]) == normalize_label(urdu_pred["label"])
    )
    if labels_agree:
        roman_strength = roman_pred["score"]
    else:
        # Favour the Roman Urdu pipeline when the two disagree on this input.
        roman_strength = roman_pred["score"] * 1.25

    if roman_strength >= urdu_pred["score"]:
        return roman_pred
    return urdu_pred
# -----------------------------
# Adjust sentiment if low intensity
# -----------------------------
def adjust_for_neutral(text, sentiment, score, *, threshold=0.7):
    """Downgrade a low-confidence polar prediction to "Neutral".

    Args:
        text: The analysed sentence; accepted for interface compatibility
            and not inspected.
        sentiment: Normalized label ("Positive", "Negative", or "Neutral").
        score: Confidence reported for *sentiment*.
        threshold: Minimum confidence a "Positive" or "Negative" label needs
            to be kept. Defaults to 0.7.

    Returns:
        A ``(sentiment, score)`` tuple. The score is always passed through
        unchanged; only the label may be replaced by "Neutral".
    """
    if sentiment in ("Positive", "Negative") and score < threshold:
        return "Neutral", score
    return sentiment, score
# -----------------------------
# Main Analysis Function
# -----------------------------
def _append_log_row(text, lang, sentiment, score):
    """Append one prediction to the CSV log, writing the header if needed."""
    row = pd.DataFrame(
        [[text, lang, sentiment, score]],
        columns=["Sentence", "Language", "Sentiment", "Confidence"],
    )
    with FileLock(LOCK_FILE):
        # A missing or zero-byte file still needs its header row.
        is_new = not os.path.exists(SAVE_FILE) or os.path.getsize(SAVE_FILE) == 0
        # Append instead of re-reading and rewriting the whole log: the cost
        # per request stays constant and an unreadable existing file cannot
        # break the request.
        row.to_csv(
            SAVE_FILE,
            mode="a",
            header=is_new,
            index=False,
            # Only a fresh file gets the BOM; appended rows must not add one.
            encoding="utf-8-sig" if is_new else "utf-8",
        )


def analyze_sentiment(text, lang_hint):
    """Classify *text*, log the prediction, and return values for the UI.

    Args:
        text: Sentence typed by the user. ``None``, empty, and
            whitespace-only input short-circuit with a warning.
        lang_hint: "English", "Urdu", "Roman Urdu", or "Auto Detect". A
            missing value (``None`` or empty) is treated as "Auto Detect".
            Any other value is handled like "Roman Urdu".

    Returns:
        A 4-tuple ``(sentiment, confidence, explanation, log_path)`` where
        confidence is the score rounded to 3 decimals, as a string. For
        blank input the first element is a warning message and the next two
        are empty strings.
    """
    if not text or not text.strip():
        return "⚠️ Please enter a sentence.", "", "", SAVE_FILE

    if not lang_hint or lang_hint == "Auto Detect":
        lang = detect_language(text)
    else:
        lang = lang_hint

    if lang == "English":
        result = english_model(text)[0]
    elif lang == "Urdu":
        result = urdu_model(text)[0]
    else:
        # Roman Urdu path: the normalized text is what gets classified and,
        # further down, what is written to the log.
        text = normalize_roman_urdu(text)
        result = ensemble_roman_urdu(text)

    sentiment = normalize_label(result["label"])
    score = round(float(result["score"]), 3)
    sentiment, score = adjust_for_neutral(text, sentiment, score)
    explanation = polarity_explanation(text, sentiment)

    _append_log_row(text, lang, sentiment, score)
    return sentiment, str(score), explanation, SAVE_FILE
# -----------------------------
# Show Logs
# -----------------------------
def show_logs():
    """Return the saved prediction log as a DataFrame.

    When the log file does not exist, an empty frame with the log's four
    columns is returned instead.
    """
    if not os.path.exists(SAVE_FILE):
        return pd.DataFrame(
            columns=["Sentence", "Language", "Sentiment", "Confidence"]
        )
    return pd.read_csv(SAVE_FILE, encoding="utf-8-sig")
# -----------------------------
# Gradio UI
# -----------------------------
# Build the Gradio interface. Components are declared in display order.
# NOTE(review): the nesting below (two columns inside one row, log table and
# event wiring directly under the Blocks context) was reconstructed because the
# original indentation was lost -- confirm against the deployed layout.
with gr.Blocks() as demo:
    # Page header shown above the controls.
    gr.Markdown(
        "## 🌍 Multilingual Sentiment Analysis (English • Urdu • Roman Urdu)\n"
        "Detect **Positive**, **Negative**, or **Neutral** tone with confidence score.\n\n"
        "🪶 Improved Roman Urdu normalization + ensemble + polarity explanation.\n"
    )
    with gr.Row():
        # Input side: the sentence, the language selector, and both buttons.
        with gr.Column():
            user_text = gr.Textbox(label="✍️ Enter text", placeholder="Type English, Urdu, or Roman Urdu...")
            lang_dropdown = gr.Dropdown(
                ["Auto Detect", "English", "Urdu", "Roman Urdu"],
                value="Auto Detect", label="🌐 Language"
            )
            btn_analyze = gr.Button("🔍 Analyze Sentiment")
            btn_show = gr.Button("📂 Show Saved Logs")
        # Output side: one widget per value returned by analyze_sentiment.
        with gr.Column():
            out_sent = gr.Textbox(label="Sentiment")
            out_conf = gr.Textbox(label="Confidence (0–1)")
            out_exp = gr.Textbox(label="Polarity Explanation")
            out_file = gr.File(label="⬇️ Download Logs (.csv)", type="filepath")
    # Read-only table that is filled by the "Show Saved Logs" button.
    logs_df = gr.Dataframe(
        headers=["Sentence", "Language", "Sentiment", "Confidence"],
        label="🧾 Sentiment Logs", interactive=False
    )
    # The order of `outputs` must match the order of analyze_sentiment's
    # returned tuple: sentiment, confidence, explanation, log file path.
    btn_analyze.click(analyze_sentiment,
                      inputs=[user_text, lang_dropdown],
                      outputs=[out_sent, out_conf, out_exp, out_file])
    btn_show.click(show_logs, outputs=[logs_df])

# Start the app only when this file is run as a script, not when imported.
if __name__ == "__main__":
    demo.launch()