Spaces:

tahamueed23
/

Sentiment-Analyzer

Sleeping

App Files Files Community

Sentiment-Analyzer / app.py

tahamueed23

Update app.py

0780c88 verified 6 days ago

raw

history blame contribute delete

6.38 kB

	import gradio as gr
	from transformers import pipeline
	import pandas as pd
	import os
	import re
	from filelock import FileLock

	# -----------------------------
	# Load Transformer Models
	# -----------------------------
	# English sentiment classifier.
	english_model = pipeline(
	    "sentiment-analysis",
	    model="siebert/sentiment-roberta-large-english",
	)

	# Classifier used for Urdu-script input.
	urdu_model = pipeline(
	    "sentiment-analysis",
	    model="tahamueed23/fine_tuned_cardiffnlp_urdu_and_roman-urdu",
	)

	# Roman Urdu uses the very same checkpoint as `urdu_model`, so reuse the
	# already-loaded pipeline instead of loading identical weights a second
	# time. The separate name is kept so callers keep working and a dedicated
	# Roman Urdu checkpoint can be swapped in here later.
	roman_urdu_model = urdu_model

	# -----------------------------
	# CSV Setup
	# -----------------------------
	# Path of the CSV log and of the lock file that guards access to it.
	SAVE_FILE = "sentiment_logs.csv"
	LOCK_FILE = SAVE_FILE + ".lock"

	# On first start-up, seed the log with a header-only CSV.
	if not os.path.exists(SAVE_FILE):
	    _empty_log = pd.DataFrame(
	        columns=["Sentence", "Language", "Sentiment", "Confidence"]
	    )
	    _empty_log.to_csv(SAVE_FILE, index=False, encoding="utf-8-sig")

	# -----------------------------
	# Improved Language Detection
	# -----------------------------
	def detect_language(text):
	    """Guess whether *text* is Urdu, Roman Urdu or English.

	    Returns "Urdu" when the text contains any character in the range
	    U+0600-U+06FF, "Roman Urdu" when it contains one of a small set of
	    cue words as a whole word (case-insensitive), and "English" otherwise.
	    """
	    if re.search(r"[\u0600-\u06FF]", text):
	        return "Urdu"

	    # The alternatives must be separated by a bare "|"; an escaped "\|"
	    # matches a literal pipe character and the cue words never fire.
	    # NOTE(review): "sir", "madam" and "class" are also ordinary English
	    # words, so English sentences containing them are reported as
	    # Roman Urdu -- confirm this is intended.
	    roman_urdu_patterns = (
	        r"\b(?:hai|hain|tha|thi|parhta|parhai|acha|bura|bohot|zabardast)\b",
	        r"\b(?:sir|madam|ustad|class|parh|samajh)\b",
	    )

	    lowered = text.lower()
	    if any(re.search(pattern, lowered) for pattern in roman_urdu_patterns):
	        return "Roman Urdu"

	    return "English"

	# -----------------------------
	# Roman Urdu Normalization
	# -----------------------------
	def normalize_roman_urdu(text):
	    """Lower-case *text* and expand common Roman Urdu shorthand.

	    The stand-alone words "hy" and "h" become "hai", and the stand-alone
	    words "nhi" and "nai" become "nahi". Returns the normalized string.
	    """
	    text = text.lower()
	    # Match whole words only: a plain str.replace("h", "hai") would rewrite
	    # every letter "h" and corrupt words such as "acha" or "hai" itself.
	    text = re.sub(r"\b(?:hy|h)\b", "hai", text)
	    text = re.sub(r"\b(?:nhi|nai)\b", "nahi", text)
	    return text

	# -----------------------------
	# Normalize Labels
	# -----------------------------
	def normalize_label(label):
	    """Map a raw model label onto "Positive", "Negative" or "Neutral".

	    Matching is case-insensitive and by substring; "pos" is checked before
	    "neg", and any label containing neither is reported as "Neutral".
	    """
	    lowered = label.lower()
	    # "positive" / "negative" already contain the short stems, so testing
	    # the stems alone covers both spellings.
	    for stem, canonical in (("pos", "Positive"), ("neg", "Negative")):
	        if stem in lowered:
	            return canonical
	    return "Neutral"

	# -----------------------------
	# Polarity Explanation
	# -----------------------------
	def polarity_explanation(text, sentiment):
	    """Return the canned one-line explanation for *sentiment*.

	    *text* is accepted but not used. Any sentiment other than "Positive",
	    "Negative" or "Neutral" yields an empty string.
	    """
	    canned = {
	        "Positive": "Contains praise words or positive evaluation.",
	        "Negative": "Contains criticism or negative expressions.",
	        "Neutral": "Factual statement or balanced observation.",
	    }
	    try:
	        return canned[sentiment]
	    except KeyError:
	        return ""

	# -----------------------------
	# Ensemble Roman Urdu + Urdu
	# -----------------------------
	def ensemble_roman_urdu(text):
	    """Run *text* through both Urdu pipelines and return one prediction.

	    The first result of `roman_urdu_model` and of `urdu_model` are
	    compared. When their normalized labels agree, the higher-scoring
	    prediction is returned. When they disagree, the Roman Urdu score is
	    multiplied by 1.25 before the comparison. Ties go to the Roman Urdu
	    prediction. The returned value is the raw prediction dict.
	    """
	    roman_pred = roman_urdu_model(text)[0]
	    urdu_pred = urdu_model(text)[0]

	    roman_label = normalize_label(roman_pred["label"])
	    urdu_label = normalize_label(urdu_pred["label"])

	    # On disagreement the Roman Urdu model is trusted more for this input.
	    boost = 1.0 if roman_label == urdu_label else 1.25
	    if roman_pred["score"] * boost >= urdu_pred["score"]:
	        return roman_pred
	    return urdu_pred

	# -----------------------------
	# Adjust sentiment if low intensity
	# -----------------------------
	def adjust_for_neutral(text, sentiment, score):
	    """Downgrade a low-confidence polar sentiment to "Neutral".

	    A "Positive" or "Negative" sentiment whose score is below 0.7 is
	    replaced by "Neutral"; everything else passes through unchanged.
	    *text* is accepted but not used. Returns a (sentiment, score) tuple
	    with the score untouched.
	    """
	    is_polar = sentiment in ("Positive", "Negative")
	    final_sentiment = "Neutral" if is_polar and score < 0.7 else sentiment
	    return final_sentiment, score

	# -----------------------------
	# Main Analysis Function
	# -----------------------------
	def analyze_sentiment(text, lang_hint):
	    """Classify *text*, append the result to the CSV log and return it.

	    Args:
	        text: The sentence to analyze. None or whitespace-only input is
	            rejected with a warning message and nothing is logged.
	        lang_hint: "English", "Urdu", "Roman Urdu", or "Auto Detect" (or
	            None) to let `detect_language` decide.

	    Returns:
	        A 4-tuple (sentiment, confidence as a string, explanation,
	        path of the log file). For rejected input the tuple is
	        (warning message, "", "", path of the log file).

	    For Roman Urdu the normalized text, not the raw input, is what gets
	    classified and written to the log.
	    """
	    # A cleared textbox may arrive as None rather than ""; treat both as blank
	    # instead of crashing on None.strip().
	    if text is None or not text.strip():
	        return "⚠️ Please enter a sentence.", "", "", SAVE_FILE

	    # A missing hint is handled like "Auto Detect" rather than silently
	    # falling through to the Roman Urdu branch with language None.
	    if lang_hint is None or lang_hint == "Auto Detect":
	        lang = detect_language(text)
	    else:
	        lang = lang_hint

	    if lang == "English":
	        result = english_model(text)[0]
	    elif lang == "Urdu":
	        result = urdu_model(text)[0]
	    else:
	        text = normalize_roman_urdu(text)
	        result = ensemble_roman_urdu(text)

	    sentiment = normalize_label(result["label"])
	    score = round(float(result["score"]), 3)
	    sentiment, score = adjust_for_neutral(text, sentiment, score)
	    explanation = polarity_explanation(text, sentiment)

	    # Save logs: read-modify-write under the file lock so that concurrent
	    # requests do not interleave their writes.
	    columns = ["Sentence", "Language", "Sentiment", "Confidence"]
	    with FileLock(LOCK_FILE):
	        if os.path.exists(SAVE_FILE):
	            df = pd.read_csv(SAVE_FILE, encoding="utf-8-sig")
	        else:
	            df = pd.DataFrame(columns=columns)
	        new_row = pd.DataFrame([[text, lang, sentiment, score]], columns=columns)
	        df = pd.concat([df, new_row], ignore_index=True)
	        df.to_csv(SAVE_FILE, index=False, encoding="utf-8-sig")

	    return sentiment, str(score), explanation, SAVE_FILE

	# -----------------------------
	# Show Logs
	# -----------------------------
	def show_logs():
	    """Return the saved sentiment log as a DataFrame.

	    Returns the contents of the CSV log when the file exists, otherwise an
	    empty DataFrame with the log's four columns.
	    """
	    # Take the same lock as the writer in analyze_sentiment so the file is
	    # never read while it is being rewritten.
	    with FileLock(LOCK_FILE):
	        if os.path.exists(SAVE_FILE):
	            return pd.read_csv(SAVE_FILE, encoding="utf-8-sig")
	    return pd.DataFrame(columns=["Sentence", "Language", "Sentiment", "Confidence"])

	# -----------------------------
	# Gradio UI
	# -----------------------------
	# Build the Gradio Blocks app; `demo` is launched at the bottom of the file.
	# NOTE(review): this copy of the file has lost its indentation, so the
	# nesting of the layout contexts below cannot be confirmed from here.
	with gr.Blocks() as demo:
	gr.Markdown(
	"## 🌍 Multilingual Sentiment Analysis (English • Urdu • Roman Urdu)\n"
	"Detect Positive, Negative, or Neutral tone with confidence score.\n\n"
	"🪶 Improved Roman Urdu normalization + ensemble + polarity explanation.\n"
	)

	with gr.Row():
	# Input widgets: free-text box, language selector and the two action buttons.
	with gr.Column():
	user_text = gr.Textbox(label="✍️ Enter text", placeholder="Type English, Urdu, or Roman Urdu...")
	lang_dropdown = gr.Dropdown(
	["Auto Detect", "English", "Urdu", "Roman Urdu"],
	value="Auto Detect", label="🌐 Language"
	)
	btn_analyze = gr.Button("🔍 Analyze Sentiment")
	btn_show = gr.Button("📂 Show Saved Logs")

	# Output widgets: sentiment, confidence, explanation and the log file for download.
	with gr.Column():
	out_sent = gr.Textbox(label="Sentiment")
	out_conf = gr.Textbox(label="Confidence (0–1)")
	out_exp = gr.Textbox(label="Polarity Explanation")
	out_file = gr.File(label="⬇️ Download Logs (.csv)", type="filepath")

	# Read-only table used to display the saved log rows.
	logs_df = gr.Dataframe(
	headers=["Sentence", "Language", "Sentiment", "Confidence"],
	label="🧾 Sentiment Logs", interactive=False
	)

	# analyze_sentiment returns four values, matched positionally to these four outputs.
	btn_analyze.click(analyze_sentiment,
	inputs=[user_text, lang_dropdown],
	outputs=[out_sent, out_conf, out_exp, out_file])

	# show_logs takes no inputs; its returned DataFrame fills the log table.
	btn_show.click(show_logs, outputs=[logs_df])

	# Start the app only when this file is executed as a script.
	if __name__ == "__main__":
	demo.launch()