Spaces:

tahamueed23
/

Sentiment-Analyzer

Sleeping

Sentiment-Analyzer / app.py

Update app.py

3840bbb verified about 2 months ago

1.66 kB

	import gradio as gr
	import pandas as pd
	from transformers import pipeline, AutoTokenizer, AutoModelForSequenceClassification
	from langdetect import detect

	# Load models
	# The Urdu and Roman Urdu entries currently use the same checkpoint, so the
	# pipeline is built once and shared instead of loading the weights twice.
	_URDU_MODEL_ID = "tahamueed23/fine_tuned_cardiffnlp_urdu_and_roman-urdu"
	_urdu_pipeline = pipeline("sentiment-analysis", model=_URDU_MODEL_ID)

	# Maps a routing key (chosen in analyze_text) to a sentiment pipeline.
	models = {
	    "en": pipeline("sentiment-analysis", model="siebert/sentiment-roberta-large-english"),
	    "ur": _urdu_pipeline,  # replace with your trained Urdu model
	    "roman-ur": _urdu_pipeline,  # replace with your trained Roman Urdu model
	}

	# Every analysed sentence is appended to this file by analyze_text.
	CSV_FILE = "sentiment_results.csv"

	def analyze_text(text):
	    """Classify the sentiment of *text* and record the result in CSV_FILE.

	    The language is guessed with ``detect`` and used to pick one of the
	    pipelines in ``models``:

	    * a language code starting with ``"ur"`` selects the Urdu model;
	    * otherwise, text not detected as English that contains at least one
	      all-ASCII word is treated as Roman Urdu (a crude heuristic);
	    * everything else falls back to the English model.

	    Args:
	        text: The sentence to analyse.

	    Returns:
	        A one-line summary of the form
	        ``"Language: <code> | Sentiment: <label> (<score>)"``.
	    """
	    try:
	        lang = detect(text)
	    except Exception:
	        # Detection is best-effort: if the detector cannot classify the
	        # input, fall back to English rather than failing the request.
	        lang = "en"

	    if lang.startswith("ur"):
	        model = models["ur"]
	    elif lang != "en" and any(word.isascii() for word in text.split()):
	        # Crude Roman Urdu check: Latin-script words in text that the
	        # detector did not recognise as English. Testing words (not single
	        # characters) keeps a lone space from matching, and skipping
	        # detected English lets the English model actually be reached.
	        model = models["roman-ur"]
	    else:
	        model = models["en"]

	    result = model(text)[0]
	    sentiment = result["label"]
	    score = round(result["score"], 3)

	    # Save to CSV
	    df = pd.DataFrame(
	        [[text, lang, sentiment, score]],
	        columns=["Sentence", "Language", "Sentiment", "Confidence"],
	    )
	    try:
	        old = pd.read_csv(CSV_FILE, encoding="utf-8-sig")
	    except (
	        OSError,
	        UnicodeDecodeError,
	        pd.errors.EmptyDataError,
	        pd.errors.ParserError,
	    ):
	        # Missing, empty or unreadable history file: start a new log that
	        # contains only the current row.
	        pass
	    else:
	        df = pd.concat([old, df], ignore_index=True)
	    df.to_csv(CSV_FILE, index=False, encoding="utf-8-sig")

	    return f"Language: {lang} | Sentiment: {sentiment} ({score})"

	# Gradio UI: a single text box in, a single text box out, backed by
	# analyze_text.
	demo = gr.Interface(
	    fn=analyze_text,
	    inputs="text",
	    outputs="text",
	    title="Multilingual Sentiment Analysis",
	)

	# Start the web server only when this file is executed as a script.
	if __name__ == "__main__":
	    demo.launch()