tahamueed23's picture
Update app.py
3840bbb verified
raw
history blame
1.66 kB
import gradio as gr
import pandas as pd
from transformers import pipeline, AutoTokenizer, AutoModelForSequenceClassification
from langdetect import detect
# Load models.
# The "ur" and "roman-ur" entries use the same checkpoint, so it is loaded once
# and the single pipeline object is shared by both keys instead of keeping two
# copies of the same weights in memory.
_urdu_pipeline = pipeline(
    "sentiment-analysis",
    model="tahamueed23/fine_tuned_cardiffnlp_urdu_and_roman-urdu",
)
models = {
    "en": pipeline("sentiment-analysis", model="siebert/sentiment-roberta-large-english"),
    "ur": _urdu_pipeline,  # replace with your trained Urdu model
    "roman-ur": _urdu_pipeline,  # replace with your trained Roman Urdu model
}

# File in which the result of every analysis is saved.
CSV_FILE = "sentiment_results.csv"
def _pick_model_key(text, lang):
    """Return the key in ``models`` that should score *text*.

    Args:
        text: The sentence being analysed.
        lang: Language code reported for *text* (``"en"`` when detection failed).

    Returns:
        ``"ur"``, ``"roman-ur"`` or ``"en"``.
    """
    if lang.startswith("ur"):
        return "ur"
    if lang == "en":
        return "en"
    # Crude Roman Urdu check: Latin letters in text that was detected as neither
    # Urdu nor English. Testing for *any* ASCII character (as was done before)
    # matched practically every input, so English never reached its own model.
    if any(ch.isascii() and ch.isalpha() for ch in text):
        return "roman-ur"
    return "en"


def analyze_text(text):
    """Classify the sentiment of *text* and log the result to ``CSV_FILE``.

    Args:
        text: The sentence to analyse.

    Returns:
        A one-line summary ``"Language: <lang> | Sentiment: <label> (<score>)"``,
        or a short prompt when *text* is empty or only whitespace (in which case
        nothing is scored or logged).
    """
    import os  # local import: only needed for the CSV header check below

    if not text or not text.strip():
        return "Please enter some text to analyze."

    try:
        lang = detect(text)
    except Exception:
        # Detection can fail on some inputs; default to English as before, but
        # without also swallowing KeyboardInterrupt/SystemExit.
        lang = "en"

    result = models[_pick_model_key(text, lang)](text)[0]
    sentiment = result["label"]
    score = round(result["score"], 3)

    # Save to CSV by appending a single row. Re-reading and rewriting the whole
    # file on every call grew with the log size, turned stored values such as
    # "NA" into missing data on the round trip, and replaced the entire history
    # with one row whenever the read failed.
    row = pd.DataFrame(
        [[text, lang, sentiment, score]],
        columns=["Sentence", "Language", "Sentiment", "Confidence"],
    )
    needs_header = not os.path.isfile(CSV_FILE) or os.path.getsize(CSV_FILE) == 0
    row.to_csv(
        CSV_FILE,
        mode="a",
        header=needs_header,
        index=False,
        encoding="utf-8-sig",
    )

    return f"Language: {lang} | Sentiment: {sentiment} ({score})"
# Web UI: one free-text input, one text output, both wired to analyze_text.
demo = gr.Interface(
    fn=analyze_text,
    inputs="text",
    outputs="text",
    title="Multilingual Sentiment Analysis",
)

# Start the app only when this file is run as a script, not when imported.
if __name__ == "__main__":
    demo.launch()