import gradio as gr
import pandas as pd
from transformers import pipeline
from langdetect import detect
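# Multilingual sentiment analysis demo: detect the input language, route the
# sentence to an English, Urdu, or Roman Urdu sentiment pipeline, and log every
# result to a CSV file.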
# Load models
models = {
    "en": pipeline("sentiment-analysis", model="siebert/sentiment-roberta-large-english"),
    "ur": pipeline("sentiment-analysis", model="tahamueed23/fine_tuned_cardiffnlp_urdu_and_roman-urdu"),  # replace with your trained Urdu model
    "roman-ur": pipeline("sentiment-analysis", model="tahamueed23/fine_tuned_cardiffnlp_urdu_and_roman-urdu")  # replace with your trained Roman Urdu model
}
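# NOTE: the "ur" and "roman-ur" entries currently point at the same fine-tuned
# checkpoint; each pipeline downloads its weights from the Hugging Face Hub on
# first use, so the initial startup can take a while.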
CSV_FILE = "sentiment_results.csv"
def analyze_text(text):
    # Detect the input language; fall back to English if detection fails
    try:
        lang = detect(text)
    except Exception:
        lang = "en"

    # Route the text to the matching model
    if lang.startswith("ur"):
        model = models["ur"]
    elif lang != "en" and any(word.isascii() for word in text.split()):  # crude Roman Urdu check: Latin-script text not detected as English
        model = models["roman-ur"]
    else:
        model = models["en"]

    result = model(text)[0]
    sentiment = result["label"]
    score = round(result["score"], 3)
    # Append the result to the CSV log
    df = pd.DataFrame([[text, lang, sentiment, score]],
                      columns=["Sentence", "Language", "Sentiment", "Confidence"])
    try:
        old = pd.read_csv(CSV_FILE, encoding="utf-8-sig")
        df = pd.concat([old, df], ignore_index=True)
    except FileNotFoundError:
        pass  # first run: no existing CSV to append to
    df.to_csv(CSV_FILE, index=False, encoding="utf-8-sig")

    return f"Language: {lang} | Sentiment: {sentiment} ({score})"
demo = gr.Interface(fn=analyze_text,
                    inputs="text",
                    outputs="text",
                    title="Multilingual Sentiment Analysis")

if __name__ == "__main__":
    demo.launch()
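# Usage note (assumption: this file is saved as app.py on the Space): running
# `python app.py` locally launches the Gradio UI, and analyze_text() can also be
# called directly; each call returns the "Language: ... | Sentiment: ... (score)"
# string and appends a row to sentiment_results.csv.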