Spaces:
Sleeping
Sleeping
| import gradio as gr | |
| import pandas as pd | |
| from transformers import pipeline, AutoTokenizer, AutoModelForSequenceClassification | |
| from langdetect import detect | |
# Hugging Face checkpoint used for each supported language key.
# "ur" and "roman-ur" currently point at the same fine-tuned checkpoint;
# replace either entry with a dedicated Urdu / Roman Urdu model when one
# becomes available.
MODEL_IDS = {
    "en": "siebert/sentiment-roberta-large-english",
    "ur": "tahamueed23/fine_tuned_cardiffnlp_urdu_and_roman-urdu",
    "roman-ur": "tahamueed23/fine_tuned_cardiffnlp_urdu_and_roman-urdu",
}

# Build one pipeline per *distinct* checkpoint so a model shared by several
# language keys is downloaded and held in memory only once.
# dict.fromkeys() de-duplicates while keeping a stable load order.
_pipelines = {
    model_id: pipeline("sentiment-analysis", model=model_id)
    for model_id in dict.fromkeys(MODEL_IDS.values())
}

# Language key -> sentiment-analysis pipeline.
models = {lang: _pipelines[model_id] for lang, model_id in MODEL_IDS.items()}

# CSV file that accumulates one row per analysed sentence.
CSV_FILE = "sentiment_results.csv"
def _pick_model_key(lang, text):
    """Return the key in ``models`` to use for *text* detected as *lang*."""
    if lang.startswith("ur"):
        return "ur"
    if lang == "en":
        # Text detected as English must go to the English model. The ASCII
        # heuristic below is true for virtually every English sentence, so
        # it must not be consulted first.
        return "en"
    # Crude Roman Urdu check: the text is neither Urdu nor English according
    # to the detector, yet contains ASCII characters.
    if any(ch.isascii() for ch in text):
        return "roman-ur"
    return "en"


def _append_to_csv(row):
    """Append the one-row DataFrame *row* to CSV_FILE, creating it if needed."""
    try:
        history = pd.read_csv(CSV_FILE, encoding="utf-8-sig")
    except (FileNotFoundError, pd.errors.EmptyDataError):
        # First run, or an empty log file: start the history with this row.
        combined = row
    else:
        combined = pd.concat([history, row], ignore_index=True)
    combined.to_csv(CSV_FILE, index=False, encoding="utf-8-sig")


def analyze_text(text):
    """Classify the sentiment of *text* and log the result to CSV_FILE.

    The language is detected with ``detect``; if detection fails the text is
    treated as English ("en"). The matching pipeline from ``models`` is run
    on the text, and the sentence, detected language, predicted label and
    confidence (rounded to 3 decimals) are appended to the CSV log.

    Args:
        text: The sentence to analyse.

    Returns:
        A one-line summary string of the form
        ``"Language: <lang> | Sentiment: <label> (<score>)"``.
    """
    try:
        lang = detect(text)
    except Exception:
        # Best-effort detection: the detector raises on input it cannot
        # analyse (e.g. empty text). Fall back to English, but let
        # KeyboardInterrupt / SystemExit propagate.
        lang = "en"

    classifier = models[_pick_model_key(lang, text)]

    # The pipeline returns a list of predictions; a single input yields one.
    prediction = classifier(text)[0]
    sentiment = prediction["label"]
    score = round(prediction["score"], 3)

    # Save to CSV
    row = pd.DataFrame(
        [[text, lang, sentiment, score]],
        columns=["Sentence", "Language", "Sentiment", "Confidence"],
    )
    _append_to_csv(row)

    return f"Language: {lang} | Sentiment: {sentiment} ({score})"
# Gradio UI: one text input, one text output, served by analyze_text.
demo = gr.Interface(
    title="Multilingual Sentiment Analysis",
    fn=analyze_text,
    inputs="text",
    outputs="text",
)


# Start the web app only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch()