import gradio as gr
import pandas as pd
from transformers import pipeline, AutoTokenizer, AutoModelForSequenceClassification
from langdetect import detect, LangDetectException

# Load models
models = {
    "en": pipeline("sentiment-analysis", model="siebert/sentiment-roberta-large-english"),
    "ur": pipeline("sentiment-analysis", model="tahamueed23/fine_tuned_cardiffnlp_urdu_and_roman-urdu"),  # replace with your trained Urdu model
    "roman-ur": pipeline("sentiment-analysis", model="tahamueed23/fine_tuned_cardiffnlp_urdu_and_roman-urdu")  # replace with your trained Roman Urdu model
}

CSV_FILE = "sentiment_results.csv"

def analyze_text(text):
    # Detect the input language; langdetect raises on very short or ambiguous text.
    try:
        lang = detect(text)
    except LangDetectException:
        lang = "en"

    # Route to a model: Urdu script -> Urdu model; Latin-script text not detected
    # as English -> Roman Urdu model (crude heuristic); otherwise English.
    if lang.startswith("ur"):
        model = models["ur"]
    elif lang != "en" and text.isascii():
        model = models["roman-ur"]
    else:
        model = models["en"]

    result = model(text)[0]
    sentiment = result["label"]
    score = round(result["score"], 3)

    # Save to CSV
    df = pd.DataFrame([[text, lang, sentiment, score]], 
                      columns=["Sentence", "Language", "Sentiment", "Confidence"])
    try:
        old = pd.read_csv(CSV_FILE, encoding="utf-8-sig")
        df = pd.concat([old, df], ignore_index=True)
    except FileNotFoundError:
        pass  # first run: no existing results file yet
    df.to_csv(CSV_FILE, index=False, encoding="utf-8-sig")

    return f"Language: {lang} | Sentiment: {sentiment} ({score})"

demo = gr.Interface(fn=analyze_text, 
                    inputs="text", 
                    outputs="text", 
                    title="Multilingual Sentiment Analysis")

if __name__ == "__main__":
    demo.launch()