tahamueed23 committed on
Commit
cd458ad
·
verified ·
1 Parent(s): 3840bbb

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +121 -40
app.py CHANGED
@@ -1,50 +1,131 @@
1
  import gradio as gr
2
- import pandas as pd
3
  from transformers import pipeline, AutoTokenizer, AutoModelForSequenceClassification
4
- from langdetect import detect
5
-
6
- # Load models
7
- models = {
8
- "en": pipeline("sentiment-analysis", model="siebert/sentiment-roberta-large-english"),
9
- "ur": pipeline("sentiment-analysis", model="tahamueed23/fine_tuned_cardiffnlp_urdu_and_roman-urdu"), # replace with your trained Urdu model
10
- "roman-ur": pipeline("sentiment-analysis", model="tahamueed23/fine_tuned_cardiffnlp_urdu_and_roman-urdu") # replace with your trained Roman Urdu model
11
- }
12
-
13
- CSV_FILE = "sentiment_results.csv"
14
-
15
- def analyze_text(text):
16
- try:
17
- lang = detect(text)
18
- except:
19
- lang = "en"
20
-
21
- if lang.startswith("ur"):
22
- model = models["ur"]
23
- elif any(word.isascii() for word in text): # crude roman urdu check
24
- model = models["roman-ur"]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
25
  else:
26
- model = models["en"]
27
 
28
- result = model(text)[0]
29
- sentiment = result["label"]
30
  score = round(result["score"], 3)
 
31
 
32
  # Save to CSV
33
- df = pd.DataFrame([[text, lang, sentiment, score]],
34
- columns=["Sentence", "Language", "Sentiment", "Confidence"])
35
- try:
36
- old = pd.read_csv(CSV_FILE, encoding="utf-8-sig")
37
- df = pd.concat([old, df], ignore_index=True)
38
- except:
39
- pass
40
- df.to_csv(CSV_FILE, index=False, encoding="utf-8-sig")
41
-
42
- return f"Language: {lang} | Sentiment: {sentiment} ({score})"
43
-
44
- demo = gr.Interface(fn=analyze_text,
45
- inputs="text",
46
- outputs="text",
47
- title="Multilingual Sentiment Analysis")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
48
 
49
  if __name__ == "__main__":
50
  demo.launch()
 
1
  import gradio as gr
 
2
  from transformers import pipeline, AutoTokenizer, AutoModelForSequenceClassification
3
+ import pandas as pd
4
+ import os
5
+ import re
6
+
7
+ # -----------------------------
8
+ # Load Models
9
+ # -----------------------------
10
# English sentiment pipeline.
english_model = pipeline(
    "sentiment-analysis",
    model="siebert/sentiment-roberta-large-english",
)

# Replace with your own fine-tuned models.
# Urdu and Roman Urdu currently use the same checkpoint, so the pipeline is
# built once and shared under both names; constructing it twice would only
# hold a second copy of the same weights in memory. Point
# ``roman_urdu_model`` at its own ``pipeline(...)`` call once a dedicated
# Roman Urdu checkpoint exists.
urdu_model = pipeline(
    "sentiment-analysis",
    model="tahamueed23/fine_tuned_cardiffnlp_urdu_and_roman-urdu",
)
roman_urdu_model = urdu_model
25
+
26
+ # -----------------------------
27
+ # CSV Setup
28
+ # -----------------------------
29
# Path of the CSV file that accumulates one row per analyzed sentence.
SAVE_FILE = "sentiment_logs.csv"

# On first run, create the log with only its header row so that later code
# can always open an existing, well-formed CSV.
if not os.path.exists(SAVE_FILE):
    header_columns = ["Sentence", "Language", "Sentiment", "Confidence"]
    df = pd.DataFrame(columns=header_columns)
    df.to_csv(SAVE_FILE, index=False)
33
+
34
+ # -----------------------------
35
+ # Language Detection (simple rule-based)
36
+ # -----------------------------
37
# Letters of the Arabic script as used for Urdu: the basic Arabic letters
# (U+0621-U+064A) plus the extended letters (U+0671-U+06D3, U+06D5), which
# include the Urdu-specific forms such as پ چ ٹ ڈ ڑ ژ گ ں ھ ے. Digits and
# punctuation are deliberately excluded so they do not trigger a match.
_URDU_SCRIPT_RE = re.compile(r"[\u0621-\u064A\u0671-\u06D3\u06D5]")

# A small vocabulary of common Roman Urdu words, matched as whole words.
_ROMAN_URDU_RE = re.compile(
    r"\b(hai|kia|kyun|nahi|bohot|acha|galat|sahi|parhai|ustad|pyar|dil|insaan)\b"
)


def detect_language(text):
    """Guess whether ``text`` is Urdu, Roman Urdu or English.

    The check is purely rule-based: any Arabic-script letter means "Urdu";
    otherwise any whole-word hit from a short Roman Urdu vocabulary
    (case-insensitive) means "Roman Urdu"; everything else, including the
    empty string, is reported as "English".

    Args:
        text: The sentence to inspect.

    Returns:
        One of "Urdu", "Roman Urdu" or "English".
    """
    if _URDU_SCRIPT_RE.search(text):
        return "Urdu"
    if _ROMAN_URDU_RE.search(text.lower()):
        return "Roman Urdu"
    return "English"
45
+
46
+ # -----------------------------
47
+ # Normalize Sentiment Labels
48
+ # -----------------------------
49
def normalize_label(label):
    """Map a raw model label onto "Positive", "Negative" or "Neutral".

    Matching is case-insensitive and substring-based: a label containing
    "positive" wins first, then one containing "negative"; any other label
    is treated as "Neutral".
    """
    lowered = label.lower()
    for keyword, canonical in (("positive", "Positive"), ("negative", "Negative")):
        if keyword in lowered:
            return canonical
    return "Neutral"
57
+
58
+ # -----------------------------
59
+ # Add Emojis + Tips
60
+ # -----------------------------
61
def sentiment_with_tips(sentiment):
    """Return the emoji-and-tip message for a normalized sentiment.

    Known sentiments are "Positive", "Negative" and "Neutral"; any other
    value yields an empty string.
    """
    messages = {
        "Positive": "😊 Great! Keep spreading positivity.",
        "Negative": "😞 It seems negative. Try to focus on solutions.",
        "Neutral": "😐 Neutral statement. Could go either way.",
    }
    try:
        return messages[sentiment]
    except KeyError:
        return ""
68
+
69
+ # -----------------------------
70
+ # Main Sentiment Function
71
+ # -----------------------------
72
def analyze_sentiment(text, lang_hint):
    """Classify the sentiment of ``text`` and append the result to the CSV log.

    Args:
        text: Sentence to analyze.
        lang_hint: "Auto Detect" to infer the language with
            ``detect_language``; any other value is used as the language
            as given. "English" and "Urdu" select their own models; every
            other value falls through to the Roman Urdu model.

    Returns:
        A 4-tuple ``(sentiment, confidence, explanation, log_path)``. The
        first three are strings and ``log_path`` is ``SAVE_FILE``. For empty
        or whitespace-only input a warning message takes the sentiment slot,
        the next two are empty strings, and nothing is logged.
    """
    # ``not text`` also covers a None value, which has no .strip().
    if not text or not text.strip():
        return "⚠️ Please enter a sentence.", "", "", SAVE_FILE

    # Auto detect only when the user did not pick a language explicitly.
    lang = detect_language(text) if lang_hint == "Auto Detect" else lang_hint

    # Select model
    if lang == "English":
        classifier = english_model
    elif lang == "Urdu":
        classifier = urdu_model
    else:
        classifier = roman_urdu_model

    # truncation=True keeps over-long inputs within the model's maximum
    # sequence length instead of failing inside the model.
    result = classifier(text, truncation=True)[0]

    # Process results
    sentiment = normalize_label(result["label"])
    score = round(result["score"], 3)
    explanation = sentiment_with_tips(sentiment)

    # Append only the new row instead of reading and rewriting the whole
    # log: earlier rows are never re-parsed (so values like "007" or "NA"
    # are not altered by type inference), the cost per request stays
    # constant, and a missing or empty log file is recreated with a header
    # rather than raising.
    new_row = pd.DataFrame(
        [[text, lang, sentiment, score]],
        columns=["Sentence", "Language", "Sentiment", "Confidence"],
    )
    needs_header = (
        not os.path.exists(SAVE_FILE) or os.path.getsize(SAVE_FILE) == 0
    )
    new_row.to_csv(
        SAVE_FILE,
        mode="a",
        header=needs_header,
        index=False,
        encoding="utf-8",
    )

    return f"{sentiment}", f"{score}", f"{explanation}", SAVE_FILE
100
+
101
+ # -----------------------------
102
+ # Gradio UI
103
+ # -----------------------------
104
# Header text shown at the top of the page.
intro_markdown = (
    "## 🌍 Multilingual Sentiment Analysis (English • Urdu • Roman Urdu)\n"
    "Detect sentiment as **Positive, Neutral, or Negative** with confidence score.\n\n"
    "📌 Features:\n"
    "- Choose language (or Auto Detect)\n"
    "- Download all results as CSV\n"
    "- Emojis + Tips for better understanding 🎯"
)

# Options offered in the language dropdown; the first one is the default.
language_choices = ["Auto Detect", "English", "Urdu", "Roman Urdu"]

# Two-column layout: inputs on the left, results and the log download on
# the right. The button wires both inputs to analyze_sentiment, whose four
# return values fill the four output components in order.
with gr.Blocks() as demo:
    gr.Markdown(intro_markdown)

    with gr.Row():
        with gr.Column():
            user_text = gr.Textbox(
                label="✍️ Enter text",
                placeholder="Type in English, Urdu, or Roman Urdu...",
            )
            lang_dropdown = gr.Dropdown(
                language_choices,
                label="🌐 Language",
                value="Auto Detect",
            )
            btn = gr.Button("🔍 Analyze")

        with gr.Column():
            out_sent = gr.Textbox(label="Sentiment")
            out_conf = gr.Textbox(label="Confidence (0–1)")
            out_exp = gr.Textbox(label="Explanation")
            out_file = gr.File(label="⬇️ Download Logs (.csv)", type="filepath")

    btn.click(
        fn=analyze_sentiment,
        inputs=[user_text, lang_dropdown],
        outputs=[out_sent, out_conf, out_exp, out_file],
    )

if __name__ == "__main__":
    demo.launch()