tahamueed23 committed on
Commit
3840bbb
·
verified ·
1 Parent(s): c12e35c

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +44 -65
app.py CHANGED
@@ -1,71 +1,50 @@
1
  import gradio as gr
2
- from transformers import pipeline
3
  import pandas as pd
4
- import os
5
- import re
6
-
7
- # --- models (keep yours) ---
8
- english_model = pipeline("sentiment-analysis", model="siebert/sentiment-roberta-large-english")
9
- urdu_model = pipeline("sentiment-analysis", model="tahamueed23/fine_tuned_cardiffnlp_urdu_and_roman-urdu")
10
- roman_urdu_model = pipeline("sentiment-analysis", model="tahamueed23/fine_tuned_cardiffnlp_urdu_and_roman-urdu")
11
-
12
- # --- CSV setup (UTF-8 with BOM) ---
13
- SAVE_FILE = "sentiment_logs.csv"
14
- if not os.path.exists(SAVE_FILE):
15
- df_init = pd.DataFrame(columns=["Sentence", "Language", "Sentiment", "Confidence"])
16
- df_init.to_csv(SAVE_FILE, index=False, encoding="utf-8-sig") # write BOM + UTF-8
17
-
18
- # --- helper functions (keep yours) ---
19
- def detect_language(text):
20
- urdu_chars = set("ابتثجحخدذرزسشصضطظعغفقکلمنوہیءآؤئۀ")
21
- if any(ch in urdu_chars for ch in text):
22
- return "Urdu"
23
- roman_urdu_pattern = r"\b(hai|kia|kyun|nahi|bohot|acha|galat|sahi|parhai|ustad|pyar|dil|insaan)\b"
24
- if re.search(roman_urdu_pattern, text.lower()):
25
- return "Roman Urdu"
26
- return "English"
27
-
28
- def normalize_label(label):
29
- label = label.lower()
30
- if "positive" in label:
31
- return "Positive"
32
- elif "negative" in label:
33
- return "Negative"
34
  else:
35
- return "Neutral"
36
-
37
- def sentiment_with_tips(sentiment):
38
- tips = {
39
- "Positive": "😊 Great! Keep it up.",
40
- "Negative": "😞 Looks negative. Consider constructive changes.",
41
- "Neutral": "😐 Neutral — neither strongly positive nor negative."
42
- }
43
- return tips.get(sentiment, "")
44
-
45
- # --- main analyze function (writes UTF-8 rows safely) ---
46
- def analyze_sentiment(text, lang_hint):
47
- if not text or not text.strip():
48
- return "⚠️ Please enter a sentence.", "", "", SAVE_FILE
49
 
50
- lang = lang_hint if lang_hint != "Auto Detect" else detect_language(text)
51
-
52
- # Choose model
53
- if lang == "English":
54
- result = english_model(text)[0]
55
- elif lang == "Urdu":
56
- result = urdu_model(text)[0]
57
- else:
58
- result = roman_urdu_model(text)[0]
59
-
60
- sentiment = normalize_label(result["label"])
61
  score = round(result["score"], 3)
62
- explanation = sentiment_with_tips(sentiment)
63
-
64
- # Prepare row and append using utf-8-sig (Excel-friendly)
65
- new_row = pd.DataFrame([[text, lang, sentiment, score]],
66
- columns=["Sentence", "Language", "Sentiment", "Confidence"])
67
-
68
- # Append: header only if file missing (safe even if file exists)
69
- new_row.to_csv(SAVE_FILE, mode="a", index=False, header=not os.path.exists(SAVE_FILE), encoding="utf-8-sig")
70
 
71
- return sentiment, str(score), explanation, SAVE_FILE
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import gradio as gr
 
2
  import pandas as pd
3
+ from transformers import pipeline, AutoTokenizer, AutoModelForSequenceClassification
4
+ from langdetect import detect
5
+
6
# Load the sentiment pipelines once at import time, keyed by language code.
models = {
    "en": pipeline(
        "sentiment-analysis",
        model="siebert/sentiment-roberta-large-english",
    ),
    "ur": pipeline(
        "sentiment-analysis",
        model="tahamueed23/fine_tuned_cardiffnlp_urdu_and_roman-urdu",
    ),
}
# Roman Urdu currently uses the very same checkpoint as Urdu, so reuse the
# already-loaded pipeline instead of loading a second copy of the weights.
# Replace this alias with a dedicated pipeline once a Roman Urdu model exists.
models["roman-ur"] = models["ur"]

# CSV file that analysis results are saved to.
CSV_FILE = "sentiment_results.csv"
14
+
15
def _select_model_key(lang, text):
    """Return the key in ``models`` whose pipeline should score *text*.

    Args:
        lang: Language code reported by the detector (e.g. ``"en"``, ``"ur"``).
        text: The sentence being analysed.

    Returns:
        ``"ur"``, ``"en"`` or ``"roman-ur"``.
    """
    if lang.startswith("ur"):
        return "ur"
    if lang == "en":
        return "en"
    # The detector reported some other language. Latin-script text that is
    # not English is treated as Roman Urdu (crude heuristic); anything else
    # falls back to the English model.
    if any(ch.isascii() and ch.isalpha() for ch in text):
        return "roman-ur"
    return "en"


def analyze_text(text):
    """Detect the language of *text*, score its sentiment and log the result.

    Args:
        text: Sentence entered by the user.

    Returns:
        A one-line summary of the form
        ``"Language: <code> | Sentiment: <label> (<score>)"``, or a prompt
        asking for input when *text* is empty or only whitespace (nothing is
        scored or logged in that case).
    """
    if not text or not text.strip():
        return "⚠️ Please enter a sentence."

    try:
        lang = detect(text)
    except Exception:
        # Best-effort: if detection fails, fall back to English rather than
        # failing the request. ``Exception`` (not a bare ``except``) keeps
        # KeyboardInterrupt/SystemExit propagating.
        lang = "en"

    result = models[_select_model_key(lang, text)](text)[0]
    sentiment = result["label"]
    score = round(result["score"], 3)

    # Save to CSV: re-read the existing log (if any) and write it back with
    # the new row appended.
    rows = pd.DataFrame(
        [[text, lang, sentiment, score]],
        columns=["Sentence", "Language", "Sentiment", "Confidence"],
    )
    try:
        history = pd.read_csv(CSV_FILE, encoding="utf-8-sig")
    except (FileNotFoundError, pd.errors.EmptyDataError):
        # No log yet (or an empty file): start a fresh one. Any other read
        # failure propagates so an unreadable log is never silently
        # overwritten with a single row.
        history = None
    if history is not None:
        rows = pd.concat([history, rows], ignore_index=True)
    rows.to_csv(CSV_FILE, index=False, encoding="utf-8-sig")

    return f"Language: {lang} | Sentiment: {sentiment} ({score})"
43
+
44
# Gradio UI: a single text box in, a single text box out, wired to
# analyze_text.
demo = gr.Interface(
    fn=analyze_text,
    inputs="text",
    outputs="text",
    title="Multilingual Sentiment Analysis",
)

# Start the web app only when this file is run as a script.
if __name__ == "__main__":
    demo.launch()