Spaces:

Sa-m
/

manifesto-explainer

Running

App Files Community

Sa-m committed on Feb 4, 2022

Commit

1726132

1 Parent(s): a82e853

Update app.py

Browse files

Files changed (1) hide show

app.py +7 -32

app.py CHANGED Viewed

@@ -1,19 +1,9 @@
 # -*- coding: utf-8 -*-
 """
 # MANIFESTO ANALYSIS
-## IMPORTING LIBRARIES
 """
-# Commented out IPython magic to ensure Python compatibility.
-# %%capture
-# !pip install tika
-# !pip install clean-text
-# !pip install gradio
-# Commented out IPython magic to ensure Python compatibility.
 import random
 import matplotlib.pyplot as plt
 import nltk
@@ -21,14 +11,11 @@ from nltk.tokenize import word_tokenize,sent_tokenize
 from nltk.corpus import stopwords
 from nltk.stem.porter import PorterStemmer
 from nltk.stem import WordNetLemmatizer
-#import tika
-#from tika import parser
 from nltk.corpus import stopwords
 from nltk.tokenize import word_tokenize
 from nltk.probability import FreqDist
 from cleantext import clean
 import textract
 import urllib.request
 import nltk.corpus
 from nltk.text import Text
@@ -38,7 +25,6 @@ import sys
 import pandas as pd
 import cv2
 import re
 from wordcloud import WordCloud, STOPWORDS, ImageColorGenerator
 from textblob import TextBlob
 from PIL import Image
@@ -52,7 +38,6 @@ import unidecode
 nltk.download('stopwords')
 nltk.download('punkt')
 nltk.download('wordnet')
-nltk.download('averaged_perceptron_tagger')
 nltk.download('words')
@@ -111,10 +96,11 @@ def Preprocess(textParty):
-# Using Concordance,you can see each time a word is used, along with its
-# immediate context. It can give you a peek into how a word is being used
-# at the sentence level and what words are used with it.
 def concordance(text_Party,strng):
   word_tokens_party = word_tokenize(text_Party)
   moby = Text(word_tokens_party)
@@ -136,7 +122,7 @@ def normalize(d, target=1.0):
 def fDistance(text2Party):
   '''
-  most frequent words search
   '''
   word_tokens_party = word_tokenize(text2Party) #Tokenizing
   fdistance = FreqDist(word_tokens_party).most_common(10)
@@ -188,7 +174,6 @@ def getAnalysis(score):
   else:
     return 'Positive'
-#http://library.bjp.org/jspui/bitstream/123456789/2988/1/BJP-Election-english-2019.pdf
 url = "http://library.bjp.org/jspui/bitstream/123456789/2988/1/BJP-Election-english-2019.pdf"
 path_input = "./Bjp_Manifesto_2019.pdf'"
 urllib.request.urlretrieve(url, filename=path_input)
@@ -216,8 +201,6 @@ def analysis(Manifesto,Search):
   plt.ylabel('Counts')
   plt.figure(figsize=(4,3))
   df['Analysis on Polarity'].value_counts().plot(kind ='bar')
-  #plt.savefig('./sentimentAnalysis.png')
-  #plt.clf()
   plt.tight_layout()
   buf = BytesIO()
   plt.savefig(buf)
@@ -227,8 +210,6 @@ def analysis(Manifesto,Search):
   plt.figure(figsize=(4,3))
   df['Analysis on Subjectivity'].value_counts().plot(kind ='bar')
-  #plt.savefig('sentimentAnalysis2.png')
-  #plt.clf()
   plt.tight_layout()
   buf = BytesIO()
   plt.savefig(buf)
@@ -249,11 +230,6 @@ def analysis(Manifesto,Search):
   fdist_Party=fDistance(text_Party)
   img4=fDistancePlot(text_Party)
-  #img1=cv2.imread('/sentimentAnalysis.png')
-  #img2=cv2.imread('/wordcloud.png')
-  #img3=cv2.imread('/wordcloud.png')
-  #img4=cv2.imread('/distplot.png')
   searchRes=concordance(text_Party,Search)
   searChRes=clean(searchRes)
@@ -265,7 +241,6 @@ Search_txt=gr.inputs.Textbox()
 filePdf = gr.inputs.File()
 text = gr.outputs.Textbox(label='SEARCHED OUTPUT')
 mfw=gr.outputs.Label(label="Most Relevant Topics")
-# mfw2=gr.outputs.Image(label="Most Relevant Topics Plot")
 plot1=gr.outputs. Image(label='Sentiment Analysis')
 plot2=gr.outputs.Image(label='Subjectivity Analysis')
 plot3=gr.outputs.Image(label='Word Cloud')

 # -*- coding: utf-8 -*-
 """
 # MANIFESTO ANALYSIS
 """
+##IMPORTING LIBRARIES
 import random
 import matplotlib.pyplot as plt
 import nltk
 from nltk.corpus import stopwords
 from nltk.stem.porter import PorterStemmer
 from nltk.stem import WordNetLemmatizer
 from nltk.corpus import stopwords
 from nltk.tokenize import word_tokenize
 from nltk.probability import FreqDist
 from cleantext import clean
 import textract
 import urllib.request
 import nltk.corpus
 from nltk.text import Text
 import pandas as pd
 import cv2
 import re
 from wordcloud import WordCloud, STOPWORDS, ImageColorGenerator
 from textblob import TextBlob
 from PIL import Image
 nltk.download('stopwords')
 nltk.download('punkt')
 nltk.download('wordnet')
 nltk.download('words')
+'''
+  Using Concordance, you can see each time a word is used, along with its
+  immediate context. It can give you a peek into how a word is being used
+  at the sentence level and what words are used with it.
+'''
 def concordance(text_Party,strng):
   word_tokens_party = word_tokenize(text_Party)
   moby = Text(word_tokens_party)
 def fDistance(text2Party):
   '''
+  Most frequent words search
   '''
   word_tokens_party = word_tokenize(text2Party) #Tokenizing
   fdistance = FreqDist(word_tokens_party).most_common(10)
   else:
     return 'Positive'
 url = "http://library.bjp.org/jspui/bitstream/123456789/2988/1/BJP-Election-english-2019.pdf"
 path_input = "./Bjp_Manifesto_2019.pdf'"
 urllib.request.urlretrieve(url, filename=path_input)
   plt.ylabel('Counts')
   plt.figure(figsize=(4,3))
   df['Analysis on Polarity'].value_counts().plot(kind ='bar')
   plt.tight_layout()
   buf = BytesIO()
   plt.savefig(buf)
   plt.figure(figsize=(4,3))
   df['Analysis on Subjectivity'].value_counts().plot(kind ='bar')
   plt.tight_layout()
   buf = BytesIO()
   plt.savefig(buf)
   fdist_Party=fDistance(text_Party)
   img4=fDistancePlot(text_Party)
   searchRes=concordance(text_Party,Search)
   searChRes=clean(searchRes)
 filePdf = gr.inputs.File()
 text = gr.outputs.Textbox(label='SEARCHED OUTPUT')
 mfw=gr.outputs.Label(label="Most Relevant Topics")
 plot1=gr.outputs. Image(label='Sentiment Analysis')
 plot2=gr.outputs.Image(label='Subjectivity Analysis')
 plot3=gr.outputs.Image(label='Word Cloud')