Spaces:

Sa-m
/

manifesto-explainer

Running

App Files Files Community

Sa-m committed on Jun 12

Commit

029ffc9

verified ·

1 Parent(s): 8d7d358

Update app.py

Browse files

Files changed (1) hide show

app.py +561 -13

app.py CHANGED Viewed

@@ -1,3 +1,416 @@
 """
 # MANIFESTO ANALYSIS
 """
@@ -32,6 +445,14 @@ import gradio as gr
 from zipfile import ZipFile
 import contractions
 import unidecode
 nltk.download('punkt_tab')
 nltk.download('stopwords')
@@ -39,6 +460,18 @@ nltk.download('punkt')
 nltk.download('wordnet')
 nltk.download('words')
 """## PARSING FILES"""
@@ -149,16 +582,77 @@ def normalize(d, target=1.0):
    factor = target/raw
    return {key:value*factor for key,value in d.items()}
 def fDistance(text2Party):
   '''
-  Most frequent words search
   '''
   word_tokens_party = word_tokenize(text2Party) #Tokenizing
   fdistance = FreqDist(word_tokens_party).most_common(10)
   mem={}
   for x in fdistance:
     mem[x[0]]=x[1]
-  return normalize(mem)
 def fDistancePlot(text2Party,plotN=15):
   '''
@@ -352,7 +846,10 @@ urllib.request.urlretrieve(url, filename=path_input)
 def analysis(Manifesto,Search):
   raw_party = Parsing(Manifesto)
   text_Party=clean_text(raw_party)
-  text_Party= Preprocess(text_Party)
   df = pd.DataFrame(raw_party.split('\n'), columns=['Content'])
   df['Subjectivity'] = df['Content'].apply(getSubjectivity)
@@ -380,30 +877,81 @@ def analysis(Manifesto,Search):
   img2 = Image.open(buf)
   plt.clf()
-  img3 = word_cloud_generator(Manifesto.name,text_Party)
-  fdist_Party=fDistance(text_Party)
-  img4=fDistancePlot(text_Party)
-  img5=DispersionPlot(text_Party)
-  #concordance(text_Party,Search)
-  searChRes=get_all_phases_containing_tar_wrd(Search,text_Party)
   searChRes=searChRes.replace(Search,"\u0332".join(Search))
   plt.close('all')
-  return searChRes,fdist_Party,img1,img2,img3,img4,img5
 Search_txt= "text"
 filePdf = "file"
 text = gr.Textbox(label='Context Based Search')
-mfw=gr.Label(label="Most Relevant Topics")
 plot1=gr.Image(label='Sentiment Analysis')
 plot2=gr.Image(label='Subjectivity Analysis')
 plot3=gr.Image(label='Word Cloud')
 plot4=gr.Image(label='Frequency Distribution')
 plot5=gr.Image(label='Dispersion Plot')
-io=gr.Interface(fn=analysis, inputs=[filePdf,Search_txt], outputs=[text,mfw,plot1,plot2,plot3,plot4,plot5], title='Manifesto Analysis',examples=[['Example/AAP_Manifesto_2019.pdf','government'],['Example/Bjp_Manifesto_2019.pdf','environment'],['Example/Congress_Manifesto_2019.pdf','safety']],theme='peach')
-io.launch(debug=True,share=False)
 #allow_screenshot=False,allow_flagging="never",

+# """
+# # MANIFESTO ANALYSIS
+# """
+# ##IMPORTING LIBRARIES
+# import random
+# import matplotlib.pyplot as plt
+# import nltk
+# from nltk.tokenize import word_tokenize,sent_tokenize
+# from nltk.corpus import stopwords
+# from nltk.stem.porter import PorterStemmer
+# from nltk.stem import WordNetLemmatizer
+# from nltk.corpus import stopwords
+# from nltk.tokenize import word_tokenize
+# from nltk.probability import FreqDist
+# from cleantext import clean
+# import textract
+# import urllib.request
+# import nltk.corpus
+# from nltk.text import Text
+# import io
+# from io import StringIO,BytesIO
+# import sys
+# import pandas as pd
+# import cv2
+# import re
+# from wordcloud import WordCloud, STOPWORDS, ImageColorGenerator
+# from textblob import TextBlob
+# from PIL import Image
+# import os
+# import gradio as gr
+# from zipfile import ZipFile
+# import contractions
+# import unidecode
+# nltk.download('punkt_tab')
+# nltk.download('stopwords')
+# nltk.download('punkt')
+# nltk.download('wordnet')
+# nltk.download('words')
+# """## PARSING FILES"""
+# #def Parsing(parsed_text):
+#   #parsed_text=parsed_text.name
+#   #raw_party =parser.from_file(parsed_text)
+#  # raw_party = raw_party['content'],cache_examples=True
+# #  return clean(raw_party)
+# def Parsing(parsed_text):
+#   parsed_text=parsed_text.name
+#   raw_party =textract.process(parsed_text, encoding='ascii',method='pdfminer')
+#   return clean(raw_party)
+# #Added more stopwords to avoid irrelevant terms
+# stop_words = set(stopwords.words('english'))
+# stop_words.update('ask','much','thank','etc.', 'e', 'We', 'In', 'ed','pa', 'This','also', 'A', 'fu','To','5','ing', 'er', '2')
+# """## PREPROCESSING"""
+# def clean_text(text):
+#   '''
+#   The function which returns clean text
+#   '''
+#   text = text.encode("ascii", errors="ignore").decode("ascii")  # remove non-asciicharacters
+#   text=unidecode.unidecode(text)# diacritics remove
+#   text=contractions.fix(text) # contraction fix
+#   text = re.sub(r"\n", " ", text)
+#   text = re.sub(r"\n\n", " ", text)
+#   text = re.sub(r"\t", " ", text)
+#   text = re.sub(r"/ ", " ", text)
+#   text = text.strip(" ")
+#   text = re.sub(" +", " ", text).strip()  # get rid of multiple spaces and replace with a single
+#   text = [word for word in text.split() if word not in stop_words]
+#   text = ' '.join(text)
+#   return text
+# # text_Party=clean_text(raw_party)
+# def Preprocess(textParty):
+#   '''
+#   Removing special characters extra spaces
+#   '''
+#   text1Party = re.sub('[^A-Za-z0-9]+', ' ', textParty)
+#   #Removing all stop words
+#   pattern = re.compile(r'\b(' + r'|'.join(stopwords.words('english')) + r')\b\s*')
+#   text2Party = pattern.sub('', text1Party)
+#   # fdist_cong = FreqDist(word_tokens_cong)
+#   return text2Party
+# '''
+#   Using Concordance, you can see each time a word is used, along with its
+#   immediate context. It can give you a peek into how a word is being used
+#   at the sentence level and what words are used with it
+# '''
+# def conc(text_Party,strng):
+#   word_tokens_party = word_tokenize(text_Party)
+#   moby = Text(word_tokens_party)
+#   resultList = []
+#   for i in range(0,1):
+#       save_stdout = sys.stdout
+#       result = StringIO()
+#       sys.stdout = result
+#       moby.concordance(strng,lines=4,width=82)
+#       sys.stdout = save_stdout
+#   s=result.getvalue().splitlines()
+#   return result.getvalue()
+# def get_all_phases_containing_tar_wrd(target_word, tar_passage, left_margin = 10, right_margin = 10,numLins=4):
+#     """
+#         Function to get all the phrases that contain the target word in a text/passage tar_passage.
+#         Workaround to save the output given by nltk Concordance function
+#         str target_word, str tar_passage int left_margin int right_margin --> list of str
+#         left_margin and right_margin allocate the number of words/punctuation before and after target word
+#         Left margin will take note of the beginning of the text
+#     """
+#     ## Create list of tokens using nltk function
+#     tokens = nltk.word_tokenize(tar_passage)
+#     ## Create the text of tokens
+#     text = nltk.Text(tokens)
+#     ## Collect all the index or offset position of the target word
+#     c = nltk.ConcordanceIndex(text.tokens, key = lambda s: s.lower())
+#     ## Collect the range of the words that is within the target word by using text.tokens[start;end].
+#     ## The map function is use so that when the offset position - the target range < 0, it will be default to zero
+#     concordance_txt = ([text.tokens[list(map(lambda x: x-5 if (x-left_margin)>0 else 0,[offset]))[0]:offset+right_margin] for offset in c.offsets(target_word)])
+#     ## join the sentences for each of the target phrase and return it
+#     result = [''.join([x.replace("Y","")+' ' for x in con_sub]) for con_sub in concordance_txt][:-1]
+#     result=result[:numLins+1]
+#     res='\n\n'.join(result)
+#     return res
+# def normalize(d, target=1.0):
+#    raw = sum(d.values())
+#    factor = target/raw
+#    return {key:value*factor for key,value in d.items()}
+# def fDistance(text2Party):
+#   '''
+#   Most frequent words search
+#   '''
+#   word_tokens_party = word_tokenize(text2Party) #Tokenizing
+#   fdistance = FreqDist(word_tokens_party).most_common(10)
+#   mem={}
+#   for x in fdistance:
+#     mem[x[0]]=x[1]
+#   return normalize(mem)
+# def fDistancePlot(text2Party,plotN=15):
+#   '''
+#   Most Frequent Words Visualization
+#   '''
+#   word_tokens_party = word_tokenize(text2Party) #Tokenizing
+#   fdistance = FreqDist(word_tokens_party)
+#   plt.title('Frequency Distribution')
+#   plt.axis('off')
+#   plt.figure(figsize=(4,3))
+#   fdistance.plot(plotN)
+#   plt.tight_layout()
+#   buf = BytesIO()
+#   plt.savefig(buf)
+#   buf.seek(0)
+#   img1 = Image.open(buf)
+#   plt.clf()
+#   return img1
+# def DispersionPlot(textParty):
+#   '''
+#   Dispersion PLot
+#   '''
+#   word_tokens_party = word_tokenize(textParty) #Tokenizing
+#   moby = Text(word_tokens_party)
+#   fdistance = FreqDist(word_tokens_party)
+#   word_Lst=[]
+#   for x in range(5):
+#     word_Lst.append(fdistance.most_common(6)[x][0])
+#   plt.axis('off')
+#   plt.figure(figsize=(4,3))
+#   plt.title('Dispersion Plot')
+#   moby.dispersion_plot(word_Lst)
+#   plt.plot(color="#EF6D6D")
+#   plt.tight_layout()
+#   buf = BytesIO()
+#   plt.savefig(buf)
+#   buf.seek(0)
+#   img = Image.open(buf)
+#   plt.clf()
+#   return img
+# def getSubjectivity(text):
+#   '''
+#   Create a function to get the subjectivity
+#   '''
+#   return TextBlob(text).sentiment.subjectivity
+# def getPolarity(text):
+#   '''
+#   Create a function to get the polarity
+#   '''
+#   return  TextBlob(text).sentiment.polarity
+# def getAnalysis(score):
+#   if score < 0:
+#     return 'Negative'
+#   elif score == 0:
+#     return 'Neutral'
+#   else:
+#     return 'Positive'
+# def Original_Image(path):
+#   img= cv2.imread(path)
+#   img= cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
+#   return img
+# def Image_Processed(path):
+#   '''
+#   Reading the image file
+#   '''
+#   img= cv2.imread(path)
+#   img= cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
+#   #Thresholding
+#   ret, bw_img = cv2.threshold(img, 124, 255, cv2.THRESH_BINARY)
+#   return bw_img
+# def word_cloud(orgIm,mask_img,text_Party_pr,maxWord=2000,colorGener=True,
+#     contCol='white',bckColor='white'):
+#   '''
+#   #Generating word cloud
+#   '''
+#   mask =mask_img
+#   # Create and generate a word cloud image:
+#   wordcloud = WordCloud(max_words=maxWord, background_color=bckColor,
+#                         mask=mask,
+#                         colormap='nipy_spectral_r',
+#                         contour_color=contCol,
+#                         width=800, height=800,
+#                         margin=2,
+#                         contour_width=3).generate(text_Party_pr)
+#   # create coloring from image
+#   plt.axis("off")
+#   if colorGener==True:
+#     image_colors = ImageColorGenerator(orgIm)
+#     plt.imshow(wordcloud.recolor(color_func= image_colors),interpolation="bilinear")
+#   else:
+#     plt.imshow(wordcloud)
+# def word_cloud_generator(parsed_text_name,text_Party):
+#   parsed=parsed_text_name.lower()
+#   if 'bjp' in parsed:
+#     orgImg=Original_Image('bjpImg2.jpeg')
+#     bwImg=Image_Processed('bjpImg2.jpeg')
+#     plt.figure(figsize=(6,5))
+#     word_cloud(orgImg,bwImg,text_Party,maxWord=3000,colorGener=True,
+#     contCol='white', bckColor='black')
+#     plt.tight_layout()
+#     buf = BytesIO()
+#     plt.savefig(buf)
+#     buf.seek(0)
+#     img1 = Image.open(buf)
+#     plt.clf()
+#     return img1
+#   elif 'congress' in parsed:
+#     orgImg=Original_Image('congress3.jpeg')
+#     bwImg=Image_Processed('congress3.jpeg')
+#     plt.figure(figsize=(5,4))
+#     word_cloud(orgImg,bwImg,text_Party,maxWord=3000,colorGener=True)
+#     plt.tight_layout()
+#     buf = BytesIO()
+#     plt.savefig(buf)
+#     buf.seek(0)
+#     img2 = Image.open(buf)
+#     plt.clf()
+#     return img2
+#     #congrsMain.jpg
+#   elif 'aap' in parsed:
+#     orgImg=Original_Image('aapMain2.jpg')
+#     bwImg=Image_Processed('aapMain2.jpg')
+#     plt.figure(figsize=(5,4))
+#     word_cloud(orgImg,bwImg,text_Party,maxWord=3000,colorGener=False,contCol='black')
+#     plt.tight_layout()
+#     buf = BytesIO()
+#     plt.savefig(buf)
+#     buf.seek(0)
+#     img3 = Image.open(buf)
+#     plt.clf()
+#     return img3
+#   else :
+#     wordcloud = WordCloud(max_words=2000, background_color="white",mode="RGB").generate(text_Party)
+#     plt.figure(figsize=(5,5))
+#     plt.imshow(wordcloud, interpolation="bilinear")
+#     plt.axis("off")
+#     plt.tight_layout()
+#     buf = BytesIO()
+#     plt.savefig(buf)
+#     buf.seek(0)
+#     img4 = Image.open(buf)
+#     plt.clf()
+#     return img4
+# '''
+# url = "http://library.bjp.org/jspui/bitstream/123456789/2988/1/BJP-Election-english-2019.pdf"
+# path_input = "./Bjp_Manifesto_2019.pdf"
+# urllib.request.urlretrieve(url, filename=path_input)
+# url="https://drive.google.com/uc?id=1BLCiy_BWilfVdrUH8kbO-44DJevwO5CG&export=download"
+# path_input = "./Aap_Manifesto_2019.pdf"
+# urllib.request.urlretrieve(url, filename=path_input)
+# url="https://drive.google.com/uc?id=1HVZvTtYntl0YKLnE0cwu0CvAIRhXOv60&export=download"
+# path_input = "./Congress_Manifesto_2019.pdf"
+# urllib.request.urlretrieve(url, filename=path_input)
+# '''
+# def analysis(Manifesto,Search):
+#   raw_party = Parsing(Manifesto)
+#   text_Party=clean_text(raw_party)
+#   text_Party= Preprocess(text_Party)
+#   df = pd.DataFrame(raw_party.split('\n'), columns=['Content'])
+#   df['Subjectivity'] = df['Content'].apply(getSubjectivity)
+#   df['Polarity'] = df['Content'].apply(getPolarity)
+#   df['Analysis on Polarity'] = df['Polarity'].apply(getAnalysis)
+#   df['Analysis on Subjectivity'] = df['Subjectivity'].apply(getAnalysis)
+#   plt.title('Sentiment Analysis')
+#   plt.xlabel('Sentiment')
+#   plt.ylabel('Counts')
+#   plt.figure(figsize=(4,3))
+#   df['Analysis on Polarity'].value_counts().plot(kind ='bar',color="#FF9F45")
+#   plt.tight_layout()
+#   buf = BytesIO()
+#   plt.savefig(buf)
+#   buf.seek(0)
+#   img1 = Image.open(buf)
+#   plt.clf()
+#   plt.figure(figsize=(4,3))
+#   df['Analysis on Subjectivity'].value_counts().plot(kind ='bar',color="#B667F1")
+#   plt.tight_layout()
+#   buf = BytesIO()
+#   plt.savefig(buf)
+#   buf.seek(0)
+#   img2 = Image.open(buf)
+#   plt.clf()
+#   img3 = word_cloud_generator(Manifesto.name,text_Party)
+#   fdist_Party=fDistance(text_Party)
+#   img4=fDistancePlot(text_Party)
+#   img5=DispersionPlot(text_Party)
+#   #concordance(text_Party,Search)
+#   searChRes=get_all_phases_containing_tar_wrd(Search,text_Party)
+#   searChRes=searChRes.replace(Search,"\u0332".join(Search))
+#   plt.close('all')
+#   return searChRes,fdist_Party,img1,img2,img3,img4,img5
+# Search_txt= "text"
+# filePdf = "file"
+# text = gr.Textbox(label='Context Based Search')
+# mfw=gr.Label(label="Most Relevant Topics")
+# plot1=gr.Image(label='Sentiment Analysis')
+# plot2=gr.Image(label='Subjectivity Analysis')
+# plot3=gr.Image(label='Word Cloud')
+# plot4=gr.Image(label='Frequency Distribution')
+# plot5=gr.Image(label='Dispersion Plot')
+# io=gr.Interface(fn=analysis, inputs=[filePdf,Search_txt], outputs=[text,mfw,plot1,plot2,plot3,plot4,plot5], title='Manifesto Analysis',examples=[['Example/AAP_Manifesto_2019.pdf','government'],['Example/Bjp_Manifesto_2019.pdf','environment'],['Example/Congress_Manifesto_2019.pdf','safety']],theme='peach')
+# io.launch(debug=True,share=False)
+# #allow_screenshot=False,allow_flagging="never",
+# #examples=[['manifestos/Bjp_Manifesto_2019.pdf','modi'],['AAP_Manifesto_2019.pdf','delhi'],['manifestos/Congress_Manifesto_2019.pdf','safety']])
 """
 # MANIFESTO ANALYSIS
 """
 from zipfile import ZipFile
 import contractions
 import unidecode
+import groq
+import json
+from dotenv import load_dotenv
+from sklearn.feature_extraction.text import TfidfVectorizer
+from collections import Counter
+# Load environment variables from .env file
+load_dotenv()
 nltk.download('punkt_tab')
 nltk.download('stopwords')
 nltk.download('wordnet')
 nltk.download('words')
+# Initialize Groq client for LLM capabilities
+try:
+    groq_api_key = os.getenv("GROQ_API_KEY")
+    if groq_api_key:
+        groq_client = groq.Groq(api_key=groq_api_key)
+    else:
+        print("Warning: GROQ_API_KEY not found in environment variables. Summarization will be disabled.")
+        groq_client = None
+except Exception as e:
+    print(f"Error initializing Groq client: {e}")
+    groq_client = None
 """## PARSING FILES"""
    factor = target/raw
    return {key:value*factor for key,value in d.items()}
def generate_summary(text, max_length=1000, max_input_chars=10000):
    """
    Generate a summary of the manifesto text using Groq LLM

    Args:
        text: Manifesto text to summarize.
        max_length: Currently unused; kept so existing callers that pass it
            keep working.
        max_input_chars: Only the first ``max_input_chars`` characters of
            ``text`` are sent to the model, so the prompt fits in the
            context window.

    Returns:
        The generated summary, or a human-readable message explaining why
        no summary could be produced (empty input, missing API key, or a
        failed API call). This function never raises on API failure because
        its result is shown directly in the UI.
    """
    # Nothing to summarize: answer locally instead of spending an API call.
    if not text or not text.strip():
        return "No text could be extracted from the manifesto, so no summary was generated."
    # `groq_client` is the module-level client; it is None when no API key was found.
    if not groq_client:
        return "Summarization is not available. Please set up your GROQ_API_KEY in the .env file."
    # Truncate text if it's too long to fit in context window
    text = text[:max_input_chars]
    try:
        # Use Groq's LLaMA 3 model for summarization
        completion = groq_client.chat.completions.create(
            model="llama3-8b-8192",  # Using LLaMA 3 8B model
            messages=[
                {"role": "system", "content": "You are a helpful assistant that summarizes political manifestos. Provide a concise, objective summary that captures the key policy proposals, themes, and promises in the manifesto."},
                {"role": "user", "content": f"Please summarize the following political manifesto text in about 300-500 words, focusing on the main policy areas, promises, and themes:\n\n{text}"}
            ],
            temperature=0.3,  # Lower temperature for more focused output
            max_tokens=800,   # Limit response length
        )
        return completion.choices[0].message.content
    except Exception as e:
        # Deliberate catch-all: any client/network failure becomes a message for the UI.
        return f"Error generating summary: {str(e)}. Please check your API key and connection."
 def fDistance(text2Party):
   '''
+  Most frequent words search using TF-IDF to find more relevant words
   '''
+  # Traditional frequency distribution
   word_tokens_party = word_tokenize(text2Party) #Tokenizing
   fdistance = FreqDist(word_tokens_party).most_common(10)
   mem={}
   for x in fdistance:
     mem[x[0]]=x[1]
+  # Enhanced with TF-IDF for better relevance
+  sentences = sent_tokenize(text2Party)
+  # Use TF-IDF to find more relevant words
+  vectorizer = TfidfVectorizer(max_features=15, stop_words='english')
+  tfidf_matrix = vectorizer.fit_transform(sentences)
+  # Get feature names (words)
+  feature_names = vectorizer.get_feature_names_out()
+  # Calculate average TF-IDF score for each word across all sentences
+  tfidf_scores = {}
+  for i, word in enumerate(feature_names):
+      scores = [tfidf_matrix[j, i] for j in range(len(sentences)) if i < tfidf_matrix[j].shape[1]]
+      if scores:
+          tfidf_scores[word] = sum(scores) / len(scores)
+  # Sort by score and get top words
+  sorted_tfidf = dict(sorted(tfidf_scores.items(), key=lambda x: x[1], reverse=True)[:10])
+  # Combine traditional frequency with TF-IDF for better results
+  combined_scores = {}
+  for word in set(list(mem.keys()) + list(sorted_tfidf.keys())):
+      # Normalize and combine both scores (with more weight to TF-IDF)
+      freq_score = mem.get(word, 0) / max(mem.values()) if mem else 0
+      tfidf_score = sorted_tfidf.get(word, 0) / max(sorted_tfidf.values()) if sorted_tfidf else 0
+      combined_scores[word] = (freq_score * 0.3) + (tfidf_score * 0.7)  # Weight TF-IDF higher
+  # Get top 10 words by combined score
+  top_words = dict(sorted(combined_scores.items(), key=lambda x: x[1], reverse=True)[:10])
+  return normalize(top_words)
 def fDistancePlot(text2Party,plotN=15):
   '''
 def analysis(Manifesto,Search):
   raw_party = Parsing(Manifesto)
   text_Party=clean_text(raw_party)
+  text_Party_processed = Preprocess(text_Party)
+  # Generate summary using LLM
+  summary = generate_summary(raw_party)
   df = pd.DataFrame(raw_party.split('\n'), columns=['Content'])
   df['Subjectivity'] = df['Content'].apply(getSubjectivity)
   img2 = Image.open(buf)
   plt.clf()
+  img3 = word_cloud_generator(Manifesto.name,text_Party_processed)
+  fdist_Party=fDistance(text_Party_processed)
+  img4=fDistancePlot(text_Party_processed)
+  img5=DispersionPlot(text_Party_processed)
+  searChRes=get_all_phases_containing_tar_wrd(Search,text_Party_processed)
   searChRes=searChRes.replace(Search,"\u0332".join(Search))
   plt.close('all')
+  return searChRes,fdist_Party,img1,img2,img3,img4,img5,summary
 Search_txt= "text"
 filePdf = "file"
 text = gr.Textbox(label='Context Based Search')
+mfw=gr.Label(label="Most Relevant Topics (LLM Enhanced)")
 plot1=gr.Image(label='Sentiment Analysis')
 plot2=gr.Image(label='Subjectivity Analysis')
 plot3=gr.Image(label='Word Cloud')
 plot4=gr.Image(label='Frequency Distribution')
 plot5=gr.Image(label='Dispersion Plot')
+summary_output = gr.Textbox(label='AI-Generated Summary', lines=10)
# Build the UI. The output components (summary_output, text, mfw, plot1..plot5)
# are created at module level, outside this Blocks context, so each one must be
# placed explicitly with .render(); merely naming the variable is a no-op and
# would leave the tabs empty and the click outputs detached from the app.
with gr.Blocks(title='Manifesto Analysis', theme='peach') as demo:
    gr.Markdown("# Manifesto Analysis with LLM Enhancement")
    gr.Markdown("### Analyze political manifestos with advanced NLP and LLM techniques")
    with gr.Row():
        with gr.Column(scale=1):
            file_input = gr.File(label="Upload Manifesto PDF", file_types=[".pdf"])
            search_input = gr.Textbox(label="Search Term", placeholder="Enter a term to search in the manifesto")
            submit_btn = gr.Button("Analyze Manifesto")
    with gr.Tabs():
        with gr.TabItem("Summary"):
            summary_output.render()
        with gr.TabItem("Search Results"):
            text.render()
        with gr.TabItem("Key Topics"):
            mfw.render()
        with gr.TabItem("Visualizations"):
            with gr.Row():
                with gr.Column(scale=1):
                    plot3.render()
                with gr.Column(scale=1):
                    plot4.render()
            with gr.Row():
                with gr.Column(scale=1):
                    plot1.render()
                with gr.Column(scale=1):
                    plot2.render()
            with gr.Row():
                plot5.render()
    # Output order must match the tuple returned by analysis():
    # search result, topic scores, img1..img5, summary.
    submit_btn.click(
        fn=analysis,
        inputs=[file_input, search_input],
        outputs=[text, mfw, plot1, plot2, plot3, plot4, plot5, summary_output]
    )
    gr.Examples(
        examples=[
            ['Example/AAP_Manifesto_2019.pdf', 'government'],
            ['Example/Bjp_Manifesto_2019.pdf', 'environment'],
            ['Example/Congress_Manifesto_2019.pdf', 'safety']
        ],
        inputs=[file_input, search_input]
    )
demo.launch(debug=True, share=False)
 #allow_screenshot=False,allow_flagging="never",