Spaces:

Sa-m
/

manifesto-explainer

Running

App Files Files Community

Sa-m committed on Mar 26, 2022

Commit

dd57fb3

1 Parent(s): f9871f3

Update app.py

Browse files

Files changed (1) hide show

app.py +30 -3

app.py CHANGED Viewed

@@ -114,6 +114,33 @@ def concordance(text_Party,strng):
   s=result.getvalue().splitlines()
   return result.getvalue()
 def normalize(d, target=1.0):
    raw = sum(d.values())
@@ -356,8 +383,8 @@ def analysis(Manifesto,Search):
   fdist_Party=fDistance(text_Party)
   img4=fDistancePlot(text_Party)
   img5=DispersionPlot(text_Party)
-  searchRes=concordance(text_Party,Search)
   searChRes=clean(searchRes)
   searChRes=searchRes.replace(Search,"\u0332".join(Search))
   return searChRes,fdist_Party,img1,img2,img3,img4,img5
@@ -373,7 +400,7 @@ plot3=gr.outputs.Image(label='Word Cloud')
 plot4=gr.outputs.Image(label='Frequency Distribution')
 plot5=gr.outputs.Image(label='Dispersion Plot')
-io=gr.Interface(fn=analysis, inputs=[filePdf,Search_txt], outputs=[text,mfw,plot1,plot2,plot3,plot4,plot5], title='Manifesto Analysis',examples=[['manifestos/Bjp_Manifesto_2019.pdf','modi'],['manifestos/AAP_Manifesto_2019.pdf','delhi'],['manifestos/Congress_Manifesto_2019.pdf','safety']],theme='peach')
 io.launch(debug=True,share=False)
 #,examples=[['./Bjp_Manifesto_2019.pdf','india'],['./AAP_Manifesto_2019.pdf',],['./Congress_Manifesto_2019.pdf',]]
 #allow_screenshot=False,    allow_flagging="never",

   s=result.getvalue().splitlines()
   return result.getvalue()
def get_all_phases_containing_tar_wrd(target_word, tar_passage, left_margin=10, right_margin=10):
    """
    Return every phrase of ``tar_passage`` that contains ``target_word``.

    Workaround for capturing what nltk's ``concordance`` only prints: the
    passage is tokenized, each (case-insensitive) occurrence of the target
    word is located, and a window of tokens around it is collected.

    Args:
        target_word: word to look up; matching is case-insensitive because
            the index lower-cases every token.
        tar_passage: raw text to search (a ``str`` handed to
            ``nltk.word_tokenize``).
        left_margin: number of tokens (words/punctuation) kept before the
            target word; clamped at the beginning of the text.
        right_margin: the window ends at ``offset + right_margin``
            (exclusive), i.e. the target word plus ``right_margin - 1``
            following tokens.

    Returns:
        A single ``str``: the phrases joined with ``'. '``. Tokens inside a
        phrase are each followed by one space. Empty string when the word
        does not occur.

    NOTE(review): the call visible in ``analysis`` passes
    ``(text_Party, Search)`` -- passage first, word second -- which looks
    reversed relative to this signature; confirm the argument order there.
    """
    # Tokenize once; the token list is both the search index source and the
    # sequence the phrase windows are sliced from.
    tokens = nltk.word_tokenize(tar_passage)
    # Lower-casing key makes the offset lookup case-insensitive.
    index = nltk.ConcordanceIndex(tokens, key=lambda s: s.lower())

    phrases = []
    for offset in index.offsets(target_word):
        # Honour left_margin (the previous code always stepped back 5 tokens)
        # and never slice from a negative index near the start of the text.
        start = max(offset - left_margin, 0)
        window = tokens[start:offset + right_margin]
        # Uppercase "Y" characters are stripped from every token --
        # presumably a PDF-extraction artifact; TODO confirm.
        phrases.append(''.join(tok.replace("Y", "") + ' ' for tok in window))

    # Keep every match: the earlier ``[:-1]`` silently discarded the last
    # phrase, returning nothing at all for a word that occurs only once.
    return '. '.join(phrases)
 def normalize(d, target=1.0):
    raw = sum(d.values())
   fdist_Party=fDistance(text_Party)
   img4=fDistancePlot(text_Party)
   img5=DispersionPlot(text_Party)
+  #concordance(text_Party,Search)
+  searchRes=get_all_phases_containing_tar_wrd(text_Party,Search)
   searChRes=clean(searchRes)
   searChRes=searchRes.replace(Search,"\u0332".join(Search))
   return searChRes,fdist_Party,img1,img2,img3,img4,img5
 plot4=gr.outputs.Image(label='Frequency Distribution')
 plot5=gr.outputs.Image(label='Dispersion Plot')
+io=gr.Interface(fn=analysis, inputs=[filePdf,Search_txt], outputs=[text,mfw,plot1,plot2,plot3,plot4,plot5], title='Manifesto Analysis',examples=[['manifestos/Bjp_Manifesto_2019.pdf','modi'],['manifestos/AAP_Manifesto_2019.pdf','delhi'],['manifestos/Congress_Manifesto_2019.pdf','safety']],theme='dark-peach')
 io.launch(debug=True,share=False)
 #,examples=[['./Bjp_Manifesto_2019.pdf','india'],['./AAP_Manifesto_2019.pdf',],['./Congress_Manifesto_2019.pdf',]]
 #allow_screenshot=False,    allow_flagging="never",