Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
|
@@ -114,6 +114,33 @@ def concordance(text_Party,strng):
|
|
| 114 |
s=result.getvalue().splitlines()
|
| 115 |
return result.getvalue()
|
| 116 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 117 |
|
| 118 |
def normalize(d, target=1.0):
|
| 119 |
raw = sum(d.values())
|
|
@@ -356,8 +383,8 @@ def analysis(Manifesto,Search):
|
|
| 356 |
fdist_Party=fDistance(text_Party)
|
| 357 |
img4=fDistancePlot(text_Party)
|
| 358 |
img5=DispersionPlot(text_Party)
|
| 359 |
-
|
| 360 |
-
searchRes=
|
| 361 |
searChRes=clean(searchRes)
|
| 362 |
searChRes=searchRes.replace(Search,"\u0332".join(Search))
|
| 363 |
return searChRes,fdist_Party,img1,img2,img3,img4,img5
|
|
@@ -373,7 +400,7 @@ plot3=gr.outputs.Image(label='Word Cloud')
|
|
| 373 |
plot4=gr.outputs.Image(label='Frequency Distribution')
|
| 374 |
plot5=gr.outputs.Image(label='Dispersion Plot')
|
| 375 |
|
| 376 |
-
io=gr.Interface(fn=analysis, inputs=[filePdf,Search_txt], outputs=[text,mfw,plot1,plot2,plot3,plot4,plot5], title='Manifesto Analysis',examples=[['manifestos/Bjp_Manifesto_2019.pdf','modi'],['manifestos/AAP_Manifesto_2019.pdf','delhi'],['manifestos/Congress_Manifesto_2019.pdf','safety']],theme='peach')
|
| 377 |
io.launch(debug=True,share=False)
|
| 378 |
#,examples=[['./Bjp_Manifesto_2019.pdf','india'],['./AAP_Manifesto_2019.pdf',],['./Congress_Manifesto_2019.pdf',]]
|
| 379 |
#allow_screenshot=False, allow_flagging="never",
|
|
|
|
| 114 |
s=result.getvalue().splitlines()
|
| 115 |
return result.getvalue()
|
| 116 |
|
| 117 |
+
def get_all_phases_containing_tar_wrd(target_word, tar_passage, left_margin=10, right_margin=10):
    """
    Return every context window around ``target_word`` found in ``tar_passage``.

    Workaround for nltk's concordance view, which prints its result instead of
    returning it: the passage is tokenized, indexed, and a slice of tokens is
    taken around each occurrence of the target word.

    Note the argument order: the word to search for comes first, the passage
    to search in comes second.

    Args:
        target_word: Word to look up. The index is keyed on lower-cased
            tokens, so the lookup is case-insensitive.
        tar_passage: Raw text to search; tokenized with ``nltk.word_tokenize``.
        left_margin: Number of tokens (words/punctuation) kept before each
            occurrence. Clamped so the window never starts before the
            beginning of the text.
        right_margin: The window ends at ``offset + right_margin`` (exclusive),
            i.e. it holds the matched token plus up to ``right_margin - 1``
            tokens after it.

    Returns:
        str: The context windows joined with ``'. '``. Inside a window every
        token is followed by a single space. The window of the last
        occurrence is not included (see NOTE below), so fewer than two
        occurrences yield an empty string.
    """
    # Tokenize the passage with nltk.
    tokens = nltk.word_tokenize(tar_passage)

    # Index token positions by lower-cased token so offsets() can be queried
    # without caring about case.
    index = nltk.ConcordanceIndex(tokens, key=lambda s: s.lower())

    # One token slice per occurrence. The left edge honours left_margin
    # (previously hard-coded to 5) and is clamped to 0 near the start of the
    # text so a negative start index cannot wrap around.
    windows = [
        tokens[max(offset - left_margin, 0):offset + right_margin]
        for offset in index.offsets(target_word)
    ]

    # Re-assemble each window into a phrase: token + trailing space.
    # NOTE(review): every capital "Y" is stripped from every token --
    # presumably cleaning up an extraction artifact; confirm it is still wanted.
    phrases = [
        ''.join(token.replace("Y", "") + ' ' for token in window)
        for window in windows
    ]

    # NOTE(review): the last phrase is dropped on purpose to keep the existing
    # behaviour; with a single occurrence this returns ''. Confirm intent.
    return '. '.join(phrases[:-1])
|
| 143 |
+
|
| 144 |
|
| 145 |
def normalize(d, target=1.0):
|
| 146 |
raw = sum(d.values())
|
|
|
|
| 383 |
fdist_Party=fDistance(text_Party)
|
| 384 |
img4=fDistancePlot(text_Party)
|
| 385 |
img5=DispersionPlot(text_Party)
|
| 386 |
+
#concordance(text_Party,Search)
|
| 387 |
+
searchRes=get_all_phases_containing_tar_wrd(text_Party,Search)
|
| 388 |
searChRes=clean(searchRes)
|
| 389 |
searChRes=searchRes.replace(Search,"\u0332".join(Search))
|
| 390 |
return searChRes,fdist_Party,img1,img2,img3,img4,img5
|
|
|
|
| 400 |
# Image output components for the frequency-distribution and dispersion plots.
plot4 = gr.outputs.Image(label='Frequency Distribution')
plot5 = gr.outputs.Image(label='Dispersion Plot')

# Build the Gradio interface around `analysis`: a PDF plus a search term go in,
# and the seven output components are listed in the order `analysis` returns
# its values.
io = gr.Interface(
    fn=analysis,
    inputs=[filePdf, Search_txt],
    outputs=[text, mfw, plot1, plot2, plot3, plot4, plot5],
    title='Manifesto Analysis',
    examples=[
        ['manifestos/Bjp_Manifesto_2019.pdf', 'modi'],
        ['manifestos/AAP_Manifesto_2019.pdf', 'delhi'],
        ['manifestos/Congress_Manifesto_2019.pdf', 'safety'],
    ],
    theme='dark-peach',
)
io.launch(debug=True, share=False)
#,examples=[['./Bjp_Manifesto_2019.pdf','india'],['./AAP_Manifesto_2019.pdf',],['./Congress_Manifesto_2019.pdf',]]
#allow_screenshot=False, allow_flagging="never",
|