Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
|
@@ -55,14 +55,18 @@ nltk.download('words')
|
|
| 55 |
|
| 56 |
"""## PARSING FILES"""
|
| 57 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 58 |
def Parsing(parsed_text):
|
| 59 |
parsed_text=parsed_text.name
|
| 60 |
-
raw_party =parser.from_file(parsed_text)
|
| 61 |
-
raw_party = raw_party['content']
|
| 62 |
return clean(raw_party)
|
| 63 |
|
| 64 |
|
| 65 |
-
|
| 66 |
#Added more stopwords to avoid irrelevant terms
|
| 67 |
stop_words = set(stopwords.words('english'))
# set.update() treats every positional argument as an iterable, so passing
# bare strings adds their individual characters rather than the words.
# Supply the extra stopwords as a single list so each entry is added whole.
stop_words.update([
    'ask', 'much', 'thank', 'etc.', 'e', 'We', 'In', 'ed', 'pa', 'This',
    'also', 'A', 'fu', 'To', '5', 'ing', 'er', '2',
])
|
|
|
|
| 55 |
|
| 56 |
"""## PARSING FILES"""
|
| 57 |
|
| 58 |
+
#def Parsing(parsed_text):
|
| 59 |
+
#parsed_text=parsed_text.name
|
| 60 |
+
#raw_party =parser.from_file(parsed_text)
|
| 61 |
+
# raw_party = raw_party['content']
|
| 62 |
+
# return clean(raw_party)
|
| 63 |
+
|
| 64 |
def Parsing(parsed_text):
    """Extract the text of a file and return it after cleaning.

    ``parsed_text`` must expose the path of the file to read as ``.name``
    (presumably an uploaded-file handle supplied by the UI -- confirm
    against the caller). The path is processed by ``textract`` with the
    ``pdfminer`` method and the extracted text is passed through ``clean``.
    """
    file_path = parsed_text.name
    # textract.process returns a byte string in the requested encoding;
    # decode it so clean() receives text rather than a bytes object.
    raw_bytes = textract.process(file_path, encoding='ascii', method='pdfminer')
    return clean(raw_bytes.decode('ascii'))
|
| 68 |
|
| 69 |
|
|
|
|
| 70 |
#Added more stopwords to avoid irrelevant terms
|
| 71 |
stop_words = set(stopwords.words('english'))
# set.update() treats every positional argument as an iterable, so passing
# bare strings adds their individual characters rather than the words.
# Supply the extra stopwords as a single list so each entry is added whole.
stop_words.update([
    'ask', 'much', 'thank', 'etc.', 'e', 'We', 'In', 'ed', 'pa', 'This',
    'also', 'A', 'fu', 'To', '5', 'ing', 'er', '2',
])
|