Spaces:

Soumen
/

Text-Summarization-and-NLP-tasks

Sleeping

App Files Files Community

Soumen committed on Nov 4, 2022

Commit

9c37e72

1 Parent(s): 0553808

initial commit

Browse files

Files changed (5) hide show

app.py +145 -0
images.png +0 -0
packages.txt +1 -0
requirements.txt +16 -0
scholarly_text.jpg +0 -0

app.py ADDED Viewed

	@@ -0,0 +1,145 @@

+"""
+## App: NLP App with Streamlit
+Credits: Streamlit Team,Marc Skov Madsen(For Awesome-streamlit gallery)
+Description
+This is a Natural Language Processing (NLP) based app useful for basic NLP tasks such as the following:
++ Tokenization & Lemmatization using Spacy
++ Named Entity Recognition(NER) using SpaCy
++ Sentiment Analysis using TextBlob
++ Document/Text Summarization using Gensim/T5
+This is built with Streamlit Framework, an awesome framework for building ML and NLP tools.
+Purpose
+To perform basic and useful NLP tasks with Streamlit, spaCy, TextBlob and Gensim
+"""
+# Core Pkgs
+import streamlit as st
+import os
+import torch
+from transformers import AutoTokenizer, AutoModelWithLMHead
+# NLP Pkgs
+from textblob import TextBlob
+import spacy
+from gensim.summarization import summarize
+import requests
+import cv2
+import numpy as np
+import pytesseract
+pytesseract.pytesseract.tesseract_cmd = r"C:\Program Files\Tesseract-OCR\tesseract.exe"
+from PIL import Image
+# Function to Analyse Tokens and Lemma
+tokenizer = AutoTokenizer.from_pretrained('t5-base')
+model = AutoModelWithLMHead.from_pretrained('t5-base', return_dict=True)
+@st.cache
+def text_analyzer(my_text):
+	nlp = spacy.load('en_core_web_sm')
+	docx = nlp(my_text)
+	# tokens = [ token.text for token in docx]
+	allData = [('"Token":{},\n"Lemma":{}'.format(token.text,token.lemma_))for token in docx ]
+	return allData
+# Function For Extracting Entities
+@st.cache
+def entity_analyzer(my_text):
+	nlp = spacy.load('en_core_web_sm')
+	docx = nlp(my_text)
+	tokens = [ token.text for token in docx]
+	entities = [(entity.text,entity.label_)for entity in docx.ents]
+	allData = ['"Token":{},\n"Entities":{}'.format(tokens,entities)]
+	return allData
+def main():
+	""" NLP Based App with Streamlit """
+	# Title
+	st.title("Streamlit NLP APP")
+	st.markdown("""
+    	#### Description
+    	+ This is a Natural Language Processing(NLP) Based App useful for basic NLP task
+         NER,Sentiment, Spell Corrections and Summarization
+    	""")
+	# Entity Extraction
+	if st.checkbox("Show Named Entities"):
+		st.subheader("Analyze Your Text")
+		message = st.text_area("Enter your Text","Typing Here ..")
+		if st.button("Extract"):
+			entity_result = entity_analyzer(message)
+			st.json(entity_result)
+	# Sentiment Analysis
+	elif st.checkbox("Show Sentiment Analysis"):
+		st.subheader("Analyse Your Text")
+		message = st.text_area("Enter Text plz","Type Here .")
+		if st.button("Analyze"):
+			blob = TextBlob(message)
+			result_sentiment = blob.sentiment
+			st.success(result_sentiment)
+	#Text Corrections
+	elif st.checkbox("Spell Corrections"):
+		st.subheader("Correct Your Text")
+		message = st.text_area("Enter the Text","Type please ..")
+		if st.button("Spell Corrections"):
+			st.text("Using TextBlob ..")
+			st.success(TextBlob(message).correct())
+	def change_photo_state():
+		st.session_state["photo"]="done"
+	st.subheader("Summary section, feed your image!")
+	camera_photo = st.camera_input("Take a photo", on_change=change_photo_state)
+	uploaded_photo = st.file_uploader("Upload Image",type=['jpg','png','jpeg'], on_change=change_photo_state)
+	message = st.text_input("Or, drop your text here!")
+	if "photo" not in st.session_state:
+		st.session_state["photo"]="not done"
+	if st.session_state["photo"]=="done" or message:
+		if uploaded_photo:
+			img = Image.open(uploaded_photo)
+			img = img.save("img.png")
+			img = cv2.imread("img.png")
+			text = pytesseract.image_to_string(img)
+			st.success(text)
+		if camera_photo:
+			img = Image.open(camera_photo)
+			img = img.save("img.png")
+			img = cv2.imread("img.png")
+			text = pytesseract.image_to_string(img)
+			st.success(text)
+		if uploaded_photo==None and camera_photo==None:
+			#our_image=load_image("image.jpg")
+			#img = cv2.imread("scholarly_text.jpg")
+			text = message
+		# Summarization
+		if st.checkbox("Show Text Summarization Genism"):
+			st.subheader("Summarize Your Text")
+			#message = st.text_area("Enter the Text","Type please ..")
+			st.text("Using Gensim Summarizer ..")
+			#st.success(mess)
+			summary_result = summarize(text)
+			st.success(summary_result)
+		elif st.checkbox("Show Text Summarization T5"):
+			st.subheader("Summarize Your Text")
+			#message = st.text_area("Enter the Text","Type please ..")
+			st.text("Using Google T5 Transformer ..")
+			inputs = tokenizer.encode("summarize: " + text,
+						return_tensors='pt',
+										max_length=512,
+										truncation=True)
+			summary_ids = model.generate(inputs, max_length=150, min_length=80, length_penalty=5., num_beams=2)
+			summary = tokenizer.decode(summary_ids[0])
+			st.success(summary)
+	st.sidebar.subheader("About App")
+	st.sidebar.subheader("By")
+	st.sidebar.text("Soumen Sarker")
+# Launch the app when this file is executed as a script rather than imported
+if __name__ == '__main__':
+	main()

images.png ADDED Viewed

packages.txt ADDED Viewed

	@@ -0,0 +1 @@


1	+ tesseract-ocr-all

requirements.txt ADDED Viewed

	@@ -0,0 +1,16 @@

+torch
+transformers
+nltk==3.6.5
+wordnet
+gensim==3.8.3
+joblib==1.1.0
+numpy==1.21.4
+pandas==1.3.4
+scikit-learn==1.0.1
+spacy==3.2.0
+streamlit==1.2.0
+textblob==0.17.1
+requests
+pytesseract
+opencv-python
+Pillow

scholarly_text.jpg ADDED Viewed