Update app.py
Browse files
app.py
CHANGED
|
@@ -25,8 +25,13 @@ import os
|
|
| 25 |
#os.system('gunzip ben.traineddata.gz ')
|
| 26 |
#os.system('sudo mv -v ben.traineddata /usr/local/share/tessdata/')
|
| 27 |
#os.system('pip install -q pytesseract')
|
| 28 |
-
import streamlit as st
|
| 29 |
-
import
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 30 |
import torch
|
| 31 |
from transformers import AutoTokenizer, AutoModelWithLMHead, GPT2LMHeadModel
|
| 32 |
|
|
@@ -64,28 +69,99 @@ def entity_analyzer(my_text):
|
|
| 64 |
entities = [(entity.text,entity.label_)for entity in docx.ents]
|
| 65 |
allData = ['"Token":{},\n"Entities":{}'.format(tokens,entities)]
|
| 66 |
return allData
|
| 67 |
-
|
| 68 |
-
|
| 69 |
def main():
|
| 70 |
""" NLP Based App with Streamlit """
|
| 71 |
st.markdown("""
|
| 72 |
#### Description
|
| 73 |
This is a Natural Language Processing(NLP) Based App useful for basic NLP task
|
| 74 |
NER,Sentiment, Spell Corrections and Summarization
|
| 75 |
-
""")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 76 |
# Entity Extraction
|
| 77 |
-
message = st.text_area("type here ..")
|
| 78 |
if st.checkbox("Show Named Entities"):
|
| 79 |
st.subheader("Analyze Your Text")
|
| 80 |
if st.button("Extract"):
|
| 81 |
-
entity_result = entity_analyzer(
|
| 82 |
st.json(entity_result)
|
| 83 |
|
| 84 |
# Sentiment Analysis
|
| 85 |
if st.checkbox("Show Sentiment Analysis"):
|
| 86 |
st.subheader("Analyse Your Text")
|
| 87 |
if st.button("Analyze"):
|
| 88 |
-
blob = TextBlob(
|
| 89 |
result_sentiment = blob.sentiment
|
| 90 |
st.success(result_sentiment)
|
| 91 |
#Text Corrections
|
|
@@ -93,13 +169,13 @@ def main():
|
|
| 93 |
st.subheader("Correct Your Text")
|
| 94 |
if st.button("Spell Corrections"):
|
| 95 |
st.text("Using TextBlob ..")
|
| 96 |
-
st.success(TextBlob(
|
| 97 |
if st.checkbox("Text Generation"):
|
| 98 |
st.subheader("Generate Text")
|
| 99 |
ok = st.button("Generate")
|
| 100 |
tokenizer, model = load_models()
|
| 101 |
if ok:
|
| 102 |
-
input_ids = tokenizer(
|
| 103 |
st.text("Using Hugging Face Transformer, Contrastive Search ..")
|
| 104 |
output = model.generate(input_ids, max_length=128)
|
| 105 |
st.success(tokenizer.decode(output[0], skip_special_tokens=True))
|
|
|
|
| 25 |
#os.system('gunzip ben.traineddata.gz ')
#os.system('sudo mv -v ben.traineddata /usr/local/share/tessdata/')
#os.system('pip install -q pytesseract')

# stdlib
import asyncio
import base64  # used by send() below to encode raw audio frames; was missing here
import json

# third-party
import streamlit as st
import websockets
import pyaudio
import torch
from transformers import AutoTokenizer, AutoModelWithLMHead, GPT2LMHeadModel

# local
from configure import api_key
|
| 37 |
|
|
|
|
| 69 |
entities = [(entity.text,entity.label_)for entity in docx.ents]
|
| 70 |
allData = ['"Token":{},\n"Entities":{}'.format(tokens,entities)]
|
| 71 |
return allData
|
|
|
|
|
|
|
| 72 |
def main():
    """NLP Based App with Streamlit.

    Renders the UI: a voice-fillable text input plus checkbox-gated tools for
    named-entity extraction, sentiment analysis, spell correction and GPT-2
    text generation.  Also defines `send_receive`, an asyncio coroutine that
    streams microphone audio to the AssemblyAI real-time transcription
    websocket and writes the final transcript into ``st.session_state["text"]``.
    """
    st.markdown("""
    #### Description
    This is a Natural Language Processing(NLP) Based App useful for basic NLP task
    NER,Sentiment, Spell Corrections and Summarization
    """)

    # One-time session-state init: `text` holds the transcript / user input,
    # `run` flags whether the audio send/receive loops should keep streaming.
    if "text" not in st.session_state:
        st.session_state["text"] = ""
        st.session_state["run"] = False

    def start_listening():
        # Button callback: arms the streaming loops below.
        st.session_state["run"] = True

    st.button("Say something", on_click=start_listening)
    text = st.text_input("What should I create?", value=st.session_state["text"])

    # AssemblyAI real-time endpoint and PyAudio capture parameters
    # (16 kHz mono 16-bit PCM; 3200 frames per buffer = 200 ms of audio).
    URL = "wss://api.assemblyai.com/v2/realtime/ws?sample_rate=16000"
    FRAMES_PER_BUFFER = 3200
    FORMAT = pyaudio.paInt16
    CHANNELS = 1
    RATE = 16000

    p = pyaudio.PyAudio()
    # starts recording
    stream = p.open(
        format=FORMAT,
        channels=CHANNELS,
        rate=RATE,
        input=True,
        frames_per_buffer=FRAMES_PER_BUFFER
    )

    async def send_receive():
        """Stream mic audio to AssemblyAI and collect the final transcript."""
        print(f'Connecting websocket to url ${URL}')
        async with websockets.connect(
            URL,
            extra_headers=(("Authorization", api_key),),
            ping_interval=5,
            ping_timeout=20
        ) as _ws:
            r = await asyncio.sleep(0.1)
            print("Receiving Session begins ...")
            # First server frame is the SessionBegins handshake; read and discard.
            session_begins = await _ws.recv()

            async def send():
                # Pump base64-encoded audio chunks while `run` is set.
                while st.session_state['run']:
                    try:
                        data = stream.read(FRAMES_PER_BUFFER)
                        data = base64.b64encode(data).decode("utf-8")
                        json_data = json.dumps({"audio_data":str(data)})
                        r = await _ws.send(json_data)
                    except websockets.exceptions.ConnectionClosedError as e:
                        print(e)
                        # NOTE(review): 4008 is presumably the service's
                        # session-expired close code; `assert` disappears under
                        # `python -O` — consider an explicit raise. Confirm.
                        assert e.code == 4008
                        break
                    except Exception as e:
                        print(e)
                        assert False, "Not a websocket 4008 error"
                    r = await asyncio.sleep(0.01)

            async def receive():
                # Drain transcript messages; on a FinalTranscript store the
                # (lightly cleaned) text and stop both loops.
                while st.session_state['run']:
                    try:
                        result_str = await _ws.recv()
                        result = json.loads(result_str)['text']
                        if json.loads(result_str)['message_type'] == 'FinalTranscript':
                            result = result.replace('.', '')
                            result = result.replace('!', '')
                            st.session_state['text'] = result
                            st.session_state['run'] = False
                            # NOTE(review): deprecated in newer Streamlit;
                            # `st.rerun()` is the current spelling — confirm the
                            # pinned Streamlit version before changing.
                            st.experimental_rerun()
                    except websockets.exceptions.ConnectionClosedError as e:
                        print(e)
                        assert e.code == 4008
                        break
                    except Exception as e:
                        print(e)
                        assert False, "Not a websocket 4008 error"

            send_result, receive_result = await asyncio.gather(send(), receive())

    # NOTE(review): nothing in the visible code ever runs `send_receive`
    # (e.g. `asyncio.run(send_receive())`); the call site is presumably outside
    # this view — confirm it exists, otherwise the mic stream is dead code.

    # Entity Extraction
    if st.checkbox("Show Named Entities"):
        st.subheader("Analyze Your Text")
        if st.button("Extract"):
            entity_result = entity_analyzer(text)
            st.json(entity_result)

    # Sentiment Analysis
    if st.checkbox("Show Sentiment Analysis"):
        st.subheader("Analyse Your Text")
        if st.button("Analyze"):
            blob = TextBlob(text)
            result_sentiment = blob.sentiment
            st.success(result_sentiment)

    #Text Corrections
    # NOTE(review): the diff view omits original line 168 — the widget gating
    # this section.  By symmetry with the sections above it is presumably an
    # `st.checkbox(...)`; the gate below is a reconstruction — confirm its label
    # against the actual file.
    if st.checkbox("Spell Correction"):
        st.subheader("Correct Your Text")
        if st.button("Spell Corrections"):
            st.text("Using TextBlob ..")
            st.success(TextBlob(text).correct())

    # Text Generation (GPT-2 via Hugging Face; `load_models` defined elsewhere
    # in the file).
    if st.checkbox("Text Generation"):
        st.subheader("Generate Text")
        ok = st.button("Generate")
        tokenizer, model = load_models()
        if ok:
            input_ids = tokenizer(text, return_tensors='pt').input_ids
            st.text("Using Hugging Face Transformer, Contrastive Search ..")
            output = model.generate(input_ids, max_length=128)
            st.success(tokenizer.decode(output[0], skip_special_tokens=True))
|