Update app.py

app.py CHANGED
@@ -33,7 +33,7 @@ from PyPDF2 import PdfReader
 from templates import bot_template, css, user_template
 from xml.etree import ElementTree as ET
 
-#
+# 1. Constants and Top Level UI Variables
 API_URL = 'https://qe55p8afio98s0u3.us-east-1.aws.endpoints.huggingface.cloud' # Dr Llama
 API_KEY = os.getenv('API_KEY')
 headers = {
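For context, a minimal sketch of how an Inference Endpoint like this is typically called with `requests`, assuming `API_KEY` holds a valid token (the `Content-Type` header is an assumption, not part of the commit):

```python
import os
import requests

API_URL = 'https://qe55p8afio98s0u3.us-east-1.aws.endpoints.huggingface.cloud'  # Dr Llama
API_KEY = os.getenv('API_KEY')

# Typical Inference Endpoint call: bearer auth plus a JSON payload.
headers = {
    "Authorization": f"Bearer {API_KEY}",
    "Content-Type": "application/json",
}
response = requests.post(API_URL, headers=headers, json={"inputs": "Hello, Dr Llama"})
print(response.json())
```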
@@ -44,11 +44,9 @@ key = os.getenv('OPENAI_API_KEY')
 prompt = f"Write instructions to teach anyone to write a discharge plan. List the entities, features and relationships to CCDA and FHIR objects in boldface."
 # page config and sidebar declares up front allow all other functions to see global class variables
 st.set_page_config(page_title="GPT Streamlit Document Reasoner", layout="wide")
-
-# UI Controls
 should_save = st.sidebar.checkbox("💾 Save", value=True, help="Save your session data.")
 
-#
+# 2. Prompt label button demo for LLM
 def add_witty_humor_buttons():
     with st.expander("Wit and Humor 🤣", expanded=True):
         # Tip about the Dromedary family
@@ -95,7 +93,7 @@ def add_witty_humor_buttons():
             StreamLLMChatResponse(descriptions["More Funny Rhymes 🎙️"])
 
 
-#
+# 3. Stream Llama Response
 @st.cache_resource
 def StreamLLMChatResponse(prompt):
 
@@ -151,14 +149,8 @@ def StreamLLMChatResponse(prompt):
     <h1>🔊 Read It Aloud</h1>
     <textarea id="textArea" rows="10" cols="80">
     '''
-
     documentHTML5 = documentHTML5 + result
-
     documentHTML5 = documentHTML5 + '''
-    This is a text passage that will be read aloud if Streamlit wrapping HTML5 wrapping browser based speech works correctly.
-
-    If all works this html can be nested into a variable and then put into session using streamlit and HTML5.
-
     </textarea>
     <br>
     <button onclick="readAloud()">🔊 Read Aloud</button>
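The `readAloud()` handler referenced by the button sits outside this hunk. A plausible sketch of the page the function assembles, assuming the handler uses the browser's `window.speechSynthesis` API (the script body here is illustrative, not from the commit):

```python
import streamlit.components.v1 as components

# Sketch of the page StreamLLMChatResponse assembles: a textarea plus a
# button that speaks its contents via the browser's Web Speech API.
documentHTML5 = '''
<!DOCTYPE html>
<html>
<body>
    <h1>🔊 Read It Aloud</h1>
    <textarea id="textArea" rows="10" cols="80">Hello from the model.</textarea>
    <br>
    <button onclick="readAloud()">🔊 Read Aloud</button>
    <script>
    function readAloud() {
        const text = document.getElementById("textArea").value;
        window.speechSynthesis.speak(new SpeechSynthesisUtterance(text));
    }
    </script>
</body>
</html>
'''
components.html(documentHTML5, width=1280, height=1024)
```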
@@ -166,31 +158,30 @@ def StreamLLMChatResponse(prompt):
     </html>
     '''
 
-
     import streamlit.components.v1 as components  # Import Streamlit
     components.html(documentHTML5, width=1280, height=1024)
-
-
     return result
 except:
-    st.write('
+    st.write('Llama model is asleep. Starting up now on A10 - please give 5 minutes then retry as KEDA scales up from zero to activate running container(s).')
 
+# 4. Run query with payload
 @st.cache_resource
 def query(payload):
     response = requests.post(API_URL, headers=headers, json=payload)
     st.markdown(response.json())
     return response.json()
-
 def get_output(prompt):
     return query({"inputs": prompt})
 
+# 5. Auto name generated output files from time and content
 def generate_filename(prompt, file_type):
     central = pytz.timezone('US/Central')
     safe_date_time = datetime.now(central).strftime("%m%d_%H%M")
     replaced_prompt = prompt.replace(" ", "_").replace("\n", "_")
-    safe_prompt = "".join(x for x in replaced_prompt if x.isalnum() or x == "_")[:
+    safe_prompt = "".join(x for x in replaced_prompt if x.isalnum() or x == "_")[:45]
     return f"{safe_date_time}_{safe_prompt}.{file_type}"
 
+# 6. Speech transcription via OpenAI service
 @st.cache_resource
 def transcribe_audio(openai_key, file_path, model):
     openai.api_key = openai_key
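The body of `transcribe_audio` falls outside the hunk. A minimal sketch of the legacy (pre-1.0 `openai`) Whisper call it presumably wraps; the `whisper-1` model name and the function body are assumptions:

```python
import openai

def transcribe_audio_sketch(openai_key, file_path, model="whisper-1"):
    # Legacy (openai<1.0) Whisper transcription call.
    openai.api_key = openai_key
    with open(file_path, "rb") as f:
        transcript = openai.Audio.transcribe(model, f)
    return transcript["text"]
```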
@@ -215,6 +206,7 @@ def transcribe_audio(openai_key, file_path, model):
         st.error("Error in API call.")
         return None
 
+# 7. Auto stop on silence audio control for recording WAV files
 def save_and_play_audio(audio_recorder):
     audio_bytes = audio_recorder(key='audio_recorder')
     if audio_bytes:
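`save_and_play_audio` receives the recorder as a parameter. A sketch of the surrounding wiring, assuming the `audio-recorder-streamlit` package supplies the callable; the filename here is illustrative:

```python
import streamlit as st
from audio_recorder_streamlit import audio_recorder  # assumed source of the callable

def save_and_play_audio_sketch(recorder):
    audio_bytes = recorder(key='audio_recorder')
    if audio_bytes:
        filename = 'recorded.wav'  # illustrative; the app generates timestamped names
        with open(filename, 'wb') as f:
            f.write(audio_bytes)
        st.audio(audio_bytes, format="audio/wav")
        return filename
    return None
```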
@@ -225,6 +217,7 @@ def save_and_play_audio(audio_recorder):
         return filename
     return None
 
+# 8. File creator that interprets type and creates output file for text, markdown and code
 @st.cache_resource
 def create_file(filename, prompt, response, should_save=True):
     if not should_save:
@@ -232,24 +225,26 @@ def create_file(filename, prompt, response, should_save=True):
     base_filename, ext = os.path.splitext(filename)
     has_python_code = bool(re.search(r"```python([\s\S]*?)```", response))
     if ext in ['.txt', '.htm', '.md']:
-        #with open(f"{base_filename}-Prompt.txt", 'w') as file:
-        #    file.write(prompt.strip())
-        #with open(f"{base_filename}-Response.md", 'w') as file:
-        #    file.write(response)
         with open(f"{base_filename}.md", 'w') as file:
-            file.write(response)
+            content = prompt.strip() + '\r\n' + response
+            file.write(content)
 
         if has_python_code:
             python_code = re.findall(r"```python([\s\S]*?)```", response)[0].strip()
+            # one copy with runnable section - tee up with python process shell
             with open(f"{base_filename}-Code.py", 'w') as file:
                 file.write(python_code)
+            # second copy with markdown of prompt and response
+            with open(f"{base_filename}.md", 'w') as file:
+                content = prompt.strip() + '\r\n' + response
+                file.write(content)
 
 def truncate_document(document, length):
     return document[:length]
-
 def divide_document(document, max_length):
     return [document[i:i+max_length] for i in range(0, len(document), max_length)]
 
+# 9. Sidebar with UI controls to review and re-run prompts and continue responses
 @st.cache_resource
 def get_table_download_link(file_path):
     with open(file_path, 'r') as file:
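A quick standalone check of the fence-extraction regex used above (the fence string is built from parts only to keep this example self-contained):

```python
import re

FENCE = "`" * 3  # a literal triple backtick, assembled to stay fence-safe here
response = f"Sure:\n{FENCE}python\nprint('hello')\n{FENCE}\nDone."

# Same pattern as in create_file: grab everything between ```python and ```.
has_python_code = bool(re.search(FENCE + r"python([\s\S]*?)" + FENCE, response))
python_code = re.findall(FENCE + r"python([\s\S]*?)" + FENCE, response)[0].strip()
print(has_python_code, python_code)  # True print('hello')
```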
@@ -278,13 +273,15 @@ def get_table_download_link(file_path):
     href = f'<a href="data:{mime_type};base64,{b64}" target="_blank" download="{file_name}">{file_name}</a>'
     return href
 
+
 def CompressXML(xml_text):
     root = ET.fromstring(xml_text)
     for elem in list(root.iter()):
         if isinstance(elem.tag, str) and 'Comment' in elem.tag:
             elem.parent.remove(elem)
     return ET.tostring(root, encoding='unicode', method="xml")
-
+
+# 10. Read in and provide UI for past files
 @st.cache_resource
 def read_file_content(file,max_length):
     if file.type == "application/json":
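One caveat on `CompressXML` as committed: `xml.etree` elements have no `.parent` attribute, so `elem.parent.remove(elem)` raises `AttributeError` on the first match. A hedged fix builds a child-to-parent map first:

```python
from xml.etree import ElementTree as ET

def compress_xml_sketch(xml_text):
    root = ET.fromstring(xml_text)
    # ElementTree has no parent pointers; map each child to its parent up front.
    parent_map = {child: parent for parent in root.iter() for child in parent}
    for elem in list(root.iter()):
        if isinstance(elem.tag, str) and 'Comment' in elem.tag and elem in parent_map:
            parent_map[elem].remove(elem)
    return ET.tostring(root, encoding='unicode', method="xml")
```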
@@ -307,6 +304,7 @@ def read_file_content(file,max_length):
     else:
         return ""
 
+# 11. Chat with GPT - Caution on quota - now favoring fastest AI pipeline STT Whisper->LLM Llama->TTS
 @st.cache_resource
 def chat_with_model(prompt, document_section, model_choice='gpt-3.5-turbo'):
     model = model_choice
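`chat_with_model`'s body is elided here. Given the `full_reply_content` and timing lines visible in context, a sketch of the legacy streaming pattern it likely follows (pre-1.0 `openai` API; the message assembly is an assumption):

```python
import time
import openai

def chat_with_model_sketch(prompt, document_section, model_choice='gpt-3.5-turbo'):
    start_time = time.time()
    conversation = [
        {'role': 'system', 'content': 'You are a helpful assistant.'},
        {'role': 'user', 'content': prompt + '\n' + document_section},
    ]
    collected = []
    # Legacy streaming call: content arrives as incremental deltas.
    for chunk in openai.ChatCompletion.create(model=model_choice,
                                              messages=conversation,
                                              stream=True):
        collected.append(chunk['choices'][0]['delta'].get('content', ''))
    full_reply_content = ''.join(collected)
    print(time.time() - start_time)
    return full_reply_content
```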
@@ -336,6 +334,7 @@ def chat_with_model(prompt, document_section, model_choice='gpt-3.5-turbo'):
     st.write(time.time() - start_time)
     return full_reply_content
 
+# 12. Embedding VectorDB for LLM query of documents to text to compress inputs and prompt together as Chat memory using Langchain
 @st.cache_resource
 def chat_with_file_contents(prompt, file_content, model_choice='gpt-3.5-turbo'):
     conversation = [{'role': 'system', 'content': 'You are a helpful assistant.'}]
@@ -368,6 +367,7 @@ def extract_file_extension(file):
     else:
         raise ValueError(f"Unable to extract file extension from {file_name}")
 
+# Normalize input as text from PDF and other formats
 @st.cache_resource
 def pdf2txt(docs):
     text = ""
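`pdf2txt`'s loop is outside the hunk; with `PdfReader` already imported at the top of the file, the standard PyPDF2 pattern would be:

```python
from PyPDF2 import PdfReader

def pdf2txt_sketch(docs):
    text = ""
    for pdf in docs:
        reader = PdfReader(pdf)
        for page in reader.pages:
            page_text = page.extract_text()
            if page_text:  # extract_text can return None on image-only pages
                text += page_text
    return text
```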
@@ -390,11 +390,13 @@ def txt2chunks(text):
     text_splitter = CharacterTextSplitter(separator="\n", chunk_size=1000, chunk_overlap=200, length_function=len)
     return text_splitter.split_text(text)
 
+# Vector Store using FAISS
 @st.cache_resource
 def vector_store(text_chunks):
     embeddings = OpenAIEmbeddings(openai_api_key=key)
     return FAISS.from_texts(texts=text_chunks, embedding=embeddings)
 
+# Memory and Retrieval chains
 @st.cache_resource
 def get_chain(vectorstore):
     llm = ChatOpenAI()
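A sketch of the classic LangChain pairing these two cached helpers suggest: FAISS retrieval plus buffered chat memory. The import paths and chain names match langchain releases contemporary with this commit, but treat them as assumptions:

```python
from langchain.chat_models import ChatOpenAI
from langchain.embeddings import OpenAIEmbeddings
from langchain.vectorstores import FAISS
from langchain.memory import ConversationBufferMemory
from langchain.chains import ConversationalRetrievalChain

def get_chain_sketch(text_chunks, openai_api_key):
    # Embed the chunks once, then answer questions against the index
    # while carrying prior turns in buffer memory.
    embeddings = OpenAIEmbeddings(openai_api_key=openai_api_key)
    vectorstore = FAISS.from_texts(texts=text_chunks, embedding=embeddings)
    memory = ConversationBufferMemory(memory_key='chat_history', return_messages=True)
    return ConversationalRetrievalChain.from_llm(
        llm=ChatOpenAI(),
        retriever=vectorstore.as_retriever(),
        memory=memory,
    )
```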
@@ -428,6 +430,7 @@ def divide_prompt(prompt, max_length):
     chunks.append(' '.join(current_chunk))
     return chunks
 
+# 13. Provide way of saving all and deleting all to give way of reviewing output and saving locally before clearing it
 @st.cache_resource
 def create_zip_of_files(files):
     zip_name = "all_files.zip"
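The `zipfile` context manager implied by the next lines, shown whole for reference:

```python
import zipfile

def create_zip_of_files_sketch(files):
    # Bundle every saved session file into one archive for download.
    zip_name = "all_files.zip"
    with zipfile.ZipFile(zip_name, 'w') as zipf:
        for file in files:
            zipf.write(file)
    return zip_name
```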
@@ -435,7 +438,6 @@ def create_zip_of_files(files):
     for file in files:
         zipf.write(file)
     return zip_name
-
 @st.cache_resource
 def get_zip_download_link(zip_file):
     with open(zip_file, 'rb') as f:
@@ -444,7 +446,7 @@ def get_zip_download_link(zip_file):
     href = f'<a href="data:application/zip;base64,{b64}" download="{zip_file}">Download All</a>'
     return href
 
-
+# 14. Inference Endpoints for Whisper (best fastest STT) on NVIDIA T4 and Llama (best fastest AGI LLM) on NVIDIA A10
 API_URL_IE = f'https://tonpixzfvq3791u9.us-east-1.aws.endpoints.huggingface.cloud'
 headers = {
     "Authorization": "Bearer XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX",
@@ -465,7 +467,7 @@ def generate_filename(prompt, file_type):
     safe_prompt = "".join(x for x in replaced_prompt if x.isalnum() or x == "_")[:90]
     return f"{safe_date_time}_{safe_prompt}.{file_type}"
 
-#
+# 15. Audio recorder to Wav file
 def save_and_play_audio(audio_recorder):
     audio_bytes = audio_recorder()
     if audio_bytes:
@@ -475,7 +477,7 @@ def save_and_play_audio(audio_recorder):
     st.audio(audio_bytes, format="audio/wav")
     return filename
 
-#
+# 16. Speech transcription to file output
 def transcribe_audio(filename):
     output = query(filename)
     return output
@@ -500,6 +502,8 @@ def whisper_main():
         create_file(filename, transcription, response, should_save)
         #st.sidebar.markdown(get_table_download_link(filename), unsafe_allow_html=True)
 
+
+# 17. Main
 def main():
 
     st.title("AI Drome Llama")
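`whisper_main` is mostly outside the diff. From the calls visible in this hunk, the record-transcribe-respond-save flow presumably looks like the sketch below; the helper names come from the file, but the glue between them is an assumption:

```python
def whisper_main_sketch():
    # Record audio to a WAV file (helper defined earlier in app.py).
    filename = save_and_play_audio(audio_recorder)
    if filename is not None:
        # Transcribe via the Whisper inference endpoint, then answer with Llama.
        transcription = transcribe_audio(filename)
        response = StreamLLMChatResponse(transcription)
        create_file(filename, transcription, response, should_save)
```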
@@ -646,6 +650,7 @@ def main():
         filename = generate_filename(raw, 'txt')
         create_file(filename, raw, '', should_save)
 
+# 18. Run AI Pipeline
 if __name__ == "__main__":
     whisper_main()
     main()