Spaces:
Running
Running
adibak
committed on
Commit
·
1e2c128
1
Parent(s):
da5eb4d
remove upload zone, allow chat uploads, set & update slider range
Browse files
- app.py +31 -101
- helpers/file_manager.py +7 -1
app.py
CHANGED
|
@@ -13,10 +13,8 @@ import httpx
|
|
| 13 |
import huggingface_hub
|
| 14 |
import json5
|
| 15 |
import ollama
|
| 16 |
-
from pypdf import PdfReader
|
| 17 |
import requests
|
| 18 |
import streamlit as st
|
| 19 |
-
from streamlit_float import * # for floating UI elements
|
| 20 |
from dotenv import load_dotenv
|
| 21 |
from langchain_community.chat_message_histories import StreamlitChatMessageHistory
|
| 22 |
from langchain_core.messages import HumanMessage
|
|
@@ -31,7 +29,6 @@ load_dotenv()
|
|
| 31 |
|
| 32 |
RUN_IN_OFFLINE_MODE = os.getenv('RUN_IN_OFFLINE_MODE', 'False').lower() == 'true'
|
| 33 |
|
| 34 |
-
float_init() # Initialize streamlit_float
|
| 35 |
|
| 36 |
@st.cache_data
|
| 37 |
def _load_strings() -> dict:
|
|
@@ -145,6 +142,7 @@ DOWNLOAD_FILE_KEY = 'download_file_name'
|
|
| 145 |
IS_IT_REFINEMENT = 'is_it_refinement'
|
| 146 |
ADDITIONAL_INFO = 'additional_info'
|
| 147 |
|
|
|
|
| 148 |
logger = logging.getLogger(__name__)
|
| 149 |
|
| 150 |
texts = list(GlobalConfig.PPTX_TEMPLATE_FILES.keys())
|
|
@@ -224,6 +222,11 @@ with st.sidebar:
|
|
| 224 |
value='2024-05-01-preview',
|
| 225 |
)
|
| 226 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 227 |
def build_ui():
|
| 228 |
"""
|
| 229 |
Display the input elements for content generation.
|
|
@@ -251,47 +254,12 @@ def build_ui():
|
|
| 251 |
|
| 252 |
set_up_chat_ui()
|
| 253 |
|
| 254 |
-
def apply_custom_css():
|
| 255 |
-
# Custom CSS so that the file upload area is kind of transparent, remains near the bottom but is
|
| 256 |
-
# a little enlarged for ease of use, and the extra things that are normally part of st.file_uploader,
|
| 257 |
-
# i.e. the "Drag and Drop File Here" label, the pdf's name and size label, upload icon, and browse files button,
|
| 258 |
-
# are hidden. What this CSS does is produce a simple 'zone' that the user can click or drop a file on.
|
| 259 |
-
st.markdown(
|
| 260 |
-
'''
|
| 261 |
-
<style>
|
| 262 |
-
|
| 263 |
-
div[data-testid="stFileUploader"]{
|
| 264 |
-
position:relative;
|
| 265 |
-
opacity:0.5;
|
| 266 |
-
width:200%;
|
| 267 |
-
height:100px;
|
| 268 |
-
left:-105%;
|
| 269 |
-
}
|
| 270 |
-
section[data-testid="stFileUploaderDropzone"]{
|
| 271 |
-
position:absolute;
|
| 272 |
-
width:100%;
|
| 273 |
-
height:100%;
|
| 274 |
-
top:0;
|
| 275 |
-
}
|
| 276 |
-
div[data-testid="stFileUploaderDropzoneInstructions"]{
|
| 277 |
-
display:none;
|
| 278 |
-
}
|
| 279 |
-
div[data-testid="stFileUploaderFile"]{
|
| 280 |
-
display:none;
|
| 281 |
-
}
|
| 282 |
-
div[data-testid="stFileUploaderFileName"]{
|
| 283 |
-
display:none;
|
| 284 |
-
}
|
| 285 |
-
</style>
|
| 286 |
-
''',
|
| 287 |
-
unsafe_allow_html=True
|
| 288 |
-
)
|
| 289 |
|
| 290 |
def set_up_chat_ui():
|
| 291 |
"""
|
| 292 |
Prepare the chat interface and related functionality.
|
| 293 |
"""
|
| 294 |
-
|
| 295 |
with st.expander('Usage Instructions'):
|
| 296 |
st.markdown(GlobalConfig.CHAT_USAGE_INSTRUCTIONS)
|
| 297 |
|
|
@@ -310,63 +278,28 @@ def set_up_chat_ui():
|
|
| 310 |
for msg in history.messages:
|
| 311 |
st.chat_message(msg.type).code(msg.content, language='json')
|
| 312 |
|
| 313 |
-
|
| 314 |
-
|
| 315 |
-
|
| 316 |
-
|
| 317 |
-
|
| 318 |
-
|
| 319 |
-
|
| 320 |
-
|
| 321 |
-
|
| 322 |
-
|
| 323 |
-
|
| 324 |
-
|
| 325 |
-
|
| 326 |
-
|
| 327 |
-
|
| 328 |
-
|
| 329 |
-
|
| 330 |
-
|
| 331 |
-
|
| 332 |
-
|
| 333 |
-
|
| 334 |
-
|
| 335 |
-
)
|
| 336 |
-
|
| 337 |
-
# PDF Processing and Slider Logic
|
| 338 |
-
if uploaded_pdf:
|
| 339 |
-
reader = PdfReader(uploaded_pdf)
|
| 340 |
-
total_pages = len(reader.pages)
|
| 341 |
-
st.session_state["pdf_page_count"] = total_pages
|
| 342 |
-
|
| 343 |
-
# Slider for page range
|
| 344 |
-
max_slider = min(50, total_pages) # enforce 50 page limit
|
| 345 |
-
|
| 346 |
-
with st.sidebar:
|
| 347 |
-
# display the pdf's name
|
| 348 |
-
st.text(f"PDF Uploaded: {uploaded_pdf.name}")
|
| 349 |
-
|
| 350 |
-
st.slider(
|
| 351 |
-
label="4: Specify a page range to examine:",
|
| 352 |
-
min_value=1,
|
| 353 |
-
max_value=max_slider,
|
| 354 |
-
value=(1, max_slider),
|
| 355 |
-
key="page_range"
|
| 356 |
-
)
|
| 357 |
-
|
| 358 |
-
# make container stay near bottom too, but surround the chat and have dotted border for the visual cue
|
| 359 |
-
upload_container.float("border-style:dashed solid;bottom:10px;width:150%;height:100px;font-size:10pt;left:0;")
|
| 360 |
-
|
| 361 |
-
if prompt:
|
| 362 |
-
prompt_text = prompt
|
| 363 |
-
|
| 364 |
-
# if the user uploaded a pdf and specified a range, get the contents
|
| 365 |
-
if uploaded_pdf and "page_range" in st.session_state:
|
| 366 |
-
st.session_state[ADDITIONAL_INFO] = filem.get_pdf_contents(
|
| 367 |
-
uploaded_pdf,
|
| 368 |
-
st.session_state["page_range"]
|
| 369 |
-
)
|
| 370 |
|
| 371 |
provider, llm_name = llm_helper.get_provider_model(
|
| 372 |
llm_provider_to_use,
|
|
@@ -654,17 +587,14 @@ def _display_download_button(file_path: pathlib.Path):
|
|
| 654 |
|
| 655 |
:param file_path: The path of the .pptx file.
|
| 656 |
"""
|
|
|
|
| 657 |
with open(file_path, 'rb') as download_file:
|
| 658 |
-
print("entered")
|
| 659 |
-
print(f"filepath={file_path}")
|
| 660 |
st.download_button(
|
| 661 |
'Download PPTX file ⬇️',
|
| 662 |
data=download_file,
|
| 663 |
file_name='Presentation.pptx',
|
| 664 |
key=datetime.datetime.now()
|
| 665 |
)
|
| 666 |
-
|
| 667 |
-
print("download")
|
| 668 |
|
| 669 |
|
| 670 |
def main():
|
|
@@ -676,4 +606,4 @@ def main():
|
|
| 676 |
|
| 677 |
|
| 678 |
if __name__ == '__main__':
|
| 679 |
-
main()
|
|
|
|
| 13 |
import huggingface_hub
|
| 14 |
import json5
|
| 15 |
import ollama
|
|
|
|
| 16 |
import requests
|
| 17 |
import streamlit as st
|
|
|
|
| 18 |
from dotenv import load_dotenv
|
| 19 |
from langchain_community.chat_message_histories import StreamlitChatMessageHistory
|
| 20 |
from langchain_core.messages import HumanMessage
|
|
|
|
| 29 |
|
| 30 |
RUN_IN_OFFLINE_MODE = os.getenv('RUN_IN_OFFLINE_MODE', 'False').lower() == 'true'
|
| 31 |
|
|
|
|
| 32 |
|
| 33 |
@st.cache_data
|
| 34 |
def _load_strings() -> dict:
|
|
|
|
| 142 |
IS_IT_REFINEMENT = 'is_it_refinement'
|
| 143 |
ADDITIONAL_INFO = 'additional_info'
|
| 144 |
|
| 145 |
+
|
| 146 |
logger = logging.getLogger(__name__)
|
| 147 |
|
| 148 |
texts = list(GlobalConfig.PPTX_TEMPLATE_FILES.keys())
|
|
|
|
| 222 |
value='2024-05-01-preview',
|
| 223 |
)
|
| 224 |
|
| 225 |
+
page_range_slider = st.slider("7: Specify a page range:",
|
| 226 |
+
1, 50, [1, 50])
|
| 227 |
+
st.session_state["page_range_slider"] = page_range_slider
|
| 228 |
+
|
| 229 |
+
|
| 230 |
def build_ui():
|
| 231 |
"""
|
| 232 |
Display the input elements for content generation.
|
|
|
|
| 254 |
|
| 255 |
set_up_chat_ui()
|
| 256 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 257 |
|
| 258 |
def set_up_chat_ui():
|
| 259 |
"""
|
| 260 |
Prepare the chat interface and related functionality.
|
| 261 |
"""
|
| 262 |
+
print(f"slider={st.session_state["page_range_slider"][0], st.session_state["page_range_slider"][1]}")
|
| 263 |
with st.expander('Usage Instructions'):
|
| 264 |
st.markdown(GlobalConfig.CHAT_USAGE_INSTRUCTIONS)
|
| 265 |
|
|
|
|
| 278 |
for msg in history.messages:
|
| 279 |
st.chat_message(msg.type).code(msg.content, language='json')
|
| 280 |
|
| 281 |
+
if prompt := st.chat_input(
|
| 282 |
+
placeholder=APP_TEXT['chat_placeholder'],
|
| 283 |
+
max_chars=GlobalConfig.LLM_MODEL_MAX_INPUT_LENGTH,
|
| 284 |
+
accept_file=True,
|
| 285 |
+
file_type=['pdf', ],
|
| 286 |
+
):
|
| 287 |
+
prompt_text = prompt.text or ''
|
| 288 |
+
if prompt['files']:
|
| 289 |
+
uploaded_pdf = prompt['files'][0]
|
| 290 |
+
# pdf_length = filem.get_pdf_length(uploaded_pdf)
|
| 291 |
+
# valid_pdf_length = min(50, pdf_length)
|
| 292 |
+
|
| 293 |
+
# st.session_state["page_range_slider"] = list(st.session_state["page_range_slider"])
|
| 294 |
+
# st.session_state["page_range_slider"][1] = valid_pdf_length
|
| 295 |
+
# print(f"length={pdf_length}, validated={valid_pdf_length}={st.session_state["page_range_slider"][-1]}")
|
| 296 |
+
|
| 297 |
+
# print(f"fname={uploaded_pdf.name}")
|
| 298 |
+
# Apparently, Streamlit stores uploaded files in memory and clears on browser close
|
| 299 |
+
# https://docs.streamlit.io/knowledge-base/using-streamlit/where-file-uploader-store-when-deleted
|
| 300 |
+
st.session_state[ADDITIONAL_INFO] = filem.get_pdf_contents(uploaded_pdf,
|
| 301 |
+
st.session_state["page_range_slider"])
|
| 302 |
+
print(f"extracting={st.session_state["page_range_slider"]}")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 303 |
|
| 304 |
provider, llm_name = llm_helper.get_provider_model(
|
| 305 |
llm_provider_to_use,
|
|
|
|
| 587 |
|
| 588 |
:param file_path: The path of the .pptx file.
|
| 589 |
"""
|
| 590 |
+
|
| 591 |
with open(file_path, 'rb') as download_file:
|
|
|
|
|
|
|
| 592 |
st.download_button(
|
| 593 |
'Download PPTX file ⬇️',
|
| 594 |
data=download_file,
|
| 595 |
file_name='Presentation.pptx',
|
| 596 |
key=datetime.datetime.now()
|
| 597 |
)
|
|
|
|
|
|
|
| 598 |
|
| 599 |
|
| 600 |
def main():
|
|
|
|
| 606 |
|
| 607 |
|
| 608 |
if __name__ == '__main__':
|
| 609 |
+
main()
|
helpers/file_manager.py
CHANGED
|
@@ -32,12 +32,18 @@ def get_pdf_contents(
|
|
| 32 |
"""
|
| 33 |
|
| 34 |
reader = PdfReader(pdf_file)
|
|
|
|
| 35 |
|
| 36 |
start, end = page_range # set start and end per the range (user-specified values)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 37 |
|
| 38 |
text = ''
|
| 39 |
for page_num in range(start - 1, end):
|
| 40 |
page = reader.pages[page_num]
|
| 41 |
text += page.extract_text()
|
| 42 |
|
| 43 |
-
return text
|
|
|
|
| 32 |
"""
|
| 33 |
|
| 34 |
reader = PdfReader(pdf_file)
|
| 35 |
+
n_pages = len(reader.pages)
|
| 36 |
|
| 37 |
start, end = page_range # set start and end per the range (user-specified values)
|
| 38 |
+
start = max(1, start)
|
| 39 |
+
end = min(n_pages, end)
|
| 40 |
+
if start >= end:
|
| 41 |
+
start = 1
|
| 42 |
+
print(f"starting at {start}, ending {end}")
|
| 43 |
|
| 44 |
text = ''
|
| 45 |
for page_num in range(start - 1, end):
|
| 46 |
page = reader.pages[page_num]
|
| 47 |
text += page.extract_text()
|
| 48 |
|
| 49 |
+
return text
|