Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -27,7 +27,6 @@ import io
|
|
| 27 |
import requests
|
| 28 |
import numpy as np
|
| 29 |
from urllib.parse import quote
|
| 30 |
-
import PyPDF2
|
| 31 |
|
| 32 |
# =============================================================================
|
| 33 |
# ───────────── EXTERNAL HELP LINKS ─────────────
|
|
@@ -117,16 +116,8 @@ def preprocess_text(text):
|
|
| 117 |
return text.strip()
|
| 118 |
|
| 119 |
def sanitize_json_text(text):
|
| 120 |
-
text =
|
| 121 |
-
text
|
| 122 |
-
return text
|
| 123 |
-
|
| 124 |
-
def extract_pdf_text(pdf_file):
|
| 125 |
-
pdf_reader = PyPDF2.PdfReader(pdf_file)
|
| 126 |
-
text = ""
|
| 127 |
-
for page in pdf_reader.pages:
|
| 128 |
-
text += page.extract_text() or ""
|
| 129 |
-
return text
|
| 130 |
|
| 131 |
# =============================================================================
|
| 132 |
# ───────────── COSMOS DB FUNCTIONS ─────────────
|
|
@@ -168,13 +159,10 @@ def delete_record(container, record):
|
|
| 168 |
container.delete_item(item=doc_id, partition_key=partition_key_value)
|
| 169 |
return True, f"Record {doc_id} deleted. 🗑️"
|
| 170 |
except exceptions.CosmosResourceNotFoundError:
|
| 171 |
-
st.write(f"Record {doc_id} not found in Cosmos DB - treating as success.")
|
| 172 |
return True, f"Record {doc_id} not found (already deleted). 🗑️"
|
| 173 |
except exceptions.CosmosHttpResponseError as e:
|
| 174 |
-
st.error(f"Cosmos HTTP error deleting {doc_id}: {str(e)}")
|
| 175 |
return False, f"HTTP error deleting {doc_id}: {str(e)} π¨"
|
| 176 |
except Exception as e:
|
| 177 |
-
st.error(f"Unexpected error deleting {doc_id}: {str(e)}")
|
| 178 |
return False, f"Unexpected error deleting {doc_id}: {str(e)} 😱"
|
| 179 |
|
| 180 |
def save_to_cosmos_db(container, query, response1, response2):
|
|
@@ -376,38 +364,19 @@ def edit_all_documents(container, search_keyword=None):
|
|
| 376 |
formatted_ts = dt.strftime("%I:%M %p %m/%d/%Y")
|
| 377 |
header = f"{doc.get('name', 'Unnamed')} - {formatted_ts}"
|
| 378 |
with st.expander(header):
|
| 379 |
-
|
| 380 |
-
|
| 381 |
-
|
| 382 |
-
initial_value = st.session_state.saved_docs.get(doc['id'], json.dumps(doc, indent=2))
|
| 383 |
-
edited_content = st.text_area("Edit JSON", value=initial_value, height=300, key=doc_key)
|
| 384 |
-
with col_pdf:
|
| 385 |
-
if 'pdf_data' in doc:
|
| 386 |
-
st.markdown("### π PDF Preview")
|
| 387 |
-
pdf_bytes = base64.b64decode(doc['pdf_data'])
|
| 388 |
-
st.download_button(
|
| 389 |
-
label="⬇️ Download PDF",
|
| 390 |
-
data=pdf_bytes,
|
| 391 |
-
file_name=f"{doc.get('name', 'document')}.pdf",
|
| 392 |
-
mime="application/pdf"
|
| 393 |
-
)
|
| 394 |
-
|
| 395 |
col_save, col_delete = st.columns(2)
|
| 396 |
with col_save:
|
| 397 |
if st.button("💾 Save", key=f"save_{doc['id']}"):
|
| 398 |
try:
|
| 399 |
-
cleaned_content = edited_content
|
| 400 |
-
|
| 401 |
-
|
| 402 |
-
|
| 403 |
-
|
| 404 |
-
|
| 405 |
-
updated_doc = json.loads(cleaned_content)
|
| 406 |
-
updated_doc['id'] = doc['id']
|
| 407 |
-
updated_doc['pk'] = doc.get('pk', doc['id'])
|
| 408 |
-
for field in ['_ts', '_rid', '_self', '_etag', '_attachments']:
|
| 409 |
-
updated_doc.pop(field, None)
|
| 410 |
-
|
| 411 |
success, message = update_record(container, updated_doc)
|
| 412 |
if success:
|
| 413 |
st.success(f"Saved {doc['id']}")
|
|
@@ -447,27 +416,6 @@ def new_item_default(container):
|
|
| 447 |
else:
|
| 448 |
st.error(f"Error creating new item: {message}")
|
| 449 |
|
| 450 |
-
def new_item_from_pdf(container, pdf_file):
|
| 451 |
-
new_id = generate_unique_id()
|
| 452 |
-
pdf_bytes = pdf_file.read()
|
| 453 |
-
pdf_text = extract_pdf_text(io.BytesIO(pdf_bytes))
|
| 454 |
-
pdf_base64 = base64.b64encode(pdf_bytes).decode('utf-8')
|
| 455 |
-
default_doc = {
|
| 456 |
-
"id": new_id,
|
| 457 |
-
"pk": new_id,
|
| 458 |
-
"name": pdf_file.name,
|
| 459 |
-
"content": pdf_text[:1000],
|
| 460 |
-
"timestamp": datetime.now().isoformat(),
|
| 461 |
-
"type": "pdf_document",
|
| 462 |
-
"pdf_data": pdf_base64
|
| 463 |
-
}
|
| 464 |
-
success, message = insert_record(container, default_doc)
|
| 465 |
-
if success:
|
| 466 |
-
st.success(f"PDF document '{pdf_file.name}' created! ✨")
|
| 467 |
-
st.rerun()
|
| 468 |
-
else:
|
| 469 |
-
st.error(f"Error creating PDF item: {message}")
|
| 470 |
-
|
| 471 |
def add_field_to_doc():
|
| 472 |
key = st.session_state.new_field_key
|
| 473 |
value = st.session_state.new_field_value
|
|
@@ -531,25 +479,20 @@ def vector_keyword_search(keyword, doc):
|
|
| 531 |
return False
|
| 532 |
|
| 533 |
def search_documents_ui(container):
|
| 534 |
-
st.sidebar.subheader("π Vector Search")
|
| 535 |
with st.sidebar.form("search_form"):
|
| 536 |
-
keyword = st.text_input("Search
|
| 537 |
col1, col2 = st.columns(2)
|
| 538 |
with col1:
|
| 539 |
search_submitted = st.form_submit_button("π Search")
|
| 540 |
with col2:
|
| 541 |
clear_submitted = st.form_submit_button("ποΈ Clear")
|
| 542 |
if search_submitted and keyword:
|
| 543 |
-
st.session_state.active_search = keyword
|
| 544 |
st.rerun()
|
| 545 |
if clear_submitted:
|
| 546 |
if 'active_search' in st.session_state:
|
| 547 |
del st.session_state.active_search
|
| 548 |
st.rerun()
|
| 549 |
-
|
| 550 |
-
uploaded_file = st.sidebar.file_uploader("Upload Document (PDF)", type=["pdf"])
|
| 551 |
-
if uploaded_file and container:
|
| 552 |
-
new_item_from_pdf(container, uploaded_file)
|
| 553 |
|
| 554 |
def validate_and_preprocess_image(file_data, target_size=(576, 1024)):
|
| 555 |
try:
|
|
@@ -668,10 +611,6 @@ def main():
|
|
| 668 |
# Sidebar: Hierarchical Navigation
|
| 669 |
st.sidebar.title("π Navigator")
|
| 670 |
|
| 671 |
-
# Vector Search Section (Moved to Top)
|
| 672 |
-
if st.session_state.current_container:
|
| 673 |
-
search_documents_ui(st.session_state.current_container)
|
| 674 |
-
|
| 675 |
# Databases Section
|
| 676 |
st.sidebar.subheader("ποΈ Databases")
|
| 677 |
if "client" not in st.session_state:
|
|
@@ -728,6 +667,7 @@ def main():
|
|
| 728 |
new_ai_record(st.session_state.current_container)
|
| 729 |
if st.sidebar.button("π New Links Record"):
|
| 730 |
new_links_record(st.session_state.current_container)
|
|
|
|
| 731 |
|
| 732 |
# Central Area: Editable Documents with Search Filter
|
| 733 |
if st.session_state.current_container:
|
|
|
|
| 27 |
import requests
|
| 28 |
import numpy as np
|
| 29 |
from urllib.parse import quote
|
|
|
|
| 30 |
|
| 31 |
# =============================================================================
|
| 32 |
# ───────────── EXTERNAL HELP LINKS ─────────────
|
|
|
|
| 116 |
return text.strip()
|
| 117 |
|
| 118 |
def sanitize_json_text(text):
|
| 119 |
+
text = re.sub(r'[\x00-\x08\x0B\x0C\x0E-\x1F]', '', text)
|
| 120 |
+
return text.strip()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 121 |
|
| 122 |
# =============================================================================
|
| 123 |
# ───────────── COSMOS DB FUNCTIONS ─────────────
|
|
|
|
| 159 |
container.delete_item(item=doc_id, partition_key=partition_key_value)
|
| 160 |
return True, f"Record {doc_id} deleted. 🗑️"
|
| 161 |
except exceptions.CosmosResourceNotFoundError:
|
|
|
|
| 162 |
return True, f"Record {doc_id} not found (already deleted). 🗑️"
|
| 163 |
except exceptions.CosmosHttpResponseError as e:
|
|
|
|
| 164 |
return False, f"HTTP error deleting {doc_id}: {str(e)} π¨"
|
| 165 |
except Exception as e:
|
|
|
|
| 166 |
return False, f"Unexpected error deleting {doc_id}: {str(e)} 😱"
|
| 167 |
|
| 168 |
def save_to_cosmos_db(container, query, response1, response2):
|
|
|
|
| 364 |
formatted_ts = dt.strftime("%I:%M %p %m/%d/%Y")
|
| 365 |
header = f"{doc.get('name', 'Unnamed')} - {formatted_ts}"
|
| 366 |
with st.expander(header):
|
| 367 |
+
doc_key = f"editor_{doc['id']}"
|
| 368 |
+
initial_value = st.session_state.saved_docs.get(doc['id'], json.dumps(doc, indent=2))
|
| 369 |
+
edited_content = st.text_area("Edit JSON", value=initial_value, height=300, key=doc_key)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 370 |
col_save, col_delete = st.columns(2)
|
| 371 |
with col_save:
|
| 372 |
if st.button("💾 Save", key=f"save_{doc['id']}"):
|
| 373 |
try:
|
| 374 |
+
cleaned_content = sanitize_json_text(edited_content)
|
| 375 |
+
updated_doc = json.loads(cleaned_content)
|
| 376 |
+
updated_doc['id'] = doc['id']
|
| 377 |
+
updated_doc['pk'] = doc.get('pk', doc['id'])
|
| 378 |
+
for field in ['_ts', '_rid', '_self', '_etag', '_attachments']:
|
| 379 |
+
updated_doc.pop(field, None)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 380 |
success, message = update_record(container, updated_doc)
|
| 381 |
if success:
|
| 382 |
st.success(f"Saved {doc['id']}")
|
|
|
|
| 416 |
else:
|
| 417 |
st.error(f"Error creating new item: {message}")
|
| 418 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 419 |
def add_field_to_doc():
|
| 420 |
key = st.session_state.new_field_key
|
| 421 |
value = st.session_state.new_field_value
|
|
|
|
| 479 |
return False
|
| 480 |
|
| 481 |
def search_documents_ui(container):
|
|
|
|
| 482 |
with st.sidebar.form("search_form"):
|
| 483 |
+
keyword = st.text_input("Search Keyword", key="search_keyword")
|
| 484 |
col1, col2 = st.columns(2)
|
| 485 |
with col1:
|
| 486 |
search_submitted = st.form_submit_button("π Search")
|
| 487 |
with col2:
|
| 488 |
clear_submitted = st.form_submit_button("ποΈ Clear")
|
| 489 |
if search_submitted and keyword:
|
| 490 |
+
st.session_state.active_search = keyword # Use a separate key
|
| 491 |
st.rerun()
|
| 492 |
if clear_submitted:
|
| 493 |
if 'active_search' in st.session_state:
|
| 494 |
del st.session_state.active_search
|
| 495 |
st.rerun()
|
|
|
|
|
|
|
|
|
|
|
|
|
| 496 |
|
| 497 |
def validate_and_preprocess_image(file_data, target_size=(576, 1024)):
|
| 498 |
try:
|
|
|
|
| 611 |
# Sidebar: Hierarchical Navigation
|
| 612 |
st.sidebar.title("π Navigator")
|
| 613 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 614 |
# Databases Section
|
| 615 |
st.sidebar.subheader("ποΈ Databases")
|
| 616 |
if "client" not in st.session_state:
|
|
|
|
| 667 |
new_ai_record(st.session_state.current_container)
|
| 668 |
if st.sidebar.button("π New Links Record"):
|
| 669 |
new_links_record(st.session_state.current_container)
|
| 670 |
+
search_documents_ui(st.session_state.current_container)
|
| 671 |
|
| 672 |
# Central Area: Editable Documents with Search Filter
|
| 673 |
if st.session_state.current_container:
|