Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
|
@@ -131,14 +131,13 @@ def convert_pdf_to_images_gradio(pdf_file):
|
|
| 131 |
raise Exception(f"Error converting PDF: {str(e)}")
|
| 132 |
|
| 133 |
|
| 134 |
-
def process_pdf_document(pdf_file, model,
|
| 135 |
"""Process uploaded PDF file page by page"""
|
| 136 |
if pdf_file is None:
|
| 137 |
return "No PDF file uploaded", ""
|
| 138 |
|
| 139 |
try:
|
| 140 |
-
|
| 141 |
-
progress_callback("π Converting PDF to images...")
|
| 142 |
images = convert_pdf_to_images_gradio(pdf_file)
|
| 143 |
|
| 144 |
if not images:
|
|
@@ -147,8 +146,8 @@ def process_pdf_document(pdf_file, model, progress_callback=None):
|
|
| 147 |
all_results = []
|
| 148 |
|
| 149 |
for page_idx, pil_image in enumerate(images):
|
| 150 |
-
|
| 151 |
-
|
| 152 |
|
| 153 |
layout_output = model.chat("Parse the reading order of this document.", pil_image)
|
| 154 |
|
|
@@ -173,8 +172,7 @@ def process_pdf_document(pdf_file, model, progress_callback=None):
|
|
| 173 |
}
|
| 174 |
all_results.append(page_result)
|
| 175 |
|
| 176 |
-
|
| 177 |
-
progress_callback("π Finalizing document...")
|
| 178 |
|
| 179 |
combined_markdown = "\n\n---\n\n".join([
|
| 180 |
f"# Page {result['page_number']}\n\n{result['markdown']}"
|
|
@@ -320,68 +318,31 @@ processed_markdown = ""
|
|
| 320 |
show_results_tab = False
|
| 321 |
|
| 322 |
|
| 323 |
-
def
|
| 324 |
-
"""
|
| 325 |
-
return "π Processing PDF... Please wait", gr.Tabs(visible=False)
|
| 326 |
-
|
| 327 |
-
def process_uploaded_pdf_with_progress(pdf_file):
|
| 328 |
-
"""Main processing function with custom progress updates"""
|
| 329 |
global processed_markdown, show_results_tab
|
| 330 |
|
| 331 |
if dolphin_model is None:
|
| 332 |
-
|
| 333 |
-
return
|
| 334 |
|
| 335 |
if pdf_file is None:
|
| 336 |
-
|
| 337 |
-
return
|
| 338 |
|
| 339 |
try:
|
| 340 |
-
|
| 341 |
-
return message
|
| 342 |
-
|
| 343 |
-
# Process with custom progress callback
|
| 344 |
-
for progress_msg in process_pdf_document_with_updates(pdf_file, dolphin_model):
|
| 345 |
-
yield progress_msg, gr.Tabs(visible=False)
|
| 346 |
-
|
| 347 |
-
# Final result
|
| 348 |
-
combined_markdown, status = process_pdf_document(pdf_file, dolphin_model)
|
| 349 |
|
| 350 |
if status == "processing_complete":
|
| 351 |
processed_markdown = combined_markdown
|
| 352 |
show_results_tab = True
|
| 353 |
-
|
| 354 |
else:
|
| 355 |
show_results_tab = False
|
| 356 |
-
|
| 357 |
|
| 358 |
except Exception as e:
|
| 359 |
show_results_tab = False
|
| 360 |
error_msg = f"β Error processing PDF: {str(e)}"
|
| 361 |
-
|
| 362 |
-
|
| 363 |
-
def process_pdf_document_with_updates(pdf_file, model):
|
| 364 |
-
"""Generator that yields progress updates"""
|
| 365 |
-
try:
|
| 366 |
-
yield "π Converting PDF to images..."
|
| 367 |
-
images = convert_pdf_to_images_gradio(pdf_file)
|
| 368 |
-
|
| 369 |
-
if not images:
|
| 370 |
-
yield "β Failed to convert PDF to images"
|
| 371 |
-
return
|
| 372 |
-
|
| 373 |
-
for page_idx, pil_image in enumerate(images):
|
| 374 |
-
yield f"π Processing page {page_idx + 1}/{len(images)}..."
|
| 375 |
-
|
| 376 |
-
# Small delay to show progress
|
| 377 |
-
import time
|
| 378 |
-
time.sleep(0.1)
|
| 379 |
-
|
| 380 |
-
yield "π Finalizing document..."
|
| 381 |
-
time.sleep(0.5)
|
| 382 |
-
|
| 383 |
-
except Exception as e:
|
| 384 |
-
yield f"β Error: {str(e)}"
|
| 385 |
|
| 386 |
|
| 387 |
def get_processed_markdown():
|
|
@@ -395,7 +356,7 @@ def clear_all():
|
|
| 395 |
global processed_markdown, show_results_tab
|
| 396 |
processed_markdown = ""
|
| 397 |
show_results_tab = False
|
| 398 |
-
return None, "
|
| 399 |
|
| 400 |
|
| 401 |
# Create Gradio interface
|
|
@@ -484,9 +445,9 @@ with gr.Blocks(
|
|
| 484 |
elem_id="progress-container"
|
| 485 |
)
|
| 486 |
|
| 487 |
-
# Status
|
| 488 |
status_output = gr.Markdown(
|
| 489 |
-
"
|
| 490 |
elem_classes="status-message"
|
| 491 |
)
|
| 492 |
|
|
@@ -529,10 +490,10 @@ with gr.Blocks(
|
|
| 529 |
|
| 530 |
# Event handlers
|
| 531 |
process_btn.click(
|
| 532 |
-
fn=
|
| 533 |
inputs=[pdf_input],
|
| 534 |
outputs=[status_output, results_tab],
|
| 535 |
-
show_progress=
|
| 536 |
).then(
|
| 537 |
fn=get_processed_markdown,
|
| 538 |
outputs=[markdown_display]
|
|
|
|
| 131 |
raise Exception(f"Error converting PDF: {str(e)}")
|
| 132 |
|
| 133 |
|
| 134 |
+
def process_pdf_document(pdf_file, model, progress=gr.Progress()):
|
| 135 |
"""Process uploaded PDF file page by page"""
|
| 136 |
if pdf_file is None:
|
| 137 |
return "No PDF file uploaded", ""
|
| 138 |
|
| 139 |
try:
|
| 140 |
+
progress(0.1, desc="Converting PDF to images...")
|
|
|
|
| 141 |
images = convert_pdf_to_images_gradio(pdf_file)
|
| 142 |
|
| 143 |
if not images:
|
|
|
|
| 146 |
all_results = []
|
| 147 |
|
| 148 |
for page_idx, pil_image in enumerate(images):
|
| 149 |
+
progress((page_idx + 1) / len(images) * 0.8 + 0.1,
|
| 150 |
+
desc=f"Processing page {page_idx + 1}/{len(images)}...")
|
| 151 |
|
| 152 |
layout_output = model.chat("Parse the reading order of this document.", pil_image)
|
| 153 |
|
|
|
|
| 172 |
}
|
| 173 |
all_results.append(page_result)
|
| 174 |
|
| 175 |
+
progress(1.0, desc="Processing complete!")
|
|
|
|
| 176 |
|
| 177 |
combined_markdown = "\n\n---\n\n".join([
|
| 178 |
f"# Page {result['page_number']}\n\n{result['markdown']}"
|
|
|
|
| 318 |
show_results_tab = False
|
| 319 |
|
| 320 |
|
| 321 |
+
def process_uploaded_pdf(pdf_file, progress=gr.Progress()):
|
| 322 |
+
"""Main processing function for uploaded PDF"""
|
|
|
|
|
|
|
|
|
|
|
|
|
| 323 |
global processed_markdown, show_results_tab
|
| 324 |
|
| 325 |
if dolphin_model is None:
|
| 326 |
+
return "β Model not loaded", gr.Tabs(visible=False)
|
|
|
|
| 327 |
|
| 328 |
if pdf_file is None:
|
| 329 |
+
return "β No PDF uploaded", gr.Tabs(visible=False)
|
|
|
|
| 330 |
|
| 331 |
try:
|
| 332 |
+
combined_markdown, status = process_pdf_document(pdf_file, dolphin_model, progress)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 333 |
|
| 334 |
if status == "processing_complete":
|
| 335 |
processed_markdown = combined_markdown
|
| 336 |
show_results_tab = True
|
| 337 |
+
return "✅ PDF processed successfully! Check the 'Document' tab above.", gr.Tabs(visible=True)
|
| 338 |
else:
|
| 339 |
show_results_tab = False
|
| 340 |
+
return combined_markdown, gr.Tabs(visible=False)
|
| 341 |
|
| 342 |
except Exception as e:
|
| 343 |
show_results_tab = False
|
| 344 |
error_msg = f"β Error processing PDF: {str(e)}"
|
| 345 |
+
return error_msg, gr.Tabs(visible=False)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 346 |
|
| 347 |
|
| 348 |
def get_processed_markdown():
|
|
|
|
| 356 |
global processed_markdown, show_results_tab
|
| 357 |
processed_markdown = ""
|
| 358 |
show_results_tab = False
|
| 359 |
+
return None, "✅ Ready to process your PDF", gr.Tabs(visible=False)
|
| 360 |
|
| 361 |
|
| 362 |
# Create Gradio interface
|
|
|
|
| 445 |
elem_id="progress-container"
|
| 446 |
)
|
| 447 |
|
| 448 |
+
# Status output (hidden during processing)
|
| 449 |
status_output = gr.Markdown(
|
| 450 |
+
"✅ Ready to process your PDF",
|
| 451 |
elem_classes="status-message"
|
| 452 |
)
|
| 453 |
|
|
|
|
| 490 |
|
| 491 |
# Event handlers
|
| 492 |
process_btn.click(
|
| 493 |
+
fn=process_uploaded_pdf,
|
| 494 |
inputs=[pdf_input],
|
| 495 |
outputs=[status_output, results_tab],
|
| 496 |
+
show_progress=True
|
| 497 |
).then(
|
| 498 |
fn=get_processed_markdown,
|
| 499 |
outputs=[markdown_display]
|