raksama19 committed on
Commit f235195 · verified · 1 Parent(s): 9e24adc

Update app.py

Files changed (1)
  1. app.py +176 -172
app.py CHANGED
@@ -1,6 +1,6 @@
 """
-PDF Document Processing Gradio App for HuggingFace Spaces
-Built on DOLPHIN model for document parsing and analysis
 """

 import gradio as gr
@@ -25,28 +25,22 @@ try:
     from mdx_math import MathExtension
     MATH_EXTENSION_AVAILABLE = True
 except ImportError:
-    # mdx_math is not available in standard PyPI, gracefully continue without it
     pass


 class DOLPHIN:
     def __init__(self, model_id_or_path):
-        """Initialize the Hugging Face model optimized for HF Spaces
-
-        Args:
-            model_id_or_path: Path to local model or Hugging Face model ID
-        """
         self.processor = AutoProcessor.from_pretrained(model_id_or_path)
         self.model = VisionEncoderDecoderModel.from_pretrained(
             model_id_or_path,
-            torch_dtype=torch.float16,  # Use half precision for memory efficiency
             device_map="auto" if torch.cuda.is_available() else None
         )
         self.model.eval()

         self.device = "cuda" if torch.cuda.is_available() else "cpu"
         if not torch.cuda.is_available():
-            # Keep full precision on CPU
             self.model = self.model.float()

         self.tokenizer = self.processor.tokenizer
@@ -62,7 +56,6 @@ class DOLPHIN:
         images = image
         prompts = prompt if isinstance(prompt, list) else [prompt] * len(images)

-        # Prepare image
         batch_inputs = self.processor(images, return_tensors="pt", padding=True)
         batch_pixel_values = batch_inputs.pixel_values

@@ -71,7 +64,6 @@ class DOLPHIN:
         else:
             batch_pixel_values = batch_pixel_values.to(self.device)

-        # Prepare prompt
         prompts = [f"<s>{p} <Answer/>" for p in prompts]
         batch_prompt_inputs = self.tokenizer(
             prompts,
@@ -82,14 +74,13 @@ class DOLPHIN:
         batch_prompt_ids = batch_prompt_inputs.input_ids.to(self.device)
         batch_attention_mask = batch_prompt_inputs.attention_mask.to(self.device)

-        # Generate text with memory-efficient settings
         with torch.no_grad():
             outputs = self.model.generate(
                 pixel_values=batch_pixel_values,
                 decoder_input_ids=batch_prompt_ids,
                 decoder_attention_mask=batch_attention_mask,
                 min_length=1,
-                max_length=2048,  # Reduced for memory efficiency
                 pad_token_id=self.tokenizer.pad_token_id,
                 eos_token_id=self.tokenizer.eos_token_id,
                 use_cache=True,
@@ -101,10 +92,8 @@ class DOLPHIN:
                 temperature=1.0
             )

-        # Process output
         sequences = self.tokenizer.batch_decode(outputs.sequences, skip_special_tokens=False)

-        # Clean prompt text from output
         results = []
         for i, sequence in enumerate(sequences):
             cleaned = sequence.replace(prompts[i], "").replace("<pad>", "").replace("</s>", "").strip()
@@ -120,24 +109,17 @@ def convert_pdf_to_images_gradio(pdf_file):
     try:
         import pymupdf

-        # Handle different file input types
         if isinstance(pdf_file, str):
-            # If it's a file path (Gradio 5.x behavior)
             pdf_document = pymupdf.open(pdf_file)
         else:
-            # If it's a file object with .read() method
             pdf_bytes = pdf_file.read()
             pdf_document = pymupdf.open(stream=pdf_bytes, filetype="pdf")

         images = []
         for page_num in range(len(pdf_document)):
             page = pdf_document[page_num]
-
-            # Render page to image with high DPI for better quality
-            mat = pymupdf.Matrix(2.0, 2.0)  # 2x zoom for better quality
             pix = page.get_pixmap(matrix=mat)
-
-            # Convert to PIL Image
             img_data = pix.tobytes("png")
             pil_image = Image.open(io.BytesIO(img_data)).convert("RGB")
             images.append(pil_image)
@@ -152,91 +134,60 @@ def convert_pdf_to_images_gradio(pdf_file):
 def process_pdf_document(pdf_file, model, progress=gr.Progress()):
     """Process uploaded PDF file page by page"""
     if pdf_file is None:
-        return "No PDF file uploaded", [], {}

     try:
-        # Convert PDF to images
         progress(0.1, desc="Converting PDF to images...")
         images = convert_pdf_to_images_gradio(pdf_file)

         if not images:
-            return "Failed to convert PDF to images", [], {}

-        # Process each page
         all_results = []
-        page_previews = []

         for page_idx, pil_image in enumerate(images):
             progress((page_idx + 1) / len(images) * 0.8 + 0.1,
                      desc=f"Processing page {page_idx + 1}/{len(images)}...")

-            # Stage 1: Layout parsing
             layout_output = model.chat("Parse the reading order of this document.", pil_image)

-            # Stage 2: Element processing with memory optimization
             padded_image, dims = prepare_image(pil_image)
             recognition_results = process_elements_optimized(
                 layout_output,
                 padded_image,
                 dims,
                 model,
-                max_batch_size=4  # Smaller batch size for memory efficiency
             )

-            # Convert to markdown
             try:
                 markdown_converter = MarkdownConverter()
                 markdown_content = markdown_converter.convert(recognition_results)
             except:
-                # Fallback markdown generation
                 markdown_content = generate_fallback_markdown(recognition_results)

-            # Store page results
             page_result = {
                 "page_number": page_idx + 1,
-                "layout_output": layout_output,
-                "elements": recognition_results,
                 "markdown": markdown_content
             }
             all_results.append(page_result)
-
-            # Create page preview with results
-            page_preview = {
-                "image": pil_image,
-                "page_num": page_idx + 1,
-                "element_count": len(recognition_results),
-                "markdown_preview": markdown_content[:500] + "..." if len(markdown_content) > 500 else markdown_content
-            }
-            page_previews.append(page_preview)

         progress(1.0, desc="Processing complete!")

-        # Combine all markdown
         combined_markdown = "\n\n---\n\n".join([
             f"# Page {result['page_number']}\n\n{result['markdown']}"
             for result in all_results
         ])

-        # Create summary JSON
-        summary_json = {
-            "total_pages": len(images),
-            "processing_status": "completed",
-            "pages": all_results,
-            "model_info": {
-                "device": model.device,
-                "total_elements": sum(len(page["elements"]) for page in all_results)
-            }
-        }
-
-        return combined_markdown, page_previews, summary_json

     except Exception as e:
         error_msg = f"Error processing PDF: {str(e)}"
-        return error_msg, [], {"error": error_msg}


-def process_elements_optimized(layout_results, padded_image, dims, model, max_batch_size=4):
-    """Optimized element processing for memory efficiency"""
     layout_results = parse_layout_string(layout_results)

     text_elements = []
@@ -245,7 +196,6 @@ def process_elements_optimized(layout_results, padded_image, dims, model, max_ba
     previous_box = None
     reading_order = 0

-    # Collect elements to process
     for bbox, label in layout_results:
         try:
             x1, y1, x2, y2, orig_x1, orig_y1, orig_x2, orig_y2, previous_box = process_coordinates(
@@ -255,7 +205,6 @@ def process_elements_optimized(layout_results, padded_image, dims, model, max_ba
             cropped = padded_image[y1:y2, x1:x2]
             if cropped.size > 0 and cropped.shape[0] > 3 and cropped.shape[1] > 3:
                 if label == "fig":
-                    # Convert to base64 for figure display
                     pil_crop = Image.fromarray(cv2.cvtColor(cropped, cv2.COLOR_BGR2RGB))
                     pil_crop = crop_margin(pil_crop)

@@ -290,7 +239,6 @@ def process_elements_optimized(layout_results, padded_image, dims, model, max_ba
             print(f"Error processing element {label}: {str(e)}")
             continue

-    # Process elements in small batches
     recognition_results = figure_results.copy()

     if text_elements:
@@ -309,8 +257,8 @@ def process_elements_optimized(layout_results, padded_image, dims, model, max_ba
     return recognition_results


-def process_element_batch_optimized(elements, model, prompt, max_batch_size=4):
-    """Process elements in small batches for memory efficiency"""
     results = []
     batch_size = min(len(elements), max_batch_size)

@@ -319,7 +267,6 @@ def process_element_batch_optimized(elements, model, prompt, max_batch_size=4):
         crops_list = [elem["crop"] for elem in batch_elements]
         prompts_list = [prompt] * len(crops_list)

-        # Process batch
         batch_results = model.chat(prompts_list, crops_list)

         for j, result in enumerate(batch_results):
@@ -331,7 +278,6 @@ def process_element_batch_optimized(elements, model, prompt, max_batch_size=4):
                 "reading_order": elem["reading_order"],
             })

-        # Clear memory
         del crops_list, batch_elements
         if torch.cuda.is_available():
             torch.cuda.empty_cache()
@@ -352,28 +298,6 @@ def generate_fallback_markdown(recognition_results):
     return markdown_content


-def create_page_gallery(page_previews):
-    """Create a gallery view of processed pages"""
-    if not page_previews:
-        return "No pages processed yet."
-
-    gallery_html = "<div style='display: grid; grid-template-columns: repeat(auto-fit, minmax(300px, 1fr)); gap: 20px;'>"
-
-    for preview in page_previews:
-        gallery_html += f"""
-        <div style='border: 1px solid #ddd; padding: 15px; border-radius: 8px;'>
-            <h3>Page {preview['page_num']}</h3>
-            <p><strong>Elements found:</strong> {preview['element_count']}</p>
-            <div style='max-height: 200px; overflow-y: auto; background: #f5f5f5; padding: 10px; border-radius: 4px; font-size: 12px;'>
-                {preview['markdown_preview']}
-            </div>
-        </div>
-        """
-
-    gallery_html += "</div>"
-    return gallery_html
-
-
 # Initialize model
 model_path = "./hf_model"
 if not os.path.exists(model_path):
@@ -382,130 +306,210 @@ if not os.path.exists(model_path):
 try:
     dolphin_model = DOLPHIN(model_path)
     print(f"Model loaded successfully from {model_path}")
-    model_status = f"✅ Model loaded: {model_path} (Device: {dolphin_model.device})"
 except Exception as e:
     print(f"Error loading model: {e}")
     dolphin_model = None
     model_status = f"❌ Model failed to load: {str(e)}"


 def process_uploaded_pdf(pdf_file, progress=gr.Progress()):
     """Main processing function for uploaded PDF"""
     if dolphin_model is None:
-        return "Model not loaded", "Model not loaded", {}, "Model not loaded"

     if pdf_file is None:
-        return "No PDF uploaded", "No PDF uploaded", {}, "No PDF uploaded"

     try:
-        # Process the PDF
-        combined_markdown, page_previews, summary_json = process_pdf_document(
-            pdf_file, dolphin_model, progress
-        )
-
-        # Create page gallery
-        gallery_html = create_page_gallery(page_previews)
-
-        return combined_markdown, combined_markdown, summary_json, gallery_html

     except Exception as e:
         error_msg = f"Error processing PDF: {str(e)}"
-        return error_msg, error_msg, {"error": error_msg}, error_msg


 def clear_all():
-    """Clear all inputs and outputs"""
-    return None, "", "", {}, ""


-# Create Gradio interface optimized for HuggingFace Spaces
 with gr.Blocks(
-    title="DOLPHIN PDF Document AI",
     theme=gr.themes.Soft(),
     css="""
-    .main-container { max-width: 1200px; margin: 0 auto; }
-    .status-box { padding: 10px; border-radius: 5px; margin: 10px 0; }
-    .success { background-color: #d4edda; border: 1px solid #c3e6cb; }
-    .error { background-color: #f8d7da; border: 1px solid #f5c6cb; }
     """
 ) as demo:
-    gr.Markdown("# 🐬 DOLPHIN PDF Document AI")
-    gr.Markdown(
-        "Upload a PDF document and process it page by page with the DOLPHIN model. "
-        "Optimized for HuggingFace Spaces deployment."
-    )

-    # Model status
-    gr.Markdown(f"**Model Status:** {model_status}")
-
-    with gr.Row():
-        # Left column: Upload and controls
-        with gr.Column(scale=1):
-            gr.Markdown("### 📄 Upload PDF Document")
-            pdf_input = gr.File(
-                file_types=[".pdf"],
-                label="Select PDF File",
-                height=200
             )

-            with gr.Row():
-                process_btn = gr.Button("🚀 Process PDF", variant="primary", size="lg")
-                clear_btn = gr.Button("🗑️ Clear All", variant="secondary")
-
-        # Right column: Results tabs
-        with gr.Column(scale=2):
-            gr.Markdown("### 📊 Processing Results")
-
-            with gr.Tabs():
-                with gr.TabItem("📖 Markdown Output"):
-                    markdown_output = gr.Markdown(
-                        label="Processed Document",
-                        latex_delimiters=[
-                            {"left": "$$", "right": "$$", "display": True},
-                            {"left": "$", "right": "$", "display": False}
-                        ],
-                        height=600
-                    )

-                with gr.TabItem("📝 Raw Markdown"):
-                    raw_markdown = gr.Code(
-                        label="Raw Markdown Text",
-                        language="markdown",
-                        lines=25
-                    )

-                with gr.TabItem("🔍 Page Gallery"):
-                    page_gallery = gr.HTML(
-                        label="Page Overview"
-                    )

-                with gr.TabItem("🔧 JSON Details"):
-                    json_output = gr.JSON(
-                        label="Processing Details",
-                        height=600
-                    )
-
-    # Progress bar
-    progress_bar = gr.HTML(visible=False)

     # Event handlers
     process_btn.click(
         fn=process_uploaded_pdf,
         inputs=[pdf_input],
-        outputs=[markdown_output, raw_markdown, json_output, page_gallery],
         show_progress=True
     )

     clear_btn.click(
         fn=clear_all,
-        outputs=[pdf_input, markdown_output, raw_markdown, json_output, page_gallery]
     )

-    # Footer
-    gr.Markdown(
-        "---\n"
-        "**Note:** This app is optimized for NVIDIA T4 deployment on HuggingFace Spaces. "
-        "Processing time depends on document complexity and page count."
     )


@@ -515,7 +519,7 @@ if __name__ == "__main__":
         server_port=7860,
         share=False,
         show_error=True,
-        max_threads=2,  # Limit threads for memory efficiency
         inbrowser=False,
         quiet=True
     )
 
 """
+DOLPHIN PDF Document AI - Final Version
+Optimized for HuggingFace Spaces NVIDIA T4 Small deployment
 """

 import gradio as gr
 
     from mdx_math import MathExtension
     MATH_EXTENSION_AVAILABLE = True
 except ImportError:
     pass


 class DOLPHIN:
     def __init__(self, model_id_or_path):
+        """Initialize the Hugging Face model optimized for T4 Small"""
         self.processor = AutoProcessor.from_pretrained(model_id_or_path)
         self.model = VisionEncoderDecoderModel.from_pretrained(
             model_id_or_path,
+            torch_dtype=torch.float16,
             device_map="auto" if torch.cuda.is_available() else None
         )
         self.model.eval()

         self.device = "cuda" if torch.cuda.is_available() else "cpu"
         if not torch.cuda.is_available():
             self.model = self.model.float()

         self.tokenizer = self.processor.tokenizer
 
         images = image
         prompts = prompt if isinstance(prompt, list) else [prompt] * len(images)

         batch_inputs = self.processor(images, return_tensors="pt", padding=True)
         batch_pixel_values = batch_inputs.pixel_values

         else:
             batch_pixel_values = batch_pixel_values.to(self.device)

         prompts = [f"<s>{p} <Answer/>" for p in prompts]
         batch_prompt_inputs = self.tokenizer(
             prompts,

         batch_prompt_ids = batch_prompt_inputs.input_ids.to(self.device)
         batch_attention_mask = batch_prompt_inputs.attention_mask.to(self.device)

         with torch.no_grad():
             outputs = self.model.generate(
                 pixel_values=batch_pixel_values,
                 decoder_input_ids=batch_prompt_ids,
                 decoder_attention_mask=batch_attention_mask,
                 min_length=1,
+                max_length=1024,  # Reduced for T4 Small
                 pad_token_id=self.tokenizer.pad_token_id,
                 eos_token_id=self.tokenizer.eos_token_id,
                 use_cache=True,

                 temperature=1.0
             )

         sequences = self.tokenizer.batch_decode(outputs.sequences, skip_special_tokens=False)

         results = []
         for i, sequence in enumerate(sequences):
             cleaned = sequence.replace(prompts[i], "").replace("<pad>", "").replace("</s>", "").strip()
 
     try:
         import pymupdf

         if isinstance(pdf_file, str):
             pdf_document = pymupdf.open(pdf_file)
         else:
             pdf_bytes = pdf_file.read()
             pdf_document = pymupdf.open(stream=pdf_bytes, filetype="pdf")

         images = []
         for page_num in range(len(pdf_document)):
             page = pdf_document[page_num]
+            mat = pymupdf.Matrix(2.0, 2.0)
             pix = page.get_pixmap(matrix=mat)
             img_data = pix.tobytes("png")
             pil_image = Image.open(io.BytesIO(img_data)).convert("RGB")
             images.append(pil_image)
 
 def process_pdf_document(pdf_file, model, progress=gr.Progress()):
     """Process uploaded PDF file page by page"""
     if pdf_file is None:
+        return "No PDF file uploaded", ""

     try:
         progress(0.1, desc="Converting PDF to images...")
         images = convert_pdf_to_images_gradio(pdf_file)

         if not images:
+            return "Failed to convert PDF to images", ""

         all_results = []

         for page_idx, pil_image in enumerate(images):
             progress((page_idx + 1) / len(images) * 0.8 + 0.1,
                      desc=f"Processing page {page_idx + 1}/{len(images)}...")

             layout_output = model.chat("Parse the reading order of this document.", pil_image)

             padded_image, dims = prepare_image(pil_image)
             recognition_results = process_elements_optimized(
                 layout_output,
                 padded_image,
                 dims,
                 model,
+                max_batch_size=2  # Smaller batch for T4 Small
             )

             try:
                 markdown_converter = MarkdownConverter()
                 markdown_content = markdown_converter.convert(recognition_results)
             except:
                 markdown_content = generate_fallback_markdown(recognition_results)

             page_result = {
                 "page_number": page_idx + 1,
                 "markdown": markdown_content
             }
             all_results.append(page_result)

         progress(1.0, desc="Processing complete!")

         combined_markdown = "\n\n---\n\n".join([
             f"# Page {result['page_number']}\n\n{result['markdown']}"
             for result in all_results
         ])

+        return combined_markdown, "processing_complete"

     except Exception as e:
         error_msg = f"Error processing PDF: {str(e)}"
+        return error_msg, "error"


+def process_elements_optimized(layout_results, padded_image, dims, model, max_batch_size=2):
+    """Optimized element processing for T4 Small"""
     layout_results = parse_layout_string(layout_results)

     text_elements = []
 
     previous_box = None
     reading_order = 0

     for bbox, label in layout_results:
         try:
             x1, y1, x2, y2, orig_x1, orig_y1, orig_x2, orig_y2, previous_box = process_coordinates(

             cropped = padded_image[y1:y2, x1:x2]
             if cropped.size > 0 and cropped.shape[0] > 3 and cropped.shape[1] > 3:
                 if label == "fig":
                     pil_crop = Image.fromarray(cv2.cvtColor(cropped, cv2.COLOR_BGR2RGB))
                     pil_crop = crop_margin(pil_crop)

 
             print(f"Error processing element {label}: {str(e)}")
             continue

     recognition_results = figure_results.copy()

     if text_elements:
 
     return recognition_results


+def process_element_batch_optimized(elements, model, prompt, max_batch_size=2):
+    """Process elements in small batches for T4 Small"""
     results = []
     batch_size = min(len(elements), max_batch_size)

         crops_list = [elem["crop"] for elem in batch_elements]
         prompts_list = [prompt] * len(crops_list)

         batch_results = model.chat(prompts_list, crops_list)

         for j, result in enumerate(batch_results):

                 "reading_order": elem["reading_order"],
             })

         del crops_list, batch_elements
         if torch.cuda.is_available():
             torch.cuda.empty_cache()
 
     return markdown_content


 # Initialize model
 model_path = "./hf_model"
 if not os.path.exists(model_path):

 try:
     dolphin_model = DOLPHIN(model_path)
     print(f"Model loaded successfully from {model_path}")
+    model_status = f"✅ Model ready (Device: {dolphin_model.device})"
 except Exception as e:
     print(f"Error loading model: {e}")
     dolphin_model = None
     model_status = f"❌ Model failed to load: {str(e)}"


+# Global state for managing tabs
+processed_markdown = ""
+show_results_tab = False
+
+
 def process_uploaded_pdf(pdf_file, progress=gr.Progress()):
     """Main processing function for uploaded PDF"""
+    global processed_markdown, show_results_tab
+
     if dolphin_model is None:
+        return "Model not loaded", gr.Tabs(visible=False)

     if pdf_file is None:
+        return "No PDF uploaded", gr.Tabs(visible=False)

     try:
+        combined_markdown, status = process_pdf_document(pdf_file, dolphin_model, progress)

+        if status == "processing_complete":
+            processed_markdown = combined_markdown
+            show_results_tab = True
+            return "PDF processed successfully! Check the 'Document' tab above.", gr.Tabs(visible=True)
+        else:
+            show_results_tab = False
+            return combined_markdown, gr.Tabs(visible=False)
+
     except Exception as e:
+        show_results_tab = False
         error_msg = f"Error processing PDF: {str(e)}"
+        return error_msg, gr.Tabs(visible=False)
+
+
+def get_processed_markdown():
+    """Return the processed markdown content"""
+    global processed_markdown
+    return processed_markdown if processed_markdown else "No document processed yet."


 def clear_all():
+    """Clear all data and hide results tab"""
+    global processed_markdown, show_results_tab
+    processed_markdown = ""
+    show_results_tab = False
+    return None, "Upload a PDF to get started", gr.Tabs(visible=False)


+# Create Gradio interface
 with gr.Blocks(
+    title="DOLPHIN PDF AI",
     theme=gr.themes.Soft(),
     css="""
+    @import url('https://fonts.googleapis.com/css2?family=Inter:wght@300;400;500;600;700&display=swap');
+
+    * {
+        font-family: 'Inter', -apple-system, BlinkMacSystemFont, 'Segoe UI', sans-serif !important;
+    }
+
+    .main-container {
+        max-width: 1000px;
+        margin: 0 auto;
+    }
+    .upload-container {
+        text-align: center;
+        padding: 40px 20px;
+        border: 2px dashed #e0e0e0;
+        border-radius: 15px;
+        margin: 20px 0;
+    }
+    .upload-button {
+        font-size: 18px !important;
+        padding: 15px 30px !important;
+        margin: 20px 0 !important;
+        font-weight: 600 !important;
+    }
+    .status-message {
+        text-align: center;
+        padding: 15px;
+        margin: 10px 0;
+        border-radius: 8px;
+        font-weight: 500;
+    }
+    .chatbot-container {
+        max-height: 600px;
+    }
+    h1, h2, h3 {
+        font-weight: 700 !important;
+    }
     """
 ) as demo:

+    with gr.Tabs() as main_tabs:
+        # Home Tab
+        with gr.TabItem("🏠 Home", id="home"):
+            gr.Markdown(
+                "# Scholar Express\n"
+                "### Upload a research paper to get a web-friendly version, an AI chatbot, and a podcast summary. Because of our reliance on Generative AI, some errors are inevitable.\n"
+                f"**Status:** {model_status}"
             )

+            with gr.Column(elem_classes="upload-container"):
+                gr.Markdown("## 📄 Upload Your PDF Document")

+                pdf_input = gr.File(
+                    file_types=[".pdf"],
+                    label="",
+                    height=150,
+                    elem_id="pdf_upload"
+                )

+                process_btn = gr.Button(
+                    "🚀 Process PDF",
+                    variant="primary",
+                    size="lg",
+                    elem_classes="upload-button"
+                )

+                clear_btn = gr.Button(
+                    "🗑️ Clear",
+                    variant="secondary"
+                )
+
+            # Status and progress
+            status_output = gr.Markdown(
+                "Upload a PDF to get started",
+                elem_classes="status-message"
+            )
+
+        # Results Tab (initially hidden)
+        with gr.TabItem("📖 Document", id="results", visible=False) as results_tab:
+            gr.Markdown("## Processed Document")
+
+            markdown_display = gr.Markdown(
+                value="",
+                latex_delimiters=[
+                    {"left": "$$", "right": "$$", "display": True},
+                    {"left": "$", "right": "$", "display": False}
+                ],
+                height=700
+            )
+
+        # Chatbot Tab (initially hidden)
+        with gr.TabItem("💬 Chat", id="chat", visible=False) as chat_tab:
+            gr.Markdown("## Ask Questions About Your Document")
+
+            chatbot = gr.Chatbot(
+                value=[],
+                height=500,
+                elem_classes="chatbot-container",
+                placeholder="Your conversation will appear here once you process a document..."
+            )
+
+            with gr.Row():
+                msg_input = gr.Textbox(
+                    placeholder="Ask a question about the processed document...",
+                    scale=4,
+                    container=False
+                )
+                send_btn = gr.Button("Send", variant="primary", scale=1)
+
+            gr.Markdown(
+                "*Chat functionality will be implemented in the next version*",
+                elem_id="chat-notice"
+            )

     # Event handlers
     process_btn.click(
         fn=process_uploaded_pdf,
         inputs=[pdf_input],
+        outputs=[status_output, results_tab],
         show_progress=True
+    ).then(
+        fn=get_processed_markdown,
+        outputs=[markdown_display]
+    ).then(
+        fn=lambda: gr.TabItem(visible=True),
+        outputs=[chat_tab]
     )

     clear_btn.click(
         fn=clear_all,
+        outputs=[pdf_input, status_output, results_tab]
+    ).then(
+        fn=lambda: gr.TabItem(visible=False),
+        outputs=[chat_tab]
     )

+    # Placeholder chat functionality
+    def placeholder_chat(message, history):
+        return history + [["Coming soon: AI-powered document Q&A", "This feature will allow you to ask questions about your processed PDF document."]]
+
+    send_btn.click(
+        fn=placeholder_chat,
+        inputs=[msg_input, chatbot],
+        outputs=[chatbot]
+    ).then(
+        lambda: "",
+        outputs=[msg_input]
+    )


         server_port=7860,
         share=False,
         show_error=True,
+        max_threads=1,  # Single thread for T4 Small
         inbrowser=False,
         quiet=True
     )