vithacocf committed on
Commit
7472731
·
verified ·
1 Parent(s): e674cd2

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +58 -106
app.py CHANGED
@@ -160,7 +160,7 @@ def make_cache_key(image: Image.Image, prompt: str) -> str:
160
 
161
  # --- 7. Inference with mixed precision & error handling ---
162
  def run_inference(image: Image.Image, prompt: str = "") -> str:
163
- prompt_text = prompt.strip() or "Read information from the document."
164
  img = normalize_image(image)
165
  key = make_cache_key(img, prompt_text)
166
 
@@ -186,7 +186,7 @@ def run_inference(image: Image.Image, prompt: str = "") -> str:
186
  with torch.cuda.amp.autocast(enabled=(device.type=='cuda')):
187
  gen = model.generate(
188
  **inputs,
189
- max_new_tokens=512,
190
  do_sample=False,
191
  eos_token_id=processor.tokenizer.eos_token_id
192
  )
@@ -280,17 +280,17 @@ STRICT INSTRUCTIONS – read carefully and follow EXACTLY:
280
  2. DO NOT add, remove, rename, or reorder any XML tags.
281
  3. DO NOT include explanations, markdown, notes, comments, or extra spacing outside the XML block.
282
  4. For every tag, fill in the exact value read from the image.
283
- • NEVER copy or repeat the label/placeholder text.
284
  • NEVER guess or invent values.
285
  5. If a value is missing or unreadable, leave the tag EMPTY (e.g. <tag></tag>).
286
  6. DO NOT include Vietnamese text or translations inside tag values.
287
  7. The output MUST start with the root tag and end with its correct closing tag; all tags must be well-formed.
288
  8. Dates must be in YYYY-MM-DD format.
289
- 9. Boolean tags must be exactly true or false (lower-case, no quotes).
290
  ✔ √ Yes Passed ⇒ true | ✘ X No Fail ⇒ false
291
- 10. **Inside each value**
292
- • Replace every internal line-break with “, ” (comma + space).
293
- • Trim leading/trailing whitespace.
294
  • Escape XML special characters: & → &amp;, < → &lt;, > → &gt;.
295
  11. **Phone / contact fields** – digits, “+”, “–”, spaces only; if multiple numbers, separate with “, ”.
296
  12. **Signature fields** – fill ONLY if the signature appears as legible text; if it is handwritten, leave the tag empty.
@@ -327,17 +327,17 @@ STRICT INSTRUCTIONS – read carefully and follow EXACTLY:
327
  2. DO NOT add, remove, rename, or reorder any XML tags.
328
  3. DO NOT include explanations, markdown, notes, comments, or extra spacing outside the XML block.
329
  4. For every tag, fill in the exact value read from the image.
330
- • NEVER copy or repeat the label/placeholder text.
331
  • NEVER guess or invent values.
332
  5. If a value is missing or unreadable, leave the tag EMPTY (e.g. <tag></tag>).
333
  6. DO NOT include Vietnamese text or translations inside tag values.
334
  7. The output MUST start with the root tag and end with its correct closing tag; all tags must be well-formed.
335
  8. Dates must be in YYYY-MM-DD format.
336
- 9. Boolean tags must be exactly true or false (lower-case, no quotes).
337
  ✔ √ Yes Passed ⇒ true | ✘ X No Fail ⇒ false
338
- 10. **Inside each value**
339
- • Replace every internal line-break with “, ” (comma + space).
340
- • Trim leading/trailing whitespace.
341
  • Escape XML special characters: & → &amp;, < → &lt;, > → &gt;.
342
  11. **Phone / contact fields** – digits, “+”, “–”, spaces only; if multiple numbers, separate with “, ”.
343
  12. **Signature fields** – fill ONLY if the signature appears as legible text; if it is handwritten, leave the tag empty.
@@ -366,17 +366,17 @@ STRICT INSTRUCTIONS – read carefully and follow EXACTLY:
366
  2. DO NOT add, remove, rename, or reorder any XML tags.
367
  3. DO NOT include explanations, markdown, notes, comments, or extra spacing outside the XML block.
368
  4. For every tag, fill in the exact value read from the image.
369
- • NEVER copy or repeat the label/placeholder text.
370
  • NEVER guess or invent values.
371
  5. If a value is missing or unreadable, leave the tag EMPTY (e.g. <tag></tag>).
372
  6. DO NOT include Vietnamese text or translations inside tag values.
373
  7. The output MUST start with the root tag and end with its correct closing tag; all tags must be well-formed.
374
  8. Dates must be in YYYY-MM-DD format.
375
- 9. Boolean tags must be exactly true or false (lower-case, no quotes).
376
  ✔ √ Yes Passed ⇒ true | ✘ X No Fail ⇒ false
377
- 10. **Inside each value**
378
- • Replace every internal line-break with “, ” (comma + space).
379
- • Trim leading/trailing whitespace.
380
  • Escape XML special characters: & → &amp;, < → &lt;, > → &gt;.
381
  11. **Phone / contact fields** – digits, “+”, “–”, spaces only; if multiple numbers, separate with “, ”.
382
  12. **Signature fields** – fill ONLY if the signature appears as legible text; if it is handwritten, leave the tag empty.
@@ -401,17 +401,17 @@ STRICT INSTRUCTIONS – read carefully and follow EXACTLY:
401
  2. DO NOT add, remove, rename, or reorder any XML tags.
402
  3. DO NOT include explanations, markdown, notes, comments, or extra spacing outside the XML block.
403
  4. For every tag, fill in the exact value read from the image.
404
- • NEVER copy or repeat the label/placeholder text.
405
  • NEVER guess or invent values.
406
  5. If a value is missing or unreadable, leave the tag EMPTY (e.g. <tag></tag>).
407
  6. DO NOT include Vietnamese text or translations inside tag values.
408
  7. The output MUST start with the root tag and end with its correct closing tag; all tags must be well-formed.
409
  8. Dates must be in YYYY-MM-DD format.
410
- 9. Boolean tags must be exactly true or false (lower-case, no quotes).
411
  ✔ √ Yes Passed ⇒ true | ✘ X No Fail ⇒ false
412
- 10. **Inside each value**
413
- • Replace every internal line-break with “, ” (comma + space).
414
- • Trim leading/trailing whitespace.
415
  • Escape XML special characters: & → &amp;, < → &lt;, > → &gt;.
416
  11. **Phone / contact fields** – digits, “+”, “–”, spaces only; if multiple numbers, separate with “, ”.
417
  12. **Signature fields** – fill ONLY if the signature appears as legible text; if it is handwritten, leave the tag empty.
@@ -437,17 +437,17 @@ STRICT INSTRUCTIONS – read carefully and follow EXACTLY:
437
  2. DO NOT add, remove, rename, or reorder any XML tags.
438
  3. DO NOT include explanations, markdown, notes, comments, or extra spacing outside the XML block.
439
  4. For every tag, fill in the exact value read from the image.
440
- • NEVER copy or repeat the label/placeholder text.
441
  • NEVER guess or invent values.
442
  5. If a value is missing or unreadable, leave the tag EMPTY (e.g. <tag></tag>).
443
  6. DO NOT include Vietnamese text or translations inside tag values.
444
  7. The output MUST start with the root tag and end with its correct closing tag; all tags must be well-formed.
445
  8. Dates must be in YYYY-MM-DD format.
446
- 9. Boolean tags must be exactly true or false (lower-case, no quotes).
447
  ✔ √ Yes Passed ⇒ true | ✘ X No Fail ⇒ false
448
- 10. **Inside each value**
449
- • Replace every internal line-break with “, ” (comma + space).
450
- • Trim leading/trailing whitespace.
451
  • Escape XML special characters: & → &amp;, < → &lt;, > → &gt;.
452
  11. **Phone / contact fields** – digits, “+”, “–”, spaces only; if multiple numbers, separate with “, ”.
453
  12. **Signature fields** – fill ONLY if the signature appears as legible text; if it is handwritten, leave the tag empty.
@@ -475,17 +475,17 @@ STRICT INSTRUCTIONS – read carefully and follow EXACTLY:
475
  2. DO NOT add, remove, rename, or reorder any XML tags.
476
  3. DO NOT include explanations, markdown, notes, comments, or extra spacing outside the XML block.
477
  4. For every tag, fill in the exact value read from the image.
478
- • NEVER copy or repeat the label/placeholder text.
479
  • NEVER guess or invent values.
480
  5. If a value is missing or unreadable, leave the tag EMPTY (e.g. <tag></tag>).
481
  6. DO NOT include Vietnamese text or translations inside tag values.
482
  7. The output MUST start with the root tag and end with its correct closing tag; all tags must be well-formed.
483
  8. Dates must be in YYYY-MM-DD format.
484
- 9. Boolean tags must be exactly true or false (lower-case, no quotes).
485
  ✔ √ Yes Passed ⇒ true | ✘ X No Fail ⇒ false
486
- 10. **Inside each value**
487
- • Replace every internal line-break with “, ” (comma + space).
488
- • Trim leading/trailing whitespace.
489
  • Escape XML special characters: & → &amp;, < → &lt;, > → &gt;.
490
  11. **Phone / contact fields** – digits, “+”, “–”, spaces only; if multiple numbers, separate with “, ”.
491
  12. **Signature fields** – fill ONLY if the signature appears as legible text; if it is handwritten, leave the tag empty.
@@ -535,54 +535,6 @@ def export_json(image_name, result_text):
535
  except Exception as e:
536
  return "", f"[Export JSON Failed]: {e}"
537
 
538
- # --- 10. Gradio UI ---
539
- # with gr.Blocks(title="Camel-Doc-OCR") as demo:
540
- # gr.Markdown("Camel-Doc-OCR (Qwen2.5-VL, 4-bit)")
541
-
542
- # status_txt = gr.Textbox(label="Status & Memory", interactive=False)
543
- # cache_txt = gr.Textbox(label="Cache Stats", interactive=False)
544
- # clear_btn = gr.Button("Clear Cache")
545
- # clear_btn.click(fn=lambda: (cache_clear(), f"Cache: {len(_mru_cache)}/{CACHE_MAX_SIZE}"), outputs=[cache_txt])
546
-
547
- # file_input = gr.File(label="Tải ảnh hoặc PDF", file_types=[".jpg", ".jpeg", ".png", ".pdf"])
548
- # prompt_input = gr.Textbox(label="Prompt thuần", lines=2)
549
- # config_input = gr.Textbox(label="JSON Prompt", lines=12)
550
-
551
- # gr.Markdown("Chọn mẫu prompt:")
552
- # with gr.Row():
553
- # # for key in prompt_templates:
554
- # # btn = gr.Button(f"Mẫu {key}")
555
- # # btn.click(fn=insert_template, inputs=[gr.State(key)], outputs=config_input)
556
- # for key in prompt_templates:
557
- # gr.Button(f"Mẫu {key}").click(
558
- # fn=lambda k=key: insert_template(k),
559
- # outputs=config_input
560
- # )
561
-
562
-
563
- # run_btn = gr.Button("Chạy OCR")
564
- # export_btn = gr.Button("Xuất JSON", visible=False)
565
-
566
- # hidden_name = gr.Textbox(visible=False)
567
- # result_output = gr.Textbox(label="Kết quả trích xuất", lines=20)
568
- # json_file = gr.File(label="File JSON", visible=False, file_types=[".json"])
569
- # json_text = gr.Code(label="JSON Output", language="json", lines=20)
570
-
571
- # # Run inference
572
- # run_btn.click(
573
- # fn=handle_file,
574
- # inputs=[file_input, prompt_input, config_input],
575
- # outputs=[hidden_name, result_output]
576
- # )
577
-
578
- # # Update memory status
579
- # run_btn.click(fn=lambda: get_memory_info(), outputs=[status_txt])
580
- # run_btn.click(fn=lambda: f"Cache: {len(_mru_cache)}/{CACHE_MAX_SIZE}", outputs=[cache_txt])
581
- # run_btn.click(fn=lambda: gr.update(visible=True), outputs=[export_btn])
582
-
583
- # # Export
584
- # export_btn.click(fn=export_json, inputs=[hidden_name, result_output], outputs=[json_file, json_text])
585
- # export_btn.click(fn=lambda: gr.update(visible=True), outputs=[json_file])
586
  # --- 10. Gradio UI ---
587
  css = """
588
  .gradio-textbox textarea {
@@ -629,43 +581,43 @@ css = """
629
 
630
  with gr.Blocks(title="Camel-Doc-OCR", css=css) as demo:
631
  gr.Markdown("## 🧾 Camel-Doc-OCR (Qwen2.5-VL, 4-bit)")
632
-
633
  # --- Status Bar (Full width) ---
634
  with gr.Row():
635
  status_txt = gr.Textbox(label="Status & Memory", interactive=False, scale=2)
636
  cache_txt = gr.Textbox(label="Cache Stats", interactive=False, scale=1)
637
  clear_btn = gr.Button("Clear Cache", scale=1)
638
  clear_btn.click(fn=lambda: (cache_clear(), f"Cache: {len(_mru_cache)}/{CACHE_MAX_SIZE}"), outputs=[cache_txt])
639
-
640
  # --- Main Layout: 2 Columns ---
641
  with gr.Row():
642
  # === LEFT COLUMN: Input ===
643
  with gr.Column(scale=1):
644
  gr.Markdown("### 📥 INPUT")
645
-
646
  # File Input
647
  file_input = gr.File(
648
- label="📤 Tải ảnh hoặc PDF",
649
  file_types=[".jpg", ".jpeg", ".png", ".pdf"],
650
  height=100
651
  )
652
-
653
  # Prompt Input
654
  prompt_input = gr.Textbox(
655
- label="Prompt thuần",
656
  lines=2,
657
  placeholder="Nhập prompt tùy chỉnh...",
658
  max_lines=3
659
  )
660
-
661
  # JSON Config
662
  config_input = gr.Textbox(
663
- label="JSON Prompt",
664
  lines=6,
665
  placeholder="Cấu hình JSON sẽ xuất hiện ở đây...",
666
  max_lines=8
667
  )
668
-
669
  # Prompt Templates
670
  gr.Markdown("### 📑 Mẫu:")
671
  with gr.Row():
@@ -675,64 +627,64 @@ with gr.Blocks(title="Camel-Doc-OCR", css=css) as demo:
675
  inputs=[],
676
  outputs=config_input
677
  )
678
-
679
  # Run Button
680
  run_btn = gr.Button("🚀 Chạy OCR", variant="primary")
681
-
682
  # === RIGHT COLUMN: Output ===
683
  with gr.Column(scale=1):
684
  gr.Markdown("### 📤 OUTPUT")
685
-
686
  # Result Output
687
  result_output = gr.Textbox(
688
- label="Kết quả trích xuất",
689
  lines=10,
690
  placeholder="Kết quả sẽ hiển thị ở đây sau khi chạy OCR...",
691
  max_lines=12
692
  )
693
-
694
  # Export Section
695
  with gr.Row():
696
  export_btn = gr.Button("📦 Xuất JSON", visible=False, variant="secondary", size="sm")
697
-
698
  # JSON Output
699
  json_text = gr.Code(
700
- label="JSON Output",
701
- language="json",
702
  lines=6,
703
  visible=False
704
  )
705
-
706
  # Download File
707
  json_file = gr.File(
708
- label="File JSON để tải",
709
- visible=False,
710
  file_types=[".json"]
711
  )
712
-
713
  # --- Hidden Fields ---
714
  hidden_name = gr.Textbox(visible=False)
715
-
716
  # --- Event Handlers ---
717
-
718
  # Run Inference
719
  run_btn.click(
720
  fn=handle_file,
721
  inputs=[file_input, prompt_input, config_input],
722
  outputs=[hidden_name, result_output]
723
  )
724
-
725
  run_btn.click(fn=get_memory_info, outputs=[status_txt])
726
  run_btn.click(fn=lambda: f"Cache: {len(_mru_cache)}/{CACHE_MAX_SIZE}", outputs=[cache_txt])
727
  run_btn.click(fn=lambda: gr.update(visible=True), outputs=[export_btn])
728
-
729
  # Export JSON
730
  export_btn.click(
731
  fn=export_json,
732
  inputs=[hidden_name, result_output],
733
  outputs=[json_file, json_text]
734
  )
735
-
736
  export_btn.click(fn=lambda: gr.update(visible=True), outputs=[json_file])
737
  export_btn.click(fn=lambda: gr.update(visible=True), outputs=[json_text])
738
 
 
160
 
161
  # --- 7. Inference with mixed precision & error handling ---
162
  def run_inference(image: Image.Image, prompt: str = "") -> str:
163
+ prompt_text = prompt.strip() or ""
164
  img = normalize_image(image)
165
  key = make_cache_key(img, prompt_text)
166
 
 
186
  with torch.cuda.amp.autocast(enabled=(device.type=='cuda')):
187
  gen = model.generate(
188
  **inputs,
189
+ max_new_tokens=2048,
190
  do_sample=False,
191
  eos_token_id=processor.tokenizer.eos_token_id
192
  )
 
280
  2. DO NOT add, remove, rename, or reorder any XML tags.
281
  3. DO NOT include explanations, markdown, notes, comments, or extra spacing outside the XML block.
282
  4. For every tag, fill in the exact value read from the image.
283
+ • NEVER copy or repeat the label/placeholder text.
284
  • NEVER guess or invent values.
285
  5. If a value is missing or unreadable, leave the tag EMPTY (e.g. <tag></tag>).
286
  6. DO NOT include Vietnamese text or translations inside tag values.
287
  7. The output MUST start with the root tag and end with its correct closing tag; all tags must be well-formed.
288
  8. Dates must be in YYYY-MM-DD format.
289
+ 9. Boolean tags must be exactly true or false (lower-case, no quotes).
290
  ✔ √ Yes Passed ⇒ true | ✘ X No Fail ⇒ false
291
+ 10. **Inside each value**
292
+ • Replace every internal line-break with “, ” (comma + space).
293
+ • Trim leading/trailing whitespace.
294
  • Escape XML special characters: & → &amp;, < → &lt;, > → &gt;.
295
  11. **Phone / contact fields** – digits, “+”, “–”, spaces only; if multiple numbers, separate with “, ”.
296
  12. **Signature fields** – fill ONLY if the signature appears as legible text; if it is handwritten, leave the tag empty.
 
327
  2. DO NOT add, remove, rename, or reorder any XML tags.
328
  3. DO NOT include explanations, markdown, notes, comments, or extra spacing outside the XML block.
329
  4. For every tag, fill in the exact value read from the image.
330
+ • NEVER copy or repeat the label/placeholder text.
331
  • NEVER guess or invent values.
332
  5. If a value is missing or unreadable, leave the tag EMPTY (e.g. <tag></tag>).
333
  6. DO NOT include Vietnamese text or translations inside tag values.
334
  7. The output MUST start with the root tag and end with its correct closing tag; all tags must be well-formed.
335
  8. Dates must be in YYYY-MM-DD format.
336
+ 9. Boolean tags must be exactly true or false (lower-case, no quotes).
337
  ✔ √ Yes Passed ⇒ true | ✘ X No Fail ⇒ false
338
+ 10. **Inside each value**
339
+ • Replace every internal line-break with “, ” (comma + space).
340
+ • Trim leading/trailing whitespace.
341
  • Escape XML special characters: & → &amp;, < → &lt;, > → &gt;.
342
  11. **Phone / contact fields** – digits, “+”, “–”, spaces only; if multiple numbers, separate with “, ”.
343
  12. **Signature fields** – fill ONLY if the signature appears as legible text; if it is handwritten, leave the tag empty.
 
366
  2. DO NOT add, remove, rename, or reorder any XML tags.
367
  3. DO NOT include explanations, markdown, notes, comments, or extra spacing outside the XML block.
368
  4. For every tag, fill in the exact value read from the image.
369
+ • NEVER copy or repeat the label/placeholder text.
370
  • NEVER guess or invent values.
371
  5. If a value is missing or unreadable, leave the tag EMPTY (e.g. <tag></tag>).
372
  6. DO NOT include Vietnamese text or translations inside tag values.
373
  7. The output MUST start with the root tag and end with its correct closing tag; all tags must be well-formed.
374
  8. Dates must be in YYYY-MM-DD format.
375
+ 9. Boolean tags must be exactly true or false (lower-case, no quotes).
376
  ✔ √ Yes Passed ⇒ true | ✘ X No Fail ⇒ false
377
+ 10. **Inside each value**
378
+ • Replace every internal line-break with “, ” (comma + space).
379
+ • Trim leading/trailing whitespace.
380
  • Escape XML special characters: & → &amp;, < → &lt;, > → &gt;.
381
  11. **Phone / contact fields** – digits, “+”, “–”, spaces only; if multiple numbers, separate with “, ”.
382
  12. **Signature fields** – fill ONLY if the signature appears as legible text; if it is handwritten, leave the tag empty.
 
401
  2. DO NOT add, remove, rename, or reorder any XML tags.
402
  3. DO NOT include explanations, markdown, notes, comments, or extra spacing outside the XML block.
403
  4. For every tag, fill in the exact value read from the image.
404
+ • NEVER copy or repeat the label/placeholder text.
405
  • NEVER guess or invent values.
406
  5. If a value is missing or unreadable, leave the tag EMPTY (e.g. <tag></tag>).
407
  6. DO NOT include Vietnamese text or translations inside tag values.
408
  7. The output MUST start with the root tag and end with its correct closing tag; all tags must be well-formed.
409
  8. Dates must be in YYYY-MM-DD format.
410
+ 9. Boolean tags must be exactly true or false (lower-case, no quotes).
411
  ✔ √ Yes Passed ⇒ true | ✘ X No Fail ⇒ false
412
+ 10. **Inside each value**
413
+ • Replace every internal line-break with “, ” (comma + space).
414
+ • Trim leading/trailing whitespace.
415
  • Escape XML special characters: & → &amp;, < → &lt;, > → &gt;.
416
  11. **Phone / contact fields** – digits, “+”, “–”, spaces only; if multiple numbers, separate with “, ”.
417
  12. **Signature fields** – fill ONLY if the signature appears as legible text; if it is handwritten, leave the tag empty.
 
437
  2. DO NOT add, remove, rename, or reorder any XML tags.
438
  3. DO NOT include explanations, markdown, notes, comments, or extra spacing outside the XML block.
439
  4. For every tag, fill in the exact value read from the image.
440
+ • NEVER copy or repeat the label/placeholder text.
441
  • NEVER guess or invent values.
442
  5. If a value is missing or unreadable, leave the tag EMPTY (e.g. <tag></tag>).
443
  6. DO NOT include Vietnamese text or translations inside tag values.
444
  7. The output MUST start with the root tag and end with its correct closing tag; all tags must be well-formed.
445
  8. Dates must be in YYYY-MM-DD format.
446
+ 9. Boolean tags must be exactly true or false (lower-case, no quotes).
447
  ✔ √ Yes Passed ⇒ true | ✘ X No Fail ⇒ false
448
+ 10. **Inside each value**
449
+ • Replace every internal line-break with “, ” (comma + space).
450
+ • Trim leading/trailing whitespace.
451
  • Escape XML special characters: & → &amp;, < → &lt;, > → &gt;.
452
  11. **Phone / contact fields** – digits, “+”, “–”, spaces only; if multiple numbers, separate with “, ”.
453
  12. **Signature fields** – fill ONLY if the signature appears as legible text; if it is handwritten, leave the tag empty.
 
475
  2. DO NOT add, remove, rename, or reorder any XML tags.
476
  3. DO NOT include explanations, markdown, notes, comments, or extra spacing outside the XML block.
477
  4. For every tag, fill in the exact value read from the image.
478
+ • NEVER copy or repeat the label/placeholder text.
479
  • NEVER guess or invent values.
480
  5. If a value is missing or unreadable, leave the tag EMPTY (e.g. <tag></tag>).
481
  6. DO NOT include Vietnamese text or translations inside tag values.
482
  7. The output MUST start with the root tag and end with its correct closing tag; all tags must be well-formed.
483
  8. Dates must be in YYYY-MM-DD format.
484
+ 9. Boolean tags must be exactly true or false (lower-case, no quotes).
485
  ✔ √ Yes Passed ⇒ true | ✘ X No Fail ⇒ false
486
+ 10. **Inside each value**
487
+ • Replace every internal line-break with “, ” (comma + space).
488
+ • Trim leading/trailing whitespace.
489
  • Escape XML special characters: & → &amp;, < → &lt;, > → &gt;.
490
  11. **Phone / contact fields** – digits, “+”, “–”, spaces only; if multiple numbers, separate with “, ”.
491
  12. **Signature fields** – fill ONLY if the signature appears as legible text; if it is handwritten, leave the tag empty.
 
535
  except Exception as e:
536
  return "", f"[Export JSON Failed]: {e}"
537
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
538
  # --- 10. Gradio UI ---
539
  css = """
540
  .gradio-textbox textarea {
 
581
 
582
  with gr.Blocks(title="Camel-Doc-OCR", css=css) as demo:
583
  gr.Markdown("## 🧾 Camel-Doc-OCR (Qwen2.5-VL, 4-bit)")
584
+
585
  # --- Status Bar (Full width) ---
586
  with gr.Row():
587
  status_txt = gr.Textbox(label="Status & Memory", interactive=False, scale=2)
588
  cache_txt = gr.Textbox(label="Cache Stats", interactive=False, scale=1)
589
  clear_btn = gr.Button("Clear Cache", scale=1)
590
  clear_btn.click(fn=lambda: (cache_clear(), f"Cache: {len(_mru_cache)}/{CACHE_MAX_SIZE}"), outputs=[cache_txt])
591
+
592
  # --- Main Layout: 2 Columns ---
593
  with gr.Row():
594
  # === LEFT COLUMN: Input ===
595
  with gr.Column(scale=1):
596
  gr.Markdown("### 📥 INPUT")
597
+
598
  # File Input
599
  file_input = gr.File(
600
+ label="📤 Tải ảnh hoặc PDF",
601
  file_types=[".jpg", ".jpeg", ".png", ".pdf"],
602
  height=100
603
  )
604
+
605
  # Prompt Input
606
  prompt_input = gr.Textbox(
607
+ label="Prompt thuần",
608
  lines=2,
609
  placeholder="Nhập prompt tùy chỉnh...",
610
  max_lines=3
611
  )
612
+
613
  # JSON Config
614
  config_input = gr.Textbox(
615
+ label="JSON Prompt",
616
  lines=6,
617
  placeholder="Cấu hình JSON sẽ xuất hiện ở đây...",
618
  max_lines=8
619
  )
620
+
621
  # Prompt Templates
622
  gr.Markdown("### 📑 Mẫu:")
623
  with gr.Row():
 
627
  inputs=[],
628
  outputs=config_input
629
  )
630
+
631
  # Run Button
632
  run_btn = gr.Button("🚀 Chạy OCR", variant="primary")
633
+
634
  # === RIGHT COLUMN: Output ===
635
  with gr.Column(scale=1):
636
  gr.Markdown("### 📤 OUTPUT")
637
+
638
  # Result Output
639
  result_output = gr.Textbox(
640
+ label="Kết quả trích xuất",
641
  lines=10,
642
  placeholder="Kết quả sẽ hiển thị ở đây sau khi chạy OCR...",
643
  max_lines=12
644
  )
645
+
646
  # Export Section
647
  with gr.Row():
648
  export_btn = gr.Button("📦 Xuất JSON", visible=False, variant="secondary", size="sm")
649
+
650
  # JSON Output
651
  json_text = gr.Code(
652
+ label="JSON Output",
653
+ language="json",
654
  lines=6,
655
  visible=False
656
  )
657
+
658
  # Download File
659
  json_file = gr.File(
660
+ label="File JSON để tải",
661
+ visible=False,
662
  file_types=[".json"]
663
  )
664
+
665
  # --- Hidden Fields ---
666
  hidden_name = gr.Textbox(visible=False)
667
+
668
  # --- Event Handlers ---
669
+
670
  # Run Inference
671
  run_btn.click(
672
  fn=handle_file,
673
  inputs=[file_input, prompt_input, config_input],
674
  outputs=[hidden_name, result_output]
675
  )
676
+
677
  run_btn.click(fn=get_memory_info, outputs=[status_txt])
678
  run_btn.click(fn=lambda: f"Cache: {len(_mru_cache)}/{CACHE_MAX_SIZE}", outputs=[cache_txt])
679
  run_btn.click(fn=lambda: gr.update(visible=True), outputs=[export_btn])
680
+
681
  # Export JSON
682
  export_btn.click(
683
  fn=export_json,
684
  inputs=[hidden_name, result_output],
685
  outputs=[json_file, json_text]
686
  )
687
+
688
  export_btn.click(fn=lambda: gr.update(visible=True), outputs=[json_file])
689
  export_btn.click(fn=lambda: gr.update(visible=True), outputs=[json_text])
690