Update app.py
Browse files
app.py
CHANGED
|
@@ -11,11 +11,9 @@ model_tag = "impira/layoutlm-document-qa"
|
|
| 11 |
MODEL = LayoutLMForQuestionAnswering.from_pretrained(model_tag).eval()
|
| 12 |
TOKENIZER = AutoTokenizer.from_pretrained(model_tag)
|
| 13 |
OCR = PaddleOCR(
|
| 14 |
-
use_angle_cls=True,
|
| 15 |
lang="en",
|
| 16 |
det_limit_side_len=10_000,
|
| 17 |
det_db_score_mode="slow",
|
| 18 |
-
enable_mlkdnn=True,
|
| 19 |
)
|
| 20 |
|
| 21 |
|
|
@@ -27,7 +25,7 @@ def predict(image: Image.Image, question: str, ocr_engine: str):
|
|
| 27 |
image_np = np.array(image)
|
| 28 |
|
| 29 |
if ocr_engine == PADDLE_OCR_LABEL:
|
| 30 |
-
ocr_result = OCR.ocr(image_np)[0]
|
| 31 |
words = [x[1][0] for x in ocr_result]
|
| 32 |
boxes = np.asarray([x[0] for x in ocr_result]) # (n_boxes, 4, 2)
|
| 33 |
|
|
|
|
| 11 |
MODEL = LayoutLMForQuestionAnswering.from_pretrained(model_tag).eval()
|
| 12 |
TOKENIZER = AutoTokenizer.from_pretrained(model_tag)
|
| 13 |
OCR = PaddleOCR(
|
|
|
|
| 14 |
lang="en",
|
| 15 |
det_limit_side_len=10_000,
|
| 16 |
det_db_score_mode="slow",
|
|
|
|
| 17 |
)
|
| 18 |
|
| 19 |
|
|
|
|
| 25 |
image_np = np.array(image)
|
| 26 |
|
| 27 |
if ocr_engine == PADDLE_OCR_LABEL:
|
| 28 |
+
ocr_result = OCR.ocr(image_np, cls=False)[0]
|
| 29 |
words = [x[1][0] for x in ocr_result]
|
| 30 |
boxes = np.asarray([x[0] for x in ocr_result]) # (n_boxes, 4, 2)
|
| 31 |
|