Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
|
@@ -3,7 +3,6 @@ import time
|
|
| 3 |
import cv2
|
| 4 |
import gradio as gr
|
| 5 |
from lineless_table_rec import LinelessTableRecognition
|
| 6 |
-
from paddleocr import PPStructure
|
| 7 |
from rapid_table import RapidTable
|
| 8 |
from rapidocr_onnxruntime import RapidOCR
|
| 9 |
from table_cls import TableCls
|
|
@@ -25,17 +24,16 @@ table_engine_list = [
|
|
| 25 |
"RapidTable(SLANet)",
|
| 26 |
"RapidTable(SLANet-plus)",
|
| 27 |
"wired_table_v2",
|
| 28 |
-
"pp_table",
|
| 29 |
"wired_table_v1",
|
| 30 |
"lineless_table"
|
| 31 |
]
|
| 32 |
|
| 33 |
# 示例图片路径
|
| 34 |
example_images = [
|
| 35 |
-
"images/wired1.
|
| 36 |
-
"images/wired2.
|
| 37 |
"images/wired3.png",
|
| 38 |
-
"images/lineless1.
|
| 39 |
"images/wired4.jpg",
|
| 40 |
"images/lineless2.png",
|
| 41 |
"images/wired5.jpg",
|
|
@@ -57,15 +55,6 @@ for det_model in det_model_dir.keys():
|
|
| 57 |
rec_model_path = rec_model_dir[rec_model]
|
| 58 |
key = f"{det_model}_{rec_model}"
|
| 59 |
ocr_engine_dict[key] = RapidOCR(det_model_path=det_model_path, rec_model_path=rec_model_path)
|
| 60 |
-
pp_engine_dict[key] = PPStructure(
|
| 61 |
-
layout=False,
|
| 62 |
-
show_log=False,
|
| 63 |
-
table=True,
|
| 64 |
-
use_onnx=True,
|
| 65 |
-
table_model_dir=table_rec_path,
|
| 66 |
-
det_model_dir=det_model_path,
|
| 67 |
-
rec_model_dir=rec_model_path
|
| 68 |
-
)
|
| 69 |
|
| 70 |
def trans_char_ocr_res(ocr_res):
|
| 71 |
word_result = []
|
|
@@ -95,8 +84,6 @@ def select_table_model(img, table_engine_type, det_model, rec_model):
|
|
| 95 |
return wired_table_engine_v2, table_engine_type
|
| 96 |
elif table_engine_type == "lineless_table":
|
| 97 |
return lineless_table_engine, table_engine_type
|
| 98 |
-
elif table_engine_type == "pp_table":
|
| 99 |
-
return pp_engine_dict[f"{det_model}_{rec_model}"], 0
|
| 100 |
elif table_engine_type == "auto":
|
| 101 |
cls, elasp = table_cls(img)
|
| 102 |
if cls == 'wired':
|
|
@@ -113,30 +100,22 @@ def process_image(img_input, small_box_cut_enhance, table_engine_type, char_ocr,
|
|
| 113 |
table_engine, talbe_type = select_table_model(img, table_engine_type, det_model, rec_model)
|
| 114 |
ocr_engine = select_ocr_model(det_model, rec_model)
|
| 115 |
|
| 116 |
-
|
| 117 |
-
|
| 118 |
-
|
| 119 |
-
|
|
|
|
|
|
|
|
|
|
| 120 |
polygons = [[polygon[0], polygon[1], polygon[4], polygon[5]] for polygon in polygons]
|
| 121 |
-
|
| 122 |
-
|
| 123 |
-
else:
|
| 124 |
-
ocr_res, ocr_infer_elapse = ocr_engine(img, return_word_box=char_ocr)
|
| 125 |
-
det_cost, cls_cost, rec_cost = ocr_infer_elapse
|
| 126 |
-
if char_ocr:
|
| 127 |
-
ocr_res = trans_char_ocr_res(ocr_res)
|
| 128 |
-
ocr_boxes = [box_4_2_poly_to_box_4_1(ori_ocr[0]) for ori_ocr in ocr_res]
|
| 129 |
-
if isinstance(table_engine, RapidTable):
|
| 130 |
-
html, polygons, table_rec_elapse = table_engine(img, ocr_result=ocr_res)
|
| 131 |
-
polygons = [[polygon[0], polygon[1], polygon[4], polygon[5]] for polygon in polygons]
|
| 132 |
-
elif isinstance(table_engine, (WiredTableRecognition, LinelessTableRecognition)):
|
| 133 |
-
html, table_rec_elapse, polygons, logic_points, ocr_res = table_engine(img, ocr_result=ocr_res,
|
| 134 |
enhance_box_line=small_box_cut_enhance,
|
| 135 |
rotated_fix=rotated_fix,
|
| 136 |
col_threshold=col_threshold,
|
| 137 |
row_threshold=row_threshold)
|
| 138 |
-
|
| 139 |
-
|
| 140 |
|
| 141 |
img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
|
| 142 |
table_boxes_img = plot_rec_box(img.copy(), polygons)
|
|
@@ -165,7 +144,7 @@ def main():
|
|
| 165 |
}
|
| 166 |
""") as demo:
|
| 167 |
gr.HTML(
|
| 168 |
-
"<h1 style='text-align: center;'><a href='https://github.com/RapidAI/TableStructureRec?tab=readme-ov-file'>TableStructureRec</a></h1>"
|
| 169 |
)
|
| 170 |
gr.HTML('''
|
| 171 |
<div class="header-links">
|
|
@@ -174,6 +153,7 @@ def main():
|
|
| 174 |
<a href="https://pypi.org/project/lineless-table-rec/"><img alt="PyPI" src="https://img.shields.io/pypi/v/lineless-table-rec"></a>
|
| 175 |
<a href="https://pepy.tech/project/lineless-table-rec"><img src="https://static.pepy.tech/personalized-badge/lineless-table-rec?period=total&units=abbreviation&left_color=grey&right_color=blue&left_text=Downloads%20Lineless"></a>
|
| 176 |
<a href="https://pepy.tech/project/wired-table-rec"><img src="https://static.pepy.tech/personalized-badge/wired-table-rec?period=total&units=abbreviation&left_color=grey&right_color=blue&left_text=Downloads%20Wired"></a>
|
|
|
|
| 177 |
<a href="https://semver.org/"><img alt="SemVer2.0" src="https://img.shields.io/badge/SemVer-2.0-brightgreen"></a>
|
| 178 |
<a href="https://github.com/psf/black"><img src="https://img.shields.io/badge/code%20style-black-000000.svg"></a>
|
| 179 |
<a href="https://github.com/RapidAI/TableStructureRec/blob/c41bbd23898cb27a957ed962b0ffee3c74dfeff1/LICENSE"><img alt="GitHub" src="https://img.shields.io/badge/license-Apache 2.0-blue"></a>
|
|
|
|
| 3 |
import cv2
|
| 4 |
import gradio as gr
|
| 5 |
from lineless_table_rec import LinelessTableRecognition
|
|
|
|
| 6 |
from rapid_table import RapidTable
|
| 7 |
from rapidocr_onnxruntime import RapidOCR
|
| 8 |
from table_cls import TableCls
|
|
|
|
| 24 |
"RapidTable(SLANet)",
|
| 25 |
"RapidTable(SLANet-plus)",
|
| 26 |
"wired_table_v2",
|
|
|
|
| 27 |
"wired_table_v1",
|
| 28 |
"lineless_table"
|
| 29 |
]
|
| 30 |
|
| 31 |
# 示例图片路径
|
| 32 |
example_images = [
|
| 33 |
+
"images/wired1.jpg",
|
| 34 |
+
"images/wired2.png",
|
| 35 |
"images/wired3.png",
|
| 36 |
+
"images/lineless1.jpg",
|
| 37 |
"images/wired4.jpg",
|
| 38 |
"images/lineless2.png",
|
| 39 |
"images/wired5.jpg",
|
|
|
|
| 55 |
rec_model_path = rec_model_dir[rec_model]
|
| 56 |
key = f"{det_model}_{rec_model}"
|
| 57 |
ocr_engine_dict[key] = RapidOCR(det_model_path=det_model_path, rec_model_path=rec_model_path)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 58 |
|
| 59 |
def trans_char_ocr_res(ocr_res):
|
| 60 |
word_result = []
|
|
|
|
| 84 |
return wired_table_engine_v2, table_engine_type
|
| 85 |
elif table_engine_type == "lineless_table":
|
| 86 |
return lineless_table_engine, table_engine_type
|
|
|
|
|
|
|
| 87 |
elif table_engine_type == "auto":
|
| 88 |
cls, elasp = table_cls(img)
|
| 89 |
if cls == 'wired':
|
|
|
|
| 100 |
table_engine, talbe_type = select_table_model(img, table_engine_type, det_model, rec_model)
|
| 101 |
ocr_engine = select_ocr_model(det_model, rec_model)
|
| 102 |
|
| 103 |
+
ocr_res, ocr_infer_elapse = ocr_engine(img, return_word_box=char_ocr)
|
| 104 |
+
det_cost, cls_cost, rec_cost = ocr_infer_elapse
|
| 105 |
+
if char_ocr:
|
| 106 |
+
ocr_res = trans_char_ocr_res(ocr_res)
|
| 107 |
+
ocr_boxes = [box_4_2_poly_to_box_4_1(ori_ocr[0]) for ori_ocr in ocr_res]
|
| 108 |
+
if isinstance(table_engine, RapidTable):
|
| 109 |
+
html, polygons, table_rec_elapse = table_engine(img, ocr_result=ocr_res)
|
| 110 |
polygons = [[polygon[0], polygon[1], polygon[4], polygon[5]] for polygon in polygons]
|
| 111 |
+
elif isinstance(table_engine, (WiredTableRecognition, LinelessTableRecognition)):
|
| 112 |
+
html, table_rec_elapse, polygons, logic_points, ocr_res = table_engine(img, ocr_result=ocr_res,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 113 |
enhance_box_line=small_box_cut_enhance,
|
| 114 |
rotated_fix=rotated_fix,
|
| 115 |
col_threshold=col_threshold,
|
| 116 |
row_threshold=row_threshold)
|
| 117 |
+
sum_elapse = time.time() - start
|
| 118 |
+
all_elapse = f"- table_type: {talbe_type}\n table all cost: {sum_elapse:.5f}\n - table rec cost: {table_rec_elapse:.5f}\n - ocr cost: {det_cost + cls_cost + rec_cost:.5f}"
|
| 119 |
|
| 120 |
img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
|
| 121 |
table_boxes_img = plot_rec_box(img.copy(), polygons)
|
|
|
|
| 144 |
}
|
| 145 |
""") as demo:
|
| 146 |
gr.HTML(
|
| 147 |
+
"<h1 style='text-align: center;'><a href='https://github.com/RapidAI/TableStructureRec?tab=readme-ov-file'>TableStructureRec</a> & <a href='https://github.com/RapidAI/RapidTable'>RapidTable</a></h1>"
|
| 148 |
)
|
| 149 |
gr.HTML('''
|
| 150 |
<div class="header-links">
|
|
|
|
| 153 |
<a href="https://pypi.org/project/lineless-table-rec/"><img alt="PyPI" src="https://img.shields.io/pypi/v/lineless-table-rec"></a>
|
| 154 |
<a href="https://pepy.tech/project/lineless-table-rec"><img src="https://static.pepy.tech/personalized-badge/lineless-table-rec?period=total&units=abbreviation&left_color=grey&right_color=blue&left_text=Downloads%20Lineless"></a>
|
| 155 |
<a href="https://pepy.tech/project/wired-table-rec"><img src="https://static.pepy.tech/personalized-badge/wired-table-rec?period=total&units=abbreviation&left_color=grey&right_color=blue&left_text=Downloads%20Wired"></a>
|
| 156 |
+
<a href="https://pepy.tech/project/rapid-table"><img src="https://static.pepy.tech/personalized-badge/rapid-table?period=total&units=abbreviation&left_color=grey&right_color=blue&left_text=Downloads%20RapidTable"></a>
|
| 157 |
<a href="https://semver.org/"><img alt="SemVer2.0" src="https://img.shields.io/badge/SemVer-2.0-brightgreen"></a>
|
| 158 |
<a href="https://github.com/psf/black"><img src="https://img.shields.io/badge/code%20style-black-000000.svg"></a>
|
| 159 |
<a href="https://github.com/RapidAI/TableStructureRec/blob/c41bbd23898cb27a957ed962b0ffee3c74dfeff1/LICENSE"><img alt="GitHub" src="https://img.shields.io/badge/license-Apache 2.0-blue"></a>
|