Spaces:
Sleeping
Sleeping
vk
committed on
Commit
·
6e95c9d
1
Parent(s):
9a0940b
docstrings added
Browse files- app.py +14 -1
- ocr_inference.py +31 -6
- text_detection.py +22 -2
- utils.py +45 -4
app.py
CHANGED
|
@@ -6,6 +6,19 @@ import gradio as gr
|
|
| 6 |
|
| 7 |
def get_response(input_img):
|
| 8 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 9 |
if hasattr(input_img,'shape'):
|
| 10 |
src_img=input_img.copy()
|
| 11 |
outputs=text_detector.predict([input_img])
|
|
@@ -30,7 +43,7 @@ def get_response(input_img):
|
|
| 30 |
cropped = result[y:y + h, x:x + w, :]
|
| 31 |
# cv2.polylines(src_im, [box], True, color=(255, 255, 0), thickness=2)
|
| 32 |
# cv2.imwrite(f"cropped/output_{i}_{j}.jpg",cropped)
|
| 33 |
-
texts.append(ocr.predict([cropped]))
|
| 34 |
j += 1
|
| 35 |
|
| 36 |
return "\n".join(texts)
|
|
|
|
| 6 |
|
| 7 |
def get_response(input_img):
|
| 8 |
|
| 9 |
+
'''
|
| 10 |
+
|
| 11 |
+
Detects all text regions in the image and recognizes the text in each of them
|
| 12 |
+
|
| 13 |
+
Args:
|
| 14 |
+
input_img (numpy array): one image of type numpy array
|
| 15 |
+
|
| 16 |
+
|
| 17 |
+
Returns:
|
| 18 |
+
a string of the recognized OCR text, one detected region per line
|
| 19 |
+
|
| 20 |
+
'''
|
| 21 |
+
|
| 22 |
if hasattr(input_img,'shape'):
|
| 23 |
src_img=input_img.copy()
|
| 24 |
outputs=text_detector.predict([input_img])
|
|
|
|
| 43 |
cropped = result[y:y + h, x:x + w, :]
|
| 44 |
# cv2.polylines(src_im, [box], True, color=(255, 255, 0), thickness=2)
|
| 45 |
# cv2.imwrite(f"cropped/output_{i}_{j}.jpg",cropped)
|
| 46 |
+
texts.append(ocr.predict([cropped])[0])
|
| 47 |
j += 1
|
| 48 |
|
| 49 |
return "\n".join(texts)
|
ocr_inference.py
CHANGED
|
@@ -28,6 +28,11 @@ class OCR():
|
|
| 28 |
|
| 29 |
def __init__(self,model_path):
|
| 30 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 31 |
ie = Core()
|
| 32 |
|
| 33 |
print('\n',model_path)
|
|
@@ -45,9 +50,18 @@ class OCR():
|
|
| 45 |
|
| 46 |
|
| 47 |
|
| 48 |
-
def resize_norm_img(self,img
|
| 49 |
-
|
| 50 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 51 |
|
| 52 |
|
| 53 |
self.image_shape=[3,48,int(img.shape[1]*2)]
|
|
@@ -55,8 +69,7 @@ class OCR():
|
|
| 55 |
|
| 56 |
imgC,imgH,imgW=self.image_shape
|
| 57 |
|
| 58 |
-
|
| 59 |
-
# todo: change to 0 and modified image shape
|
| 60 |
max_wh_ratio = imgW * 1.0 / imgH
|
| 61 |
h, w = img.shape[0], img.shape[1]
|
| 62 |
ratio = w * 1.0 / h
|
|
@@ -94,6 +107,18 @@ class OCR():
|
|
| 94 |
|
| 95 |
def predict(self,src):
|
| 96 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 97 |
imgs=[]
|
| 98 |
show_frames=[]
|
| 99 |
|
|
@@ -133,7 +158,7 @@ class OCR():
|
|
| 133 |
|
| 134 |
|
| 135 |
|
| 136 |
-
return texts
|
| 137 |
|
| 138 |
|
| 139 |
|
|
|
|
| 28 |
|
| 29 |
def __init__(self,model_path):
|
| 30 |
|
| 31 |
+
'''
|
| 32 |
+
Args:
|
| 33 |
+
model_path (string): path to the OpenVINO XML file of the model
|
| 34 |
+
'''
|
| 35 |
+
|
| 36 |
ie = Core()
|
| 37 |
|
| 38 |
print('\n',model_path)
|
|
|
|
| 50 |
|
| 51 |
|
| 52 |
|
| 53 |
+
def resize_norm_img(self,img):
|
| 54 |
+
|
| 55 |
+
'''
|
| 56 |
+
|
| 57 |
+
Args:
|
| 58 |
+
|
| 59 |
+
img : numpy array
|
| 60 |
+
|
| 61 |
+
|
| 62 |
+
Returns:
|
| 63 |
+
returns preprocessed & normalized numpy array of image
|
| 64 |
+
'''
|
| 65 |
|
| 66 |
|
| 67 |
self.image_shape=[3,48,int(img.shape[1]*2)]
|
|
|
|
| 69 |
|
| 70 |
imgC,imgH,imgW=self.image_shape
|
| 71 |
|
| 72 |
+
|
|
|
|
| 73 |
max_wh_ratio = imgW * 1.0 / imgH
|
| 74 |
h, w = img.shape[0], img.shape[1]
|
| 75 |
ratio = w * 1.0 / h
|
|
|
|
| 107 |
|
| 108 |
def predict(self,src):
|
| 109 |
|
| 110 |
+
'''
|
| 111 |
+
|
| 112 |
+
Args:
|
| 113 |
+
src : either a list of images as numpy arrays or a list of image file path strings
|
| 114 |
+
|
| 115 |
+
Returns:
|
| 116 |
+
|
| 117 |
+
list of texts
|
| 118 |
+
|
| 119 |
+
|
| 120 |
+
'''
|
| 121 |
+
|
| 122 |
imgs=[]
|
| 123 |
show_frames=[]
|
| 124 |
|
|
|
|
| 158 |
|
| 159 |
|
| 160 |
|
| 161 |
+
return texts
|
| 162 |
|
| 163 |
|
| 164 |
|
text_detection.py
CHANGED
|
@@ -21,8 +21,15 @@ import cv2
|
|
| 21 |
|
| 22 |
class Text_Detection():
|
| 23 |
|
|
|
|
|
|
|
| 24 |
def __init__(self, model_path):
|
| 25 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 26 |
ie = Core()
|
| 27 |
|
| 28 |
print('\n', model_path)
|
|
@@ -46,11 +53,12 @@ class Text_Detection():
|
|
| 46 |
|
| 47 |
"""
|
| 48 |
resize image to a size multiple of 32 which is required by the network
|
| 49 |
-
|
|
|
|
| 50 |
img(array): array with shape [h, w, c]
|
| 51 |
return(tuple):
|
| 52 |
img, (ratio_h, ratio_w)
|
| 53 |
-
|
| 54 |
data = {}
|
| 55 |
limit_side_len = self.limit_side_len
|
| 56 |
h, w, c = img.shape
|
|
@@ -102,6 +110,17 @@ class Text_Detection():
|
|
| 102 |
|
| 103 |
def predict(self, src):
|
| 104 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 105 |
imgs = []
|
| 106 |
src_imgs=[]
|
| 107 |
shape_list=[]
|
|
@@ -133,6 +152,7 @@ class Text_Detection():
|
|
| 133 |
blob = np.concatenate(imgs, axis=0).astype(np.float32)
|
| 134 |
|
| 135 |
outputs = self.compiled_model([blob])[self.output_layer]
|
|
|
|
| 136 |
outputs=self.postprocess_detection(outputs,shape_list)
|
| 137 |
return outputs
|
| 138 |
|
|
|
|
| 21 |
|
| 22 |
class Text_Detection():
|
| 23 |
|
| 24 |
+
|
| 25 |
+
|
| 26 |
def __init__(self, model_path):
|
| 27 |
|
| 28 |
+
'''
|
| 29 |
+
Args:
|
| 30 |
+
model_path (string): path to the OpenVINO XML file of the model
|
| 31 |
+
'''
|
| 32 |
+
|
| 33 |
ie = Core()
|
| 34 |
|
| 35 |
print('\n', model_path)
|
|
|
|
| 53 |
|
| 54 |
"""
|
| 55 |
resize image to a size multiple of 32 which is required by the network
|
| 56 |
+
|
| 57 |
+
Args:
|
| 58 |
img(array): array with shape [h, w, c]
|
| 59 |
return(tuple):
|
| 60 |
img, (ratio_h, ratio_w)
|
| 61 |
+
"""
|
| 62 |
data = {}
|
| 63 |
limit_side_len = self.limit_side_len
|
| 64 |
h, w, c = img.shape
|
|
|
|
| 110 |
|
| 111 |
def predict(self, src):
|
| 112 |
|
| 113 |
+
'''
|
| 114 |
+
|
| 115 |
+
Args:
|
| 116 |
+
src : either a list of images as numpy arrays or a list of image file path strings
|
| 117 |
+
|
| 118 |
+
Returns(list):
|
| 119 |
+
list of bounding-box coordinates of the detected text regions
|
| 120 |
+
|
| 121 |
+
|
| 122 |
+
'''
|
| 123 |
+
|
| 124 |
imgs = []
|
| 125 |
src_imgs=[]
|
| 126 |
shape_list=[]
|
|
|
|
| 152 |
blob = np.concatenate(imgs, axis=0).astype(np.float32)
|
| 153 |
|
| 154 |
outputs = self.compiled_model([blob])[self.output_layer]
|
| 155 |
+
print('text detection model output shape:',outputs.shape)
|
| 156 |
outputs=self.postprocess_detection(outputs,shape_list)
|
| 157 |
return outputs
|
| 158 |
|
utils.py
CHANGED
|
@@ -20,6 +20,20 @@ import pyclipper
|
|
| 20 |
|
| 21 |
|
| 22 |
def img_decode(img):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 23 |
img = np.frombuffer(img, dtype='uint8')
|
| 24 |
img = cv2.imdecode(img, 1)
|
| 25 |
# print(img.shape)
|
|
@@ -57,8 +71,15 @@ class DBPostProcess(object):
|
|
| 57 |
|
| 58 |
def polygons_from_bitmap(self, pred, _bitmap, dest_width, dest_height):
|
| 59 |
'''
|
| 60 |
-
|
| 61 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 62 |
'''
|
| 63 |
|
| 64 |
bitmap = _bitmap
|
|
@@ -104,8 +125,15 @@ class DBPostProcess(object):
|
|
| 104 |
|
| 105 |
def boxes_from_bitmap(self, pred, _bitmap, dest_width, dest_height):
|
| 106 |
'''
|
| 107 |
-
|
| 108 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 109 |
'''
|
| 110 |
|
| 111 |
bitmap = _bitmap
|
|
@@ -150,6 +178,8 @@ class DBPostProcess(object):
|
|
| 150 |
return np.array(boxes, dtype=np.int16), scores
|
| 151 |
|
| 152 |
def unclip(self, box, unclip_ratio):
|
|
|
|
|
|
|
| 153 |
poly = Polygon(box)
|
| 154 |
distance = poly.area * unclip_ratio / poly.length
|
| 155 |
offset = pyclipper.PyclipperOffset()
|
|
@@ -221,6 +251,17 @@ class DBPostProcess(object):
|
|
| 221 |
|
| 222 |
def sort_boxes(self,boxes):
|
| 223 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 224 |
ymin_list=[]
|
| 225 |
for box in boxes:
|
| 226 |
xmin,ymin=box[0]
|
|
|
|
| 20 |
|
| 21 |
|
| 22 |
def img_decode(img):
|
| 23 |
+
|
| 24 |
+
'''
|
| 25 |
+
|
| 26 |
+
Decodes an encoded image byte array into a numpy array
|
| 27 |
+
|
| 28 |
+
Args:
|
| 29 |
+
img(byte array)
|
| 30 |
+
|
| 31 |
+
Returns:
|
| 32 |
+
img (numpy array)
|
| 33 |
+
|
| 34 |
+
|
| 35 |
+
|
| 36 |
+
'''
|
| 37 |
img = np.frombuffer(img, dtype='uint8')
|
| 38 |
img = cv2.imdecode(img, 1)
|
| 39 |
# print(img.shape)
|
|
|
|
| 71 |
|
| 72 |
def polygons_from_bitmap(self, pred, _bitmap, dest_width, dest_height):
|
| 73 |
'''
|
| 74 |
+
Calculates polygon coords & scores from segmentation bitmap
|
| 75 |
+
|
| 76 |
+
Args:
|
| 77 |
+
pred: model predictions of shape (N,1,H,W)
|
| 78 |
+
_bitmap: single map with shape (1, H, W),
|
| 79 |
+
whose values are binarized as {0, 1}
|
| 80 |
+
dest_width: input image width
|
| 81 |
+
dest_height: input image height
|
| 82 |
+
|
| 83 |
'''
|
| 84 |
|
| 85 |
bitmap = _bitmap
|
|
|
|
| 125 |
|
| 126 |
def boxes_from_bitmap(self, pred, _bitmap, dest_width, dest_height):
|
| 127 |
'''
|
| 128 |
+
Calculates bounding-box coords & scores from segmentation bitmap
|
| 129 |
+
|
| 130 |
+
Args:
|
| 131 |
+
pred: model predictions of shape (N,1,H,W)
|
| 132 |
+
_bitmap: single map with shape (1, H, W),
|
| 133 |
+
whose values are binarized as {0, 1}
|
| 134 |
+
dest_width: input image width
|
| 135 |
+
dest_height: input image height
|
| 136 |
+
|
| 137 |
'''
|
| 138 |
|
| 139 |
bitmap = _bitmap
|
|
|
|
| 178 |
return np.array(boxes, dtype=np.int16), scores
|
| 179 |
|
| 180 |
def unclip(self, box, unclip_ratio):
|
| 181 |
+
|
| 182 |
+
|
| 183 |
poly = Polygon(box)
|
| 184 |
distance = poly.area * unclip_ratio / poly.length
|
| 185 |
offset = pyclipper.PyclipperOffset()
|
|
|
|
| 251 |
|
| 252 |
def sort_boxes(self,boxes):
|
| 253 |
|
| 254 |
+
|
| 255 |
+
'''Sort boxes along height
|
| 256 |
+
|
| 257 |
+
Args:
|
| 258 |
+
boxes(numpy): numpy array of boxes
|
| 259 |
+
|
| 260 |
+
Returns:
|
| 261 |
+
boxes (numpy): sorted numpy array of boxes'''
|
| 262 |
+
|
| 263 |
+
|
| 264 |
+
|
| 265 |
ymin_list=[]
|
| 266 |
for box in boxes:
|
| 267 |
xmin,ymin=box[0]
|