Spaces:

vk888
/

SVTR-OCR-App

Sleeping

App Files Community

vk committed on Feb 23

Commit

6e95c9d

1 Parent(s): 9a0940b

docstrings added

Browse files

Files changed (4) hide show

app.py +14 -1
ocr_inference.py +31 -6
text_detection.py +22 -2
utils.py +45 -4

app.py CHANGED Viewed

@@ -6,6 +6,19 @@ import gradio as gr
 def get_response(input_img):
     if hasattr(input_img,'shape'):
         src_img=input_img.copy()
     outputs=text_detector.predict([input_img])
@@ -30,7 +43,7 @@ def get_response(input_img):
             cropped = result[y:y + h, x:x + w, :]
             # cv2.polylines(src_im, [box], True, color=(255, 255, 0), thickness=2)
             # cv2.imwrite(f"cropped/output_{i}_{j}.jpg",cropped)
-            texts.append(ocr.predict([cropped]))
             j += 1
     return "\n".join(texts)

 def get_response(input_img):
+    '''
+    detects all possible texts in the image and recognizes them
+        Args:
+            input_img (numpy array): one image of type numpy array
+        Returns:
+            return a string of OCR text
+    '''
     if hasattr(input_img,'shape'):
         src_img=input_img.copy()
     outputs=text_detector.predict([input_img])
             cropped = result[y:y + h, x:x + w, :]
             # cv2.polylines(src_im, [box], True, color=(255, 255, 0), thickness=2)
             # cv2.imwrite(f"cropped/output_{i}_{j}.jpg",cropped)
+            texts.append(ocr.predict([cropped])[0])
             j += 1
     return "\n".join(texts)

ocr_inference.py CHANGED Viewed

@@ -28,6 +28,11 @@ class OCR():
     def __init__(self,model_path):
         ie = Core()
         print('\n',model_path)
@@ -45,9 +50,18 @@ class OCR():
-    def resize_norm_img(self,img,
-                    padding=True,
-                    interpolation=cv2.INTER_LINEAR):
         self.image_shape=[3,48,int(img.shape[1]*2)]
@@ -55,8 +69,7 @@ class OCR():
         imgC,imgH,imgW=self.image_shape
-        # todo: change to 0 and modified image shape
         max_wh_ratio = imgW * 1.0 / imgH
         h, w = img.shape[0], img.shape[1]
         ratio = w * 1.0 / h
@@ -94,6 +107,18 @@ class OCR():
     def predict(self,src):
         imgs=[]
         show_frames=[]
@@ -133,7 +158,7 @@ class OCR():
-        return texts[0]

     def __init__(self,model_path):
+        '''
+            Args:
+                model_path(string): path of openvino xml of model
+        '''
         ie = Core()
         print('\n',model_path)
+    def resize_norm_img(self,img):
+        '''
+            Args:
+                img : numpy array
+            Returns:
+                returns preprocessed & normalized numpy array of image
+        '''
         self.image_shape=[3,48,int(img.shape[1]*2)]
         imgC,imgH,imgW=self.image_shape
         max_wh_ratio = imgW * 1.0 / imgH
         h, w = img.shape[0], img.shape[1]
         ratio = w * 1.0 / h
     def predict(self,src):
+        '''
+            Args:
+                src : either a list of image numpy arrays or a list of image filepath strings
+            Returns:
+                list of texts
+        '''
         imgs=[]
         show_frames=[]
+        return texts

text_detection.py CHANGED Viewed

@@ -21,8 +21,15 @@ import cv2
 class Text_Detection():
     def __init__(self, model_path):
         ie = Core()
         print('\n', model_path)
@@ -46,11 +53,12 @@ class Text_Detection():
         """
                 resize image to a size multiple of 32 which is required by the network
-                args:
                     img(array): array with shape [h, w, c]
                 return(tuple):
                     img, (ratio_h, ratio_w)
-                """
         data = {}
         limit_side_len = self.limit_side_len
         h, w, c = img.shape
@@ -102,6 +110,17 @@ class Text_Detection():
     def predict(self, src):
         imgs = []
         src_imgs=[]
         shape_list=[]
@@ -133,6 +152,7 @@ class Text_Detection():
         blob = np.concatenate(imgs, axis=0).astype(np.float32)
         outputs = self.compiled_model([blob])[self.output_layer]
         outputs=self.postprocess_detection(outputs,shape_list)
         return outputs

 class Text_Detection():
     def __init__(self, model_path):
+        '''
+            Args:
+                model_path(string): path of openvino xml of model
+        '''
         ie = Core()
         print('\n', model_path)
         """
                 resize image to a size multiple of 32 which is required by the network
+                Args:
                     img(array): array with shape [h, w, c]
                 return(tuple):
                     img, (ratio_h, ratio_w)
+        """
         data = {}
         limit_side_len = self.limit_side_len
         h, w, c = img.shape
     def predict(self, src):
+        '''
+            Args:
+                src : either a list of image numpy arrays or a list of image filepath strings
+            Returns(list):
+                list of bounding box coordinates of detected texts
+        '''
         imgs = []
         src_imgs=[]
         shape_list=[]
         blob = np.concatenate(imgs, axis=0).astype(np.float32)
         outputs = self.compiled_model([blob])[self.output_layer]
+        print('text detection model output shape:',outputs.shape)
         outputs=self.postprocess_detection(outputs,shape_list)
         return outputs

utils.py CHANGED Viewed

@@ -20,6 +20,20 @@ import pyclipper
 def img_decode(img):
     img = np.frombuffer(img, dtype='uint8')
     img = cv2.imdecode(img, 1)
     # print(img.shape)
@@ -57,8 +71,15 @@ class DBPostProcess(object):
     def polygons_from_bitmap(self, pred, _bitmap, dest_width, dest_height):
         '''
-        _bitmap: single map with shape (1, H, W),
-            whose values are binarized as {0, 1}
         '''
         bitmap = _bitmap
@@ -104,8 +125,15 @@ class DBPostProcess(object):
     def boxes_from_bitmap(self, pred, _bitmap, dest_width, dest_height):
         '''
-        _bitmap: single map with shape (1, H, W),
-                whose values are binarized as {0, 1}
         '''
         bitmap = _bitmap
@@ -150,6 +178,8 @@ class DBPostProcess(object):
         return np.array(boxes, dtype=np.int16), scores
     def unclip(self, box, unclip_ratio):
         poly = Polygon(box)
         distance = poly.area * unclip_ratio / poly.length
         offset = pyclipper.PyclipperOffset()
@@ -221,6 +251,17 @@ class DBPostProcess(object):
     def sort_boxes(self,boxes):
        ymin_list=[]
        for box in boxes:
            xmin,ymin=box[0]

 def img_decode(img):
+    '''
+      Converts byte array to numpy array
+        Args:
+            img(byte array)
+        Returns:
+            img (numpy array)
+    '''
     img = np.frombuffer(img, dtype='uint8')
     img = cv2.imdecode(img, 1)
     # print(img.shape)
     def polygons_from_bitmap(self, pred, _bitmap, dest_width, dest_height):
         '''
+        Calculates polygon coords & scores from segmentation bitmap
+        Args:
+            pred:  model predictions of shape (N,1,H,W)
+            _bitmap: single map with shape (1, H, W),
+                    whose values are binarized as {0, 1}
+            dest_width: input image width
+            dest_height: input image height
         '''
         bitmap = _bitmap
     def boxes_from_bitmap(self, pred, _bitmap, dest_width, dest_height):
         '''
+                Calculates bounding box coords & scores from segmentation bitmap
+                Args:
+                    pred:  model predictions of shape (N,1,H,W)
+                    _bitmap: single map with shape (1, H, W),
+                            whose values are binarized as {0, 1}
+                    dest_width: input image width
+                    dest_height: input image height
         '''
         bitmap = _bitmap
         return np.array(boxes, dtype=np.int16), scores
     def unclip(self, box, unclip_ratio):
         poly = Polygon(box)
         distance = poly.area * unclip_ratio / poly.length
         offset = pyclipper.PyclipperOffset()
     def sort_boxes(self,boxes):
+       '''Sort boxes along height
+        Args:
+            boxes(numpy): numpy array of boxes
+        Returns:
+            boxes (numpy): sorted numpy array of boxes'''
        ymin_list=[]
        for box in boxes:
            xmin,ymin=box[0]