'''
Copyright 2025 Vignesh(VK)Kotteeswaran <[email protected]>

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
'''
import numpy as np
import cv2
from openvino.runtime import Core
from utils import DBPostProcess, img_decode

class Text_Detection():
    def __init__(self, model_path):
        '''
        Args:
            model_path(string): path to the OpenVINO XML file of the detection model
        '''
        ie = Core()
        print('\n', model_path)
        model = ie.read_model(model=model_path)
        self.compiled_model = ie.compile_model(model=model, device_name="CPU")
        self.input_layer = self.compiled_model.input(0)
        self.output_layer = self.compiled_model.output(0)
        self.show_frame = None
        self.image_shape = None
        # resize limits and ImageNet normalization constants
        self.limit_side_len = 736
        self.limit_type = 'min'
        self.scale = 1. / 255.
        self.mean = [0.485, 0.456, 0.406]
        self.std = [0.229, 0.224, 0.225]
        self.postprocess_detection = DBPostProcess()

    def resize_norm_img(self, img):
        """
        Resize the image so that both sides are multiples of 32, as required by
        the network, then normalize it with the ImageNet mean/std.
        Args:
            img(array): array with shape [h, w, c]
        Returns(dict):
            {'img': resized and normalized CHW array,
             'shape_list': [h, w, ratio_h, ratio_w]}
        """
        data = {}
        limit_side_len = self.limit_side_len
        h, w, c = img.shape
        # compute the scaling ratio according to the configured limit type
        if self.limit_type == 'max':
            # shrink so the longer side does not exceed limit_side_len
            if max(h, w) > limit_side_len:
                if h > w:
                    ratio = float(limit_side_len) / h
                else:
                    ratio = float(limit_side_len) / w
            else:
                ratio = 1.
        elif self.limit_type == 'min':
            # enlarge so the shorter side is at least limit_side_len
            if min(h, w) < limit_side_len:
                if h < w:
                    ratio = float(limit_side_len) / h
                else:
                    ratio = float(limit_side_len) / w
            else:
                ratio = 1.
        elif self.limit_type == 'resize_long':
            # scale so the longer side equals limit_side_len
            ratio = float(limit_side_len) / max(h, w)
        else:
            raise ValueError('unsupported limit type: {}'.format(self.limit_type))
        resize_h = int(h * ratio)
        resize_w = int(w * ratio)
        # round both sides to the nearest multiple of 32 (minimum 32)
        resize_h = max(int(round(resize_h / 32) * 32), 32)
        resize_w = max(int(round(resize_w / 32) * 32), 32)
        if resize_w <= 0 or resize_h <= 0:
            return None, (None, None)
        img = cv2.resize(img, (int(resize_w), int(resize_h)))
        # normalize to float32 and convert HWC -> CHW
        img = (img.astype('float32') * self.scale - self.mean) / self.std
        img = img.transpose((2, 0, 1))
        ratio_h = resize_h / float(h)
        ratio_w = resize_w / float(w)
        data['img'] = img
        data['shape_list'] = [h, w, ratio_h, ratio_w]
        return data

    def predict(self, src):
        '''
        Args:
            src: list of images as numpy arrays and/or image file paths (strings)
        Returns(list):
            list of bounding-box coordinates of the detected text regions
        '''
        imgs = []
        src_imgs = []
        shape_list = []
        show_frames = []
        for item in src:
            if hasattr(item, 'shape'):
                # item is already a decoded image array
                preprocessed_data = self.resize_norm_img(item)
                src_imgs.append(item)
            elif isinstance(item, str):
                # item is a file path: read and decode the image bytes
                with open(item, 'rb') as f:
                    content = f.read()
                decoded_img = img_decode(content)
                preprocessed_data = self.resize_norm_img(decoded_img)
                src_imgs.append(decoded_img)
            else:
                return "Error: Invalid Input"
            imgs.append(np.expand_dims(preprocessed_data['img'], axis=0))
            shape_list.append(preprocessed_data['shape_list'])
            show_frames.append(self.show_frame)
        # batch the preprocessed images and run the detection model
        blob = np.concatenate(imgs, axis=0).astype(np.float32)
        outputs = self.compiled_model([blob])[self.output_layer]
        # print('text detection model output shape:', outputs.shape)
        outputs = self.postprocess_detection(outputs, shape_list)
        return outputs
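

# Usage sketch (not part of the original file): "text_det.xml" and
# "sample_image.jpg" are placeholder paths; point them at a real OpenVINO IR
# model and a real image before running.
if __name__ == '__main__':
    detector = Text_Detection('text_det.xml')
    # predict() accepts a list of file paths and/or decoded numpy arrays
    boxes = detector.predict(['sample_image.jpg'])
    print(boxes)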