Spaces:
Sleeping
Sleeping
| ''' | |
| Copyright 2025 Vignesh(VK)Kotteeswaran <[email protected]> | |
| Licensed under the Apache License, Version 2.0 (the "License"); | |
| you may not use this file except in compliance with the License. | |
| You may obtain a copy of the License at | |
| http://www.apache.org/licenses/LICENSE-2.0 | |
| Unless required by applicable law or agreed to in writing, software | |
| distributed under the License is distributed on an "AS IS" BASIS, | |
| WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
| See the License for the specific language governing permissions and | |
| limitations under the License. | |
| ''' | |
| import numpy as np | |
| from openvino.runtime import Core | |
| import math | |
| import cv2 | |
| from utils import CTCLabelDecode,img_decode | |
class OCR():
    '''
    Text recognizer built on an OpenVINO compiled model.

    Each input image is resized to a fixed height, normalized, zero-padded on
    the right, run through the model as one batch, and the model output is
    turned into text by a CTCLabelDecode instance.
    '''

    # Channel count and height of every tensor produced by resize_norm_img.
    INPUT_CHANNELS = 3
    INPUT_HEIGHT = 48

    def __init__(self, model_path, dict_path='dict.txt', device_name='CPU'):
        '''
        Args:
            model_path(string): path of openvino xml of model
            dict_path(string): path handed to CTCLabelDecode; defaults to
                'dict.txt', resolved against the current working directory
            device_name(string): OpenVINO device the model is compiled for
        '''
        ie = Core()
        print('\n',model_path)
        model = ie.read_model(model=model_path)
        self.compiled_model = ie.compile_model(model=model, device_name=device_name)
        self.input_layer = self.compiled_model.input(0)
        self.output_layer = self.compiled_model.output(0)
        # NOTE(review): the meaning of the second positional argument (True)
        # is defined in utils.CTCLabelDecode, which is not visible here.
        self.decoder = CTCLabelDecode(dict_path, True)
        # Last resized (pre-normalization) image, refreshed by resize_norm_img.
        self.show_frame = None
        # [channels, height, padded_width] of the last preprocessed image.
        self.image_shape = None
        self.dynamic_width = False

    def resize_norm_img(self, img):
        '''
        Resize an image to the model height, normalize it and pad its width.

        Side effects: updates self.image_shape and self.show_frame.

        Args:
            img : numpy array indexed as (height, width, channel)

        Returns:
            float32 numpy array of shape (3, 48, 2 * width). The resized image
            occupies the left part; the remaining columns are zero.
        '''
        height, width = img.shape[0], img.shape[1]
        channels = self.INPUT_CHANNELS
        target_h = self.INPUT_HEIGHT
        # The padded canvas is always twice the source width.
        padded_w = int(width * 2)
        self.image_shape = [channels, target_h, padded_w]

        # Keep the aspect ratio at the target height, but never exceed the
        # padded canvas (this only happens for images shorter than 24 px).
        ratio = width * 1.0 / height
        resized_w = min(int(math.ceil(target_h * ratio)), padded_w)

        resized_image = cv2.resize(img, (resized_w, target_h))
        self.show_frame = resized_image

        # HWC -> CHW, then scale by 1/255 and map to (x - 0.5) / 0.5.
        # Assumes 8-bit input so the result lies in [-1, 1] - TODO confirm.
        normalized = resized_image.astype('float32').transpose((2, 0, 1)) / 255
        normalized -= 0.5
        normalized /= 0.5

        padding_im = np.zeros((channels, target_h, padded_w), dtype=np.float32)
        padding_im[:, :, 0:resized_w] = normalized
        return padding_im

    @staticmethod
    def _pad_batch(tensors):
        '''Stack (C, H, W_i) arrays into one (N, C, H, max W_i) float32 batch, zero-padding on the right.'''
        widest = max(tensor.shape[2] for tensor in tensors)
        batch_shape = (len(tensors),) + tuple(tensors[0].shape[:2]) + (widest,)
        batch = np.zeros(batch_shape, dtype=np.float32)
        for slot, tensor in zip(batch, tensors):
            # slot is a view into batch, so this writes in place.
            slot[:, :, :tensor.shape[2]] = tensor
        return batch

    def predict(self, src):
        '''
        Recognize text in a batch of images.

        Args:
            src : either list of images numpy array or list of image filepath string

        Returns:
            list of texts, one per input item, in input order. An empty src
            yields an empty list. If any item is neither an array-like object
            (has a .shape) nor a string, the string "Error: Invalid Input" is
            returned instead; this is kept for compatibility with existing
            callers.
        '''
        tensors = []
        for item in src:
            if hasattr(item, 'shape'):
                image = item
            elif isinstance(item, str):
                with open(item, 'rb') as f:
                    image = img_decode(f.read())
            else:
                return "Error: Invalid Input"
            tensors.append(self.resize_norm_img(image))

        # Nothing to run: avoid handing an empty batch to numpy / the model.
        if not tensors:
            return []

        # Images of different widths produce tensors of different widths, so
        # pad them to a common width instead of concatenating them directly.
        blob = self._pad_batch(tensors)
        outputs = self.compiled_model([blob])[self.output_layer]

        # The decoder is fed one sample at a time with the batch axis restored;
        # [0][0] picks the first field of its first result, which is used as the
        # text (presumably a (text, score) pair - verify against utils).
        return [
            self.decoder(np.expand_dims(output, axis=0))[0][0]
            for output in outputs
        ]