Spaces:
Sleeping
Sleeping
| import time | |
| import numpy as np | |
| from PIL import Image | |
| from scipy.spatial.distance import cdist | |
| from scipy.optimize import linear_sum_assignment | |
class SimpleAffineTransform:
    """
    simple affine transform, only translation and scale.

    The transform is applied to a *set* of points: the points are scaled
    about their own centroid and then shifted by ``translation``.
    """

    def __init__(self, translation=(0, 0), scale=1.0):
        """Store the translation vector (as an ndarray) and the scalar scale."""
        self.translation = np.array(translation)
        self.scale = scale

    def estimate(self, src, dst):
        """Fit translation and scale that map the point set ``src`` onto ``dst``.

        The translation is the difference of the two centroids; the scale is
        the ratio of the mean distances of the points to their centroid.
        Updates ``self.translation`` and ``self.scale`` in place.
        """
        src = np.asarray(src)
        dst = np.asarray(dst)
        src_center = np.mean(src, axis=0)
        dst_center = np.mean(dst, axis=0)
        self.translation = dst_center - src_center

        src_dists = np.linalg.norm(src - src_center, axis=1)
        dst_dists = np.linalg.norm(dst - dst_center, axis=1)
        # The epsilon avoids a division by zero when all src points coincide.
        self.scale = np.mean(dst_dists) / (np.mean(src_dists) + 1e-10)

    def inverse(self):
        """Return a new transform that undoes this one.

        Applying the result to ``self(coords)`` gives back ``coords``: the
        centroid of the transformed set is the original centroid plus the
        translation, so negating the translation and inverting the scale
        reverses both steps.
        """
        # Build an instance of this class; the previously referenced name
        # ``AffineTransform`` is not defined in this module.
        return SimpleAffineTransform(-self.translation, 1.0 / self.scale)

    def __call__(self, coords):
        """Scale ``coords`` about their centroid, then translate them."""
        coords = np.asarray(coords)
        center = np.mean(coords, axis=0)
        return self.scale * (coords - center) + center + self.translation

    def residuals(self, src, dst):
        """Return the per-point Euclidean distance between ``self(src)`` and ``dst``."""
        return np.sqrt(np.sum((self(src) - dst) ** 2, axis=1))
def norm_coords(x, left, right):
    """Clamp ``x`` into the interval bounded by ``left`` and ``right``.

    The lower bound is checked first, so a value below ``left`` always
    yields ``left``; otherwise a value above ``right`` yields ``right``.
    """
    return left if x < left else (right if x > right else x)
| def norm_same_token(token): | |
| special_map = { | |
| "\\cdot": ".", | |
| "\\mid": "|", | |
| "\\to": "\\rightarrow", | |
| "\\top": "T", | |
| "\\Tilde": "\\tilde", | |
| "\\cdots": "\\dots", | |
| "\\prime": "'", | |
| "\\ast": "*", | |
| "\\left<": "\\langle", | |
| "\\right>": "\\rangle" | |
| } | |
| if token in special_map.keys(): | |
| token = special_map[token] | |
| if token.startswith('\\left') or token.startswith('\\right'): | |
| token = token.replace("\\left", "").replace("\\right", "") | |
| if token.startswith('\\big') or token.startswith('\\Big'): | |
| if "\\" in token[4:]: | |
| token = "\\"+token[4:].split("\\")[-1] | |
| else: | |
| token = token[-1] | |
| if token in ['\\leq', '\\geq']: | |
| return token[0:-1] | |
| if token in ['\\lVert', '\\rVert', '\\Vert']: | |
| return '\\|' | |
| if token in ['\\lvert', '\\rvert', '\\vert']: | |
| return '|' | |
| if token.endswith("rightarrow"): | |
| return "\\rightarrow" | |
| if token.endswith("leftarrow"): | |
| return "\\leftarrow" | |
| if token.startswith('\\wide'): | |
| return token.replace("wide", "") | |
| if token.startswith('\\var'): | |
| return token.replace("\\var", "") | |
| return token | |
class HungarianMatcher:
    """Match ground-truth token boxes to predicted token boxes.

    The pairwise cost is a weighted sum of three terms (token mismatch,
    normalized bounding-box L1 distance, and relative-order distance); the
    assignment minimizing the total cost is found with
    ``scipy.optimize.linear_sum_assignment``.

    Each box is a dict with a ``'token'`` entry and a ``'bbox'`` entry that
    unpacks into ``x_min, y_min, x_max, y_max``.
    """

    def __init__(
        self,
        cost_token: float = 1,
        cost_position: float = 0.05,
        cost_order: float = 0.15,
    ):
        """Store the weights of the three cost terms."""
        self.cost_token = cost_token
        self.cost_position = cost_position
        self.cost_order = cost_order
        # Filled by __call__ with the last computed per-term cost matrices
        # under the keys "token", "position" and "order".
        self.cost = {}

    def calculate_token_cost_old(self, box_gt, box_pred):
        """Reference (loop-based) token cost; same result as ``calculate_token_cost``.

        Returns a ``(len(box_gt), len(box_pred))`` array holding 0 for
        identical tokens, 0.05 for tokens that are equal after
        ``norm_same_token``, and 1 otherwise.
        """
        token_cost = np.ones((len(box_gt), len(box_pred)))
        for i, gt in enumerate(box_gt):
            for j, pred in enumerate(box_pred):
                if gt['token'] == pred['token']:
                    token_cost[i, j] = 0
                elif norm_same_token(gt['token']) == norm_same_token(pred['token']):
                    token_cost[i, j] = 0.05
        return token_cost

    @staticmethod
    def _encode_tokens(box_list, raw_ids, norm_ids):
        """Turn tokens into integer ids (raw and normalized), extending the id maps."""
        raw, norm = [], []
        for box in box_list:
            token = box['token']
            raw.append(raw_ids.setdefault(token, len(raw_ids)))
            normalized = norm_same_token(token)
            norm.append(norm_ids.setdefault(normalized, len(norm_ids)))
        # dtype=int keeps empty inputs as integer arrays of shape (0,).
        return np.array(raw, dtype=int), np.array(norm, dtype=int)

    def calculate_token_cost(self, box_gt, box_pred):
        """Return the ``(len(box_gt), len(box_pred))`` token mismatch cost.

        Entries are 0.0 for identical tokens, 0.05 for tokens that differ
        but are equal after ``norm_same_token``, and 1.0 otherwise. Works
        for empty inputs (the result then has a zero-length axis).
        """
        raw_ids, norm_ids = {}, {}
        gt_raw, gt_norm = self._encode_tokens(box_gt, raw_ids, norm_ids)
        pred_raw, pred_norm = self._encode_tokens(box_pred, raw_ids, norm_ids)

        # Broadcast comparison of ids instead of building one-hot matrices.
        same_raw = gt_raw[:, None] == pred_raw[None, :]
        same_norm = gt_norm[:, None] == pred_norm[None, :]
        return np.where(same_raw, 0.0, np.where(same_norm, 0.05, 1.0))

    def box2array(self, box_list, size):
        """Return bboxes as an array of ``[x_min/W, y_min/H, x_max/W, y_max/H]`` rows.

        ``size`` unpacks into ``(W, H)``.
        """
        W, H = size
        return np.array([
            [x_min / W, y_min / H, x_max / W, y_max / H]
            for x_min, y_min, x_max, y_max in (box['bbox'] for box in box_list)
        ])

    def order2array(self, box_list):
        """Return a column array with each box's index divided by the list length."""
        count = len(box_list)
        return np.array([[idx / count] for idx in range(count)])

    def calculate_l1_cost(self, gt_array, pred_array):
        """Return pairwise L1 distances between rows, divided by the row width."""
        scale = gt_array.shape[-1]
        l1_cost = cdist(gt_array, pred_array, 'minkowski', p=1)
        return l1_cost / scale

    def __call__(self, box_gt, box_pred, gt_size, pred_size):
        """Compute the minimum-cost matching between ``box_gt`` and ``box_pred``.

        Args:
            box_gt: list of ground-truth box dicts.
            box_pred: list of predicted box dicts.
            gt_size: ``(W, H)`` used to normalize ground-truth bboxes.
            pred_size: ``(W, H)`` used to normalize predicted bboxes.

        Returns:
            A list of ``(gt_index, pred_index)`` pairs. Empty when either
            input list is empty. The per-term cost matrices of the call are
            stored in ``self.cost``.
        """
        n_gt, n_pred = len(box_gt), len(box_pred)
        if n_gt == 0 or n_pred == 0:
            # Nothing can be matched; the array helpers below would fail on
            # empty (1-D) inputs, so short-circuit with empty cost matrices.
            for key in ("token", "position", "order"):
                self.cost[key] = np.zeros((n_gt, n_pred))
            return []

        gt_box_array = self.box2array(box_gt, gt_size)
        pred_box_array = self.box2array(box_pred, pred_size)
        gt_order_array = self.order2array(box_gt)
        pred_order_array = self.order2array(box_pred)

        token_cost = self.calculate_token_cost(box_gt, box_pred)
        position_cost = self.calculate_l1_cost(gt_box_array, pred_box_array)
        order_cost = self.calculate_l1_cost(gt_order_array, pred_order_array)

        self.cost["token"] = token_cost
        self.cost["position"] = position_cost
        self.cost["order"] = order_cost

        cost = (
            self.cost_token * token_cost
            + self.cost_position * position_cost
            + self.cost_order * order_cost
        )
        # linear_sum_assignment rejects NaN entries; replace non-finite costs
        # with a large finite penalty.
        cost[np.isnan(cost) | np.isinf(cost)] = 100
        indexes = linear_sum_assignment(cost)
        return list(zip(*indexes))