Spaces:
Running
on
Zero
Running
on
Zero
| # Copyright (c) OpenMMLab. All rights reserved. | |
| from typing import Optional | |
| import numpy as np | |
| from mmpose.registry import KEYPOINT_CODECS | |
| from mmpose.structures import bbox_cs2xyxy, bbox_xyxy2cs | |
| from .base import BaseKeypointCodec | |
class EDPoseLabel(BaseKeypointCodec):
    r"""Generate keypoint and label coordinates for `ED-Pose`_ by
    Yang J. et al (2023).

    Note:

        - instance number: N
        - keypoint number: K
        - keypoint dimension: D
        - image size: [w, h]

    Encoded:

        - keypoints (np.ndarray): Keypoint coordinates in shape (N, K, D)
        - keypoints_visible (np.ndarray): Keypoint visibility in shape
            (N, K)
        - area (np.ndarray): Area in shape (N)
        - bbox (np.ndarray): Bbox in shape (N, 4)

    Args:
        num_select (int): The number of candidate instances
        num_keypoints (int): The Number of keypoints
    """

    # Extra inputs (besides keypoints / keypoints_visible) that ``encode``
    # accepts as keyword arguments.
    auxiliary_encode_keys = {'area', 'bboxes', 'img_shape'}

    # Maps the keys of the dict returned by ``encode`` to other field names.
    # NOTE(review): presumably the instance-level field names used by the
    # data pipeline -- confirm against the consumer of this table.
    instance_mapping_table = dict(
        bbox='bboxes',
        keypoints='keypoints',
        keypoints_visible='keypoints_visible',
        area='areas',
    )

    def __init__(self, num_select: int = 100, num_keypoints: int = 17):
        super().__init__()

        self.num_select = num_select
        self.num_keypoints = num_keypoints

    def encode(
        self,
        img_shape,
        keypoints: np.ndarray,
        keypoints_visible: Optional[np.ndarray] = None,
        area: Optional[np.ndarray] = None,
        bboxes: Optional[np.ndarray] = None,
    ) -> dict:
        """Encoding keypoints, area and bbox from input image space to
        normalized space.

        Every quantity is divided by the image size so that coordinates
        are expressed relative to the image width and height.

        Args:
            - img_shape (Sequence[int]): The shape of image in the format
                of (width, height).
            - keypoints (np.ndarray): Keypoint coordinates in
                shape (N, K, D). The last axis is divided by
                ``[width, height]``.
            - keypoints_visible (np.ndarray): Keypoint visibility in shape
                (N, K). Defaults to all ones when omitted.
            - area (np.ndarray): Instance areas in image space; divided
                by ``width * height``. Skipped when ``None``.
            - bboxes (np.ndarray): Instance bboxes in shape (N, 4), passed
                to ``bbox_xyxy2cs`` (so expected in xyxy format). Skipped
                when ``None``.

        Returns:
            encoded (dict): Contains the following items:

                - keypoints (np.ndarray): The normalized keypoints in
                    shape (N, K, D).
                - keypoints_visible (np.ndarray): Keypoint visibility in
                    shape (N, K).
                - area (np.ndarray): The normalized area in shape (N).
                - bbox (np.ndarray): The ``bbox_xyxy2cs`` outputs
                    concatenated along the last axis and divided by
                    ``[width, height, width, height]``.
        """
        w, h = img_shape

        # Only synthesize a visibility mask when there are keypoints to take
        # the (N, K) shape from; this keeps the ``keypoints is not None``
        # guard below meaningful instead of failing on ``None.shape``.
        if keypoints_visible is None and keypoints is not None:
            keypoints_visible = np.ones(keypoints.shape[:2], dtype=np.float32)

        if bboxes is not None:
            # bbox_xyxy2cs returns several arrays; join them into one array
            # per instance before normalizing.
            bboxes = np.concatenate(bbox_xyxy2cs(bboxes), axis=-1)
            bboxes = bboxes / np.array([w, h, w, h], dtype=np.float32)

        if area is not None:
            area = area / float(w * h)

        if keypoints is not None:
            keypoints = keypoints / np.array([w, h], dtype=np.float32)

        encoded = dict(
            keypoints=keypoints,
            area=area,
            bbox=bboxes,
            keypoints_visible=keypoints_visible)

        return encoded

    def decode(self, input_shapes: np.ndarray, pred_logits: np.ndarray,
               pred_boxes: np.ndarray, pred_keypoints: np.ndarray) -> tuple:
        """Select the final top-k keypoints, and decode the results from
        normalize size to origin input size.

        Args:
            input_shapes (np.ndarray): The size of the resized input image.
                It is split into two halves that are used as
                (height, width), in that order.
            pred_logits (np.ndarray): The predicted scores, a 2-D array.
                All entries are ranked jointly and each selected entry is
                mapped back to its row.
            pred_boxes (np.ndarray): The predicted bboxes, one row per row
                of ``pred_logits``. The last axis is split after the first
                two values and handed to ``bbox_cs2xyxy``.
            pred_keypoints (np.ndarray): The predicted keypoints, one row
                per row of ``pred_logits``. Only the first
                ``2 * num_keypoints`` columns are used, as interleaved
                (x, y) coordinates.

        Returns:
            tuple: Decoded results for the ``M`` selected instances, where
            ``M`` is at most ``num_select``:

                - boxes (np.ndarray): Bboxes in input image space,
                    shape (M, 4).
                - keypoints (np.ndarray): Keypoints in input image space,
                    shape (M, num_keypoints, 2).
                - scores (np.ndarray): The instance score repeated for
                    every keypoint, shape (M, num_keypoints).
        """
        num_keypoints = self.num_keypoints

        # Rank every logit jointly and keep the highest ``num_select``.
        prob = pred_logits.reshape(-1)
        topk_indexes = np.argsort(-prob)[:self.num_select]

        # Each keypoint of an instance inherits the instance score.
        scores = np.tile(prob[topk_indexes][:, np.newaxis],
                         [1, num_keypoints])

        # Map flat indices back to row indices of ``pred_logits``; the same
        # rows index both the boxes and the keypoints.
        topk_rows = topk_indexes // pred_logits.shape[1]

        # NOTE(review): the size is unpacked as (height, width) here while
        # ``encode`` takes ``img_shape`` as (width, height) -- confirm that
        # callers pass ``input_shapes`` in the order expected here.
        img_h, img_w = np.split(input_shapes, 2, axis=0)

        # Decode bounding boxes and convert from relative to absolute
        # coordinates.
        boxes = bbox_cs2xyxy(*np.split(pred_boxes, [2], axis=-1))
        boxes = boxes[topk_rows]
        scale_fct = np.hstack([img_w, img_h, img_w, img_h])
        boxes = boxes * scale_fct[np.newaxis, :]

        # Decode keypoints: keep the leading (x, y) columns of the selected
        # rows and scale x by the width and y by the height.
        keypoints = pred_keypoints[topk_rows, :(num_keypoints * 2)]
        keypoints = keypoints * np.tile(
            np.hstack([img_w, img_h]), [num_keypoints])[np.newaxis, :]
        keypoints = keypoints.reshape(-1, num_keypoints, 2)

        return boxes, keypoints, scores