Spaces:
Running
on
Zero
Running
on
Zero
| # Copyright (c) OpenMMLab. All rights reserved. | |
| from typing import Optional, Tuple | |
| import numpy as np | |
| from mmpose.registry import KEYPOINT_CODECS | |
| from .base import BaseKeypointCodec | |
| class RegressionLabel(BaseKeypointCodec): | |
| r"""Generate keypoint coordinates. | |
| Note: | |
| - instance number: N | |
| - keypoint number: K | |
| - keypoint dimension: D | |
| - image size: [w, h] | |
| Encoded: | |
| - keypoint_labels (np.ndarray): The normalized regression labels in | |
| shape (N, K, D) where D is 2 for 2d coordinates | |
| - keypoint_weights (np.ndarray): The target weights in shape (N, K) | |
| Args: | |
| input_size (tuple): Input image size in [w, h] | |
| """ | |
| label_mapping_table = dict( | |
| keypoint_labels='keypoint_labels', | |
| keypoint_weights='keypoint_weights', | |
| ) | |
| def __init__(self, input_size: Tuple[int, int]) -> None: | |
| super().__init__() | |
| self.input_size = input_size | |
| def encode(self, | |
| keypoints: np.ndarray, | |
| keypoints_visible: Optional[np.ndarray] = None) -> dict: | |
| """Encoding keypoints from input image space to normalized space. | |
| Args: | |
| keypoints (np.ndarray): Keypoint coordinates in shape (N, K, D) | |
| keypoints_visible (np.ndarray): Keypoint visibilities in shape | |
| (N, K) | |
| Returns: | |
| dict: | |
| - keypoint_labels (np.ndarray): The normalized regression labels in | |
| shape (N, K, D) where D is 2 for 2d coordinates | |
| - keypoint_weights (np.ndarray): The target weights in shape | |
| (N, K) | |
| """ | |
| if keypoints_visible is None: | |
| keypoints_visible = np.ones(keypoints.shape[:2], dtype=np.float32) | |
| w, h = self.input_size | |
| valid = ((keypoints >= 0) & | |
| (keypoints <= [w - 1, h - 1])).all(axis=-1) & ( | |
| keypoints_visible > 0.5) | |
| keypoint_labels = (keypoints / np.array([w, h])).astype(np.float32) | |
| keypoint_weights = np.where(valid, 1., 0.).astype(np.float32) | |
| encoded = dict( | |
| keypoint_labels=keypoint_labels, keypoint_weights=keypoint_weights) | |
| return encoded | |
| def decode(self, encoded: np.ndarray) -> Tuple[np.ndarray, np.ndarray]: | |
| """Decode keypoint coordinates from normalized space to input image | |
| space. | |
| Args: | |
| encoded (np.ndarray): Coordinates in shape (N, K, D) | |
| Returns: | |
| tuple: | |
| - keypoints (np.ndarray): Decoded coordinates in shape (N, K, D) | |
| - scores (np.ndarray): The keypoint scores in shape (N, K). | |
| It usually represents the confidence of the keypoint prediction | |
| """ | |
| if encoded.shape[-1] == 2: | |
| N, K, _ = encoded.shape | |
| normalized_coords = encoded.copy() | |
| scores = np.ones((N, K), dtype=np.float32) | |
| elif encoded.shape[-1] == 4: | |
| # split coords and sigma if outputs contain output_sigma | |
| normalized_coords = encoded[..., :2].copy() | |
| output_sigma = encoded[..., 2:4].copy() | |
| scores = (1 - output_sigma).mean(axis=-1) | |
| else: | |
| raise ValueError( | |
| 'Keypoint dimension should be 2 or 4 (with sigma), ' | |
| f'but got {encoded.shape[-1]}') | |
| w, h = self.input_size | |
| keypoints = normalized_coords * np.array([w, h]) | |
| return keypoints, scores | |