Spaces:
Running
on
Zero
Running
on
Zero
| # Copyright (c) OpenMMLab. All rights reserved. | |
| from itertools import product | |
| from typing import Optional, Tuple, Union | |
| import numpy as np | |
| from mmpose.codecs.utils import get_simcc_maximum | |
| from mmpose.codecs.utils.refinement import refine_simcc_dark | |
| from mmpose.registry import KEYPOINT_CODECS | |
| from .base import BaseKeypointCodec | |
class SimCCLabel(BaseKeypointCodec):
    r"""Generate keypoint representation via "SimCC" approach.

    See the paper: `SimCC: a Simple Coordinate Classification Perspective for
    Human Pose Estimation`_ by Li et al (2022) for more details.
    Old name: SimDR

    Note:

        - instance number: N
        - keypoint number: K
        - keypoint dimension: D
        - image size: [w, h]

    Encoded:

        - keypoint_x_labels (np.ndarray): The generated SimCC label for
            x-axis in shape (N, K, Wx), where
            :math:`Wx=round(w*simcc_split_ratio)`. With
            ``smoothing_type=='gaussian'`` every row is a 1-D Gaussian
            centred on the keypoint; with ``smoothing_type=='standard'``
            every row is a one-hot vector (label-smoothed when
            ``label_smooth_weight > 0``)
        - keypoint_y_labels (np.ndarray): The generated SimCC label for
            y-axis in shape (N, K, Wy), where
            :math:`Wy=round(h*simcc_split_ratio)`. Same layout as
            ``keypoint_x_labels``
        - keypoint_weights (np.ndarray): The target weights in shape (N, K)

    Args:
        input_size (tuple): Input image size in [w, h]
        smoothing_type (str): The SimCC label smoothing strategy. Options are
            ``'gaussian'`` and ``'standard'``. Defaults to ``'gaussian'``
        sigma (float | int | tuple): The sigma value in the Gaussian SimCC
            label. A scalar is used for both axes; a pair is interpreted as
            ``(sigma_x, sigma_y)``. Defaults to 6.0
        simcc_split_ratio (float): The ratio of the label size to the input
            size. For example, if the input width is ``w``, the x label size
            will be :math:`w*simcc_split_ratio`. Defaults to 2.0
        label_smooth_weight (float): Label Smoothing weight. Only allowed to
            be non-zero in ``'standard'`` mode. Defaults to 0.0
        normalize (bool): Whether to divide the Gaussian labels by
            :math:`\sigma\sqrt{2\pi}`. Defaults to True.
        use_dark (bool): Whether to use the DARK post processing. Defaults to
            False.
        decode_visibility (bool): Whether to decode the visibility. Defaults
            to False.
        decode_beta (float): The beta value for decoding visibility. The
            SimCC vectors are multiplied by ``decode_beta * sigma`` before
            the softmax-based maximum is taken. Defaults to 150.0.

    Raises:
        ValueError: If ``smoothing_type`` is not ``'gaussian'`` or
            ``'standard'``, if ``label_smooth_weight > 0`` is combined with
            ``'gaussian'`` mode, or if ``label_smooth_weight`` is outside
            [0, 1].

    .. _`SimCC: a Simple Coordinate Classification Perspective for Human Pose
    Estimation`: https://arxiv.org/abs/2107.03332
    """

    label_mapping_table = dict(
        keypoint_x_labels='keypoint_x_labels',
        keypoint_y_labels='keypoint_y_labels',
        keypoint_weights='keypoint_weights',
    )

    def __init__(
        self,
        input_size: Tuple[int, int],
        smoothing_type: str = 'gaussian',
        sigma: Union[float, int, Tuple[float]] = 6.0,
        simcc_split_ratio: float = 2.0,
        label_smooth_weight: float = 0.0,
        normalize: bool = True,
        use_dark: bool = False,
        decode_visibility: bool = False,
        decode_beta: float = 150.0,
    ) -> None:
        super().__init__()

        self.input_size = input_size
        self.smoothing_type = smoothing_type
        self.simcc_split_ratio = simcc_split_ratio
        self.label_smooth_weight = label_smooth_weight
        self.normalize = normalize
        self.use_dark = use_dark
        self.decode_visibility = decode_visibility
        self.decode_beta = decode_beta

        # A scalar sigma (Python or NumPy scalar) is shared by both axes so
        # that ``self.sigma[0]`` / ``self.sigma[1]`` are always valid.
        if isinstance(sigma, (float, int, np.number)):
            self.sigma = np.array([sigma, sigma])
        else:
            self.sigma = np.array(sigma)

        if self.smoothing_type not in {'gaussian', 'standard'}:
            raise ValueError(
                f'{self.__class__.__name__} got invalid `smoothing_type` '
                f'value {self.smoothing_type}. Should be one of '
                '{"gaussian", "standard"}')

        if self.smoothing_type == 'gaussian' and self.label_smooth_weight > 0:
            raise ValueError('Attribute `label_smooth_weight` is only '
                             'used for `standard` mode.')

        if self.label_smooth_weight < 0.0 or self.label_smooth_weight > 1.0:
            raise ValueError('`label_smooth_weight` should be in range [0, 1]')

    def encode(self,
               keypoints: np.ndarray,
               keypoints_visible: Optional[np.ndarray] = None) -> dict:
        """Encoding keypoints into SimCC labels. Note that the original
        keypoint coordinates should be in the input image space.

        Args:
            keypoints (np.ndarray): Keypoint coordinates in shape (N, K, D)
            keypoints_visible (np.ndarray): Keypoint visibilities in shape
                (N, K). If ``None``, all keypoints are treated as visible

        Returns:
            dict:
            - keypoint_x_labels (np.ndarray): The generated SimCC label for
                x-axis in shape (N, K, Wx), where
                :math:`Wx=round(w*simcc_split_ratio)`
            - keypoint_y_labels (np.ndarray): The generated SimCC label for
                y-axis in shape (N, K, Wy), where
                :math:`Wy=round(h*simcc_split_ratio)`
            - keypoint_weights (np.ndarray): The target weights in shape
                (N, K)

        Raises:
            ValueError: If ``self.smoothing_type`` is neither ``'gaussian'``
                nor ``'standard'``.
        """
        if keypoints_visible is None:
            keypoints_visible = np.ones(keypoints.shape[:2], dtype=np.float32)

        if self.smoothing_type == 'gaussian':
            x_labels, y_labels, keypoint_weights = self._generate_gaussian(
                keypoints, keypoints_visible)
        elif self.smoothing_type == 'standard':
            x_labels, y_labels, keypoint_weights = self._generate_standard(
                keypoints, keypoints_visible)
        else:
            # Only reachable if the attribute was changed after __init__.
            raise ValueError(
                f'{self.__class__.__name__} got invalid `smoothing_type` '
                f'value {self.smoothing_type}. Should be one of '
                '{"gaussian", "standard"}')

        encoded = dict(
            keypoint_x_labels=x_labels,
            keypoint_y_labels=y_labels,
            keypoint_weights=keypoint_weights)

        return encoded

    def decode(
        self, simcc_x: np.ndarray, simcc_y: np.ndarray
    ) -> Tuple[np.ndarray, Union[np.ndarray, Tuple[np.ndarray, np.ndarray]]]:
        """Decode keypoint coordinates from SimCC representations. The decoded
        coordinates are in the input image space.

        Args:
            simcc_x (np.ndarray): SimCC label for x-axis
            simcc_y (np.ndarray): SimCC label for y-axis

        Returns:
            tuple:
            - keypoints (np.ndarray): Decoded coordinates in shape (N, K, D)
            - scores (np.ndarray | tuple): The keypoint scores in shape
                (N, K). It usually represents the confidence of the keypoint
                prediction. When ``decode_visibility`` is True this element
                is the tuple ``(scores, visibility)`` instead
        """
        keypoints, scores = get_simcc_maximum(simcc_x, simcc_y)

        # Unsqueeze the instance dimension for single-instance results
        if keypoints.ndim == 2:
            keypoints = keypoints[None, :]
            scores = scores[None, :]

        if self.use_dark:
            # Blur kernel size is derived from sigma for each axis ...
            x_blur = int((self.sigma[0] * 20 - 7) // 3)
            y_blur = int((self.sigma[1] * 20 - 7) // 3)
            # ... and decremented when even so that it is always odd
            # (presumably required by the blur in refine_simcc_dark).
            x_blur -= int((x_blur % 2) == 0)
            y_blur -= int((y_blur % 2) == 0)
            keypoints[:, :, 0] = refine_simcc_dark(keypoints[:, :, 0], simcc_x,
                                                   x_blur)
            keypoints[:, :, 1] = refine_simcc_dark(keypoints[:, :, 1], simcc_y,
                                                   y_blur)

        # Map from SimCC bin indices back to input-image coordinates.
        keypoints /= self.simcc_split_ratio

        if self.decode_visibility:
            # Re-run the maximum search on sharpened logits with softmax to
            # obtain a visibility estimate; the locations are discarded.
            _, visibility = get_simcc_maximum(
                simcc_x * self.decode_beta * self.sigma[0],
                simcc_y * self.decode_beta * self.sigma[1],
                apply_softmax=True)
            return keypoints, (scores, visibility)
        else:
            return keypoints, scores

    def _map_coordinates(
        self,
        keypoints: np.ndarray,
        keypoints_visible: Optional[np.ndarray] = None
    ) -> Tuple[np.ndarray, np.ndarray]:
        """Mapping keypoint coordinates into SimCC space.

        Args:
            keypoints (np.ndarray): Keypoint coordinates in shape (N, K, D)
            keypoints_visible (np.ndarray): Keypoint visibilities in shape
                (N, K). If ``None``, all keypoints are treated as visible

        Returns:
            tuple:
            - keypoints_split (np.ndarray): ``keypoints`` scaled by
                ``simcc_split_ratio`` and rounded to ``int64`` bin indices
            - keypoint_weights (np.ndarray): A copy of ``keypoints_visible``
        """
        if keypoints_visible is None:
            keypoints_visible = np.ones(keypoints.shape[:2], dtype=np.float32)

        # The multiplication already allocates a new array, so the input is
        # never modified.
        keypoints_split = np.around(keypoints * self.simcc_split_ratio)
        keypoints_split = keypoints_split.astype(np.int64)
        keypoint_weights = keypoints_visible.copy()

        return keypoints_split, keypoint_weights

    def _generate_standard(
        self,
        keypoints: np.ndarray,
        keypoints_visible: Optional[np.ndarray] = None
    ) -> Tuple[np.ndarray, np.ndarray, np.ndarray]:
        """Encoding keypoints into SimCC labels with Standard Label Smoothing
        strategy.

        Labels will be one-hot vectors if self.label_smooth_weight==0.0

        Args:
            keypoints (np.ndarray): Keypoint coordinates in shape (N, K, 2)
            keypoints_visible (np.ndarray): Keypoint visibilities in shape
                (N, K). If ``None``, all keypoints are treated as visible

        Returns:
            tuple:
            - target_x (np.ndarray): float32 x-axis labels in shape (N, K, W)
            - target_y (np.ndarray): float32 y-axis labels in shape (N, K, H)
            - keypoint_weights (np.ndarray): Target weights in shape (N, K);
                set to 0 for keypoints that fall outside the label range
        """
        if keypoints_visible is None:
            keypoints_visible = np.ones(keypoints.shape[:2], dtype=np.float32)

        N, K, _ = keypoints.shape
        w, h = self.input_size
        W = np.around(w * self.simcc_split_ratio).astype(int)
        H = np.around(h * self.simcc_split_ratio).astype(int)

        keypoints_split, keypoint_weights = self._map_coordinates(
            keypoints, keypoints_visible)

        target_x = np.zeros((N, K, W), dtype=np.float32)
        target_y = np.zeros((N, K, H), dtype=np.float32)

        for n, k in product(range(N), range(K)):
            # skip unlabeled keypoints (their labels stay all-zero)
            if keypoints_visible[n, k] < 0.5:
                continue

            # get center coordinates
            mu_x, mu_y = keypoints_split[n, k].astype(np.int64)

            # detect abnormal coords and assign the weight 0
            if mu_x >= W or mu_y >= H or mu_x < 0 or mu_y < 0:
                keypoint_weights[n, k] = 0
                continue

            if self.label_smooth_weight > 0:
                # spread the smoothing mass uniformly over the other bins
                target_x[n, k] = self.label_smooth_weight / (W - 1)
                target_y[n, k] = self.label_smooth_weight / (H - 1)

            target_x[n, k, mu_x] = 1.0 - self.label_smooth_weight
            target_y[n, k, mu_y] = 1.0 - self.label_smooth_weight

        return target_x, target_y, keypoint_weights

    def _generate_gaussian(
        self,
        keypoints: np.ndarray,
        keypoints_visible: Optional[np.ndarray] = None
    ) -> Tuple[np.ndarray, np.ndarray, np.ndarray]:
        """Encoding keypoints into SimCC labels with Gaussian Label Smoothing
        strategy.

        Args:
            keypoints (np.ndarray): Keypoint coordinates in shape (N, K, 2)
            keypoints_visible (np.ndarray): Keypoint visibilities in shape
                (N, K). If ``None``, all keypoints are treated as visible

        Returns:
            tuple:
            - target_x (np.ndarray): float32 x-axis labels in shape (N, K, W)
            - target_y (np.ndarray): float32 y-axis labels in shape (N, K, H)
            - keypoint_weights (np.ndarray): Target weights in shape (N, K);
                set to 0 for keypoints whose 3-sigma window lies entirely
                outside the label range
        """
        if keypoints_visible is None:
            keypoints_visible = np.ones(keypoints.shape[:2], dtype=np.float32)

        N, K, _ = keypoints.shape
        w, h = self.input_size
        W = np.around(w * self.simcc_split_ratio).astype(int)
        H = np.around(h * self.simcc_split_ratio).astype(int)

        keypoints_split, keypoint_weights = self._map_coordinates(
            keypoints, keypoints_visible)

        target_x = np.zeros((N, K, W), dtype=np.float32)
        target_y = np.zeros((N, K, H), dtype=np.float32)

        # 3-sigma rule
        radius = self.sigma * 3

        # xy grid
        x = np.arange(0, W, 1, dtype=np.float32)
        y = np.arange(0, H, 1, dtype=np.float32)

        for n, k in product(range(N), range(K)):
            # skip unlabeled keypoints (their labels stay all-zero)
            if keypoints_visible[n, k] < 0.5:
                continue

            mu = keypoints_split[n, k]

            # check that the gaussian has in-bounds part
            left, top = mu - radius
            right, bottom = mu + radius + 1

            if left >= W or top >= H or right < 0 or bottom < 0:
                keypoint_weights[n, k] = 0
                continue

            mu_x, mu_y = mu

            target_x[n, k] = np.exp(-((x - mu_x)**2) / (2 * self.sigma[0]**2))
            target_y[n, k] = np.exp(-((y - mu_y)**2) / (2 * self.sigma[1]**2))

        if self.normalize:
            # Applied once to the whole arrays, after all rows are filled.
            norm_value = self.sigma * np.sqrt(np.pi * 2)
            target_x /= norm_value[0]
            target_y /= norm_value[1]

        return target_x, target_y, keypoint_weights