Spaces:
Running
on
Zero
Running
on
Zero
| # Copyright (c) OpenMMLab. All rights reserved. | |
| from typing import Dict, List, Optional, Tuple | |
| import numpy as np | |
| from mmpose.registry import KEYPOINT_CODECS | |
| from .base import BaseKeypointCodec | |
| INF = 1e6 | |
| NEG_INF = -1e6 | |
| class BaseAnnotationProcessor(BaseKeypointCodec): | |
| """Base class for annotation processors.""" | |
| def decode(self, *args, **kwargs): | |
| pass | |
| class YOLOXPoseAnnotationProcessor(BaseAnnotationProcessor): | |
| """Convert dataset annotations to the input format of YOLOX-Pose. | |
| This processor expands bounding boxes and converts category IDs to labels. | |
| Args: | |
| extend_bbox (bool, optional): Whether to expand the bounding box | |
| to include all keypoints. Defaults to False. | |
| input_size (tuple, optional): The size of the input image for the | |
| model, formatted as (h, w). This argument is necessary for the | |
| codec in deployment but is not used indeed. | |
| """ | |
| auxiliary_encode_keys = {'category_id', 'bbox'} | |
| label_mapping_table = dict( | |
| bbox='bboxes', | |
| bbox_labels='labels', | |
| keypoints='keypoints', | |
| keypoints_visible='keypoints_visible', | |
| area='areas', | |
| ) | |
| instance_mapping_table = dict( | |
| bbox='bboxes', | |
| bbox_score='bbox_scores', | |
| keypoints='keypoints', | |
| keypoints_visible='keypoints_visible', | |
| # remove 'bbox_scales' in default instance_mapping_table to avoid | |
| # length mismatch during training with multiple datasets | |
| ) | |
| def __init__(self, | |
| extend_bbox: bool = False, | |
| input_size: Optional[Tuple] = None): | |
| super().__init__() | |
| self.extend_bbox = extend_bbox | |
| def encode(self, | |
| keypoints: Optional[np.ndarray] = None, | |
| keypoints_visible: Optional[np.ndarray] = None, | |
| bbox: Optional[np.ndarray] = None, | |
| category_id: Optional[List[int]] = None | |
| ) -> Dict[str, np.ndarray]: | |
| """Encode keypoints, bounding boxes, and category IDs. | |
| Args: | |
| keypoints (np.ndarray, optional): Keypoints array. Defaults | |
| to None. | |
| keypoints_visible (np.ndarray, optional): Visibility array for | |
| keypoints. Defaults to None. | |
| bbox (np.ndarray, optional): Bounding box array. Defaults to None. | |
| category_id (List[int], optional): List of category IDs. Defaults | |
| to None. | |
| Returns: | |
| Dict[str, np.ndarray]: Encoded annotations. | |
| """ | |
| results = {} | |
| if self.extend_bbox and bbox is not None: | |
| # Handle keypoints visibility | |
| if keypoints_visible.ndim == 3: | |
| keypoints_visible = keypoints_visible[..., 0] | |
| # Expand bounding box to include keypoints | |
| kpts_min = keypoints.copy() | |
| kpts_min[keypoints_visible == 0] = INF | |
| bbox[..., :2] = np.minimum(bbox[..., :2], kpts_min.min(axis=1)) | |
| kpts_max = keypoints.copy() | |
| kpts_max[keypoints_visible == 0] = NEG_INF | |
| bbox[..., 2:] = np.maximum(bbox[..., 2:], kpts_max.max(axis=1)) | |
| results['bbox'] = bbox | |
| if category_id is not None: | |
| # Convert category IDs to labels | |
| bbox_labels = np.array(category_id).astype(np.int8) - 1 | |
| results['bbox_labels'] = bbox_labels | |
| return results | |