Spaces:

RoyYang0714
/

3D-MOOD

Running on Zero

File size: 6,106 Bytes

9b33fca

"""Defines data related constants.

While the datasets can hold arbitrary data types and formats, this file
provides some constants that are used to define a common data format which is
helpful to use for better data transformation.
"""

from dataclasses import dataclass
from enum import Enum

# A custom value to distinguish instance ID and category ID; need to be greater
# than the number of categories. For a pixel in the panoptic result map:
# panaptic_id = instance_id * INSTANCE_OFFSET + category_id
INSTANCE_OFFSET = 1000


class AxisMode(Enum):
    """Enum for choosing among different coordinate frame conventions.

    ROS: The coordinate frame aligns with the right hand rule:
        - x axis points forward.
        - y axis points left.
        - z axis points up.
    See also: https://www.ros.org/reps/rep-0103.html#axis-orientation

    OpenCV: The coordinate frame aligns with a camera coordinate system:
        - x axis points right.
        - y axis points down.
        - z axis points forward.
    See also: https://docs.opencv.org/3.4/d9/d0c/group__calib3d.html

    LiDAR: The coordinate frame aligns with a LiDAR coordinate system:
        - x axis points right.
        - y axis points forward.
        - z axis points up.
    See also: https://www.nuscenes.org/nuscenes#data-collection
    """

    ROS = 0
    OPENCV = 1
    LIDAR = 2


@dataclass
class CommonKeys:
    """Common supported keys for DictData.

    While DictData can hold arbitrary keys of data, we define a common set of
    keys where we expect a pre-defined format to enable the usage of common
    data pre-processing operations among different datasets.

    General Info:
        - sample_names (str): Name of the sample.

    If the dataset contains videos:
        - sequence_names (str): The name of the sequence.
        - frame_ids (int): The temporal frame index of the sample.

    Image Based Inputs:
        - images (NDArrayF32): Image of shape [1, H, W, C].
        - input_hw (Tuple[int, int]): Shape of image in (height, width) after
          transformations.
        - original_images (NDArrayF32): Original image of shape [1, H, W, C].
        - original_hw (Tuple[int, int]): Shape of original image in
          (height, width).

    Image Classification:
        - categories (NDArrayI64): Class labels of shape [1, ].

    2D Object Detection:
        - boxes2d (NDArrayF32): 2D bounding boxes of shape [N, 4] in xyxy
          format.
        - boxes2d_classes (NDArrayI64): Classes of 2D bounding boxes of shape
          [N,].
        - boxes2d_names (List[str]): Names of 2D bounding box classes, same
          order as `boxes2d_classes`.

    2D Object Tracking:
        - boxes2d_track_ids (NDArrayI64): Tracking IDs of 2D bounding boxes of
          shape [N,].

    Segmentation:
        - masks (NDArrayUI8): Segmentation masks of shape [N, H, W].
        - seg_masks (NDArrayUI8): Semantic segmentation masks [H, W].
        - instance_masks (NDArrayUI8): Instance segmentation masks of shape
          [N, H, W].
        - panoptic_masks (NDArrayI64): Panoptic segmentation masks [H, W].

    Depth Estimation:
        - depth_maps (NDArrayF32): Depth maps of shape [H, W].

    Optical Flow:
        - optical_flows (NDArrayF32): Optical flow maps of shape [H, W, 2].

    Sensor Calibration:
        - intrinsics (NDArrayF32): Intrinsic sensor calibration. Shape [3, 3].
        - extrinsics (NDArrayF32): Extrinsic sensor calibration, transformation
          of sensor to world coordinate frame. Shape [4, 4].
        - axis_mode (AxisMode): Coordinate convention of the current sensor.
        - timestamp (int): Sensor timestamp in Unix format.

    3D Point Cloud Data:
        - points3d (NDArrayF32): 3D pointcloud data, assumed to be [N, 3] and
          in sensor frame.
        - colors3d (NDArrayF32): Associated color values for each point [N, 3].

    3D Point Cloud Annotations:
        - semantics3d (NDArrayI64): Semantic classes of 3D points [N, 1].
        - instances3d (NDArrayI64): Instance IDs of 3D points [N, 1].

    3D Object Detection:
        - boxes3d (NDArrayF32): 3D bounding boxes of shape [N, 10], each
          consists of center (XYZ), dimensions (WLH), and orientation
          quaternion (WXYZ).
        - boxes3d_classes (NDArrayI64): Associated semantic classes of 3D
          bounding boxes of shape [N,].
        - boxes3d_names (List[str]): Names of 3D bounding box classes, same
          order as `boxes3d_classes`.
        - boxes3d_track_ids (NDArrayI64): Associated tracking IDs of 3D
          bounding boxes of shape [N,].
        - boxes3d_velocities (NDArrayF32): Associated velocities of 3D bounding
          boxes of shape [N, 3], where each velocity is in the form of
          (vx, vy, vz).
    """

    # General Info
    sample_names = "sample_names"
    sequence_names = "sequence_names"
    frame_ids = "frame_ids"

    # image based inputs
    images = "images"
    input_hw = "input_hw"
    original_images = "original_images"
    original_hw = "original_hw"

    # Image Classification
    categories = "categories"

    # 2D Object Detection
    boxes2d = "boxes2d"
    boxes2d_classes = "boxes2d_classes"
    boxes2d_names = "boxes2d_names"

    # 2D Object Tracking
    boxes2d_track_ids = "boxes2d_track_ids"

    # Segmentation
    masks = "masks"
    seg_masks = "seg_masks"
    instance_masks = "instance_masks"
    panoptic_masks = "panoptic_masks"

    # Depth Estimation
    depth_maps = "depth_maps"

    # Optical Flow
    optical_flows = "optical_flows"

    # Sensor Calibration
    intrinsics = "intrinsics"
    extrinsics = "extrinsics"
    axis_mode = "axis_mode"
    timestamp = "timestamp"

    # 3D Point Cloud Data
    points3d = "points3d"
    colors3d = "colors3d"

    # 3D Point Cloud Annotations
    semantics3d = "semantics3d"
    instances3d = "instances3d"

    # 3D Object Detection
    boxes3d = "boxes3d"
    boxes3d_classes = "boxes3d_classes"
    boxes3d_names = "boxes3d_names"
    boxes3d_track_ids = "boxes3d_track_ids"
    boxes3d_velocities = "boxes3d_velocities"