# vis4d/data/const.py (3D-MOOD)
"""Defines data related constants.
While the datasets can hold arbitrary data types and formats, this file
provides some constants that are used to define a common data format which is
helpful to use for better data transformation.
"""
from dataclasses import dataclass
from enum import Enum
# A custom value to distinguish instance ID and category ID; needs to be
# greater than the number of categories. For a pixel in the panoptic result
# map: panoptic_id = instance_id * INSTANCE_OFFSET + category_id
INSTANCE_OFFSET = 1000
class AxisMode(Enum):
    """Coordinate-frame convention selector.

    Members:
        ROS: Right-handed robotics convention —
            x forward, y left, z up.
            Reference: https://www.ros.org/reps/rep-0103.html#axis-orientation
        OPENCV: Camera (pinhole) convention —
            x right, y down, z forward.
            Reference: https://docs.opencv.org/3.4/d9/d0c/group__calib3d.html
        LIDAR: LiDAR sensor convention —
            x right, y forward, z up.
            Reference: https://www.nuscenes.org/nuscenes#data-collection
    """

    ROS = 0
    OPENCV = 1
    LIDAR = 2
@dataclass
class CommonKeys:
    """Common supported keys for DictData.

    While DictData can hold arbitrary keys of data, we define a common set of
    keys where we expect a pre-defined format to enable the usage of common
    data pre-processing operations among different datasets.

    Each attribute is a string constant naming one dictionary key; access them
    as ``CommonKeys.images`` etc. rather than hard-coding the strings.

    NOTE(review): the ``@dataclass`` decorator has no effect here — none of
    the attributes carry type annotations, so dataclasses sees zero fields
    and the attributes remain plain class-level constants. Kept as-is to
    avoid changing ``__dataclass_fields__``-dependent behavior; confirm
    whether the decorator can be dropped.

    General Info:
        - sample_names (str): Name of the sample.

    If the dataset contains videos:
        - sequence_names (str): The name of the sequence.
        - frame_ids (int): The temporal frame index of the sample.

    Image Based Inputs:
        - images (NDArrayF32): Image of shape [1, H, W, C].
        - input_hw (Tuple[int, int]): Shape of image in (height, width) after
            transformations.
        - original_images (NDArrayF32): Original image of shape [1, H, W, C].
        - original_hw (Tuple[int, int]): Shape of original image in
            (height, width).

    Image Classification:
        - categories (NDArrayI64): Class labels of shape [1, ].

    2D Object Detection:
        - boxes2d (NDArrayF32): 2D bounding boxes of shape [N, 4] in xyxy
            format.
        - boxes2d_classes (NDArrayI64): Classes of 2D bounding boxes of shape
            [N,].
        - boxes2d_names (List[str]): Names of 2D bounding box classes, same
            order as `boxes2d_classes`.

    2D Object Tracking:
        - boxes2d_track_ids (NDArrayI64): Tracking IDs of 2D bounding boxes of
            shape [N,].

    Segmentation:
        - masks (NDArrayUI8): Segmentation masks of shape [N, H, W].
        - seg_masks (NDArrayUI8): Semantic segmentation masks [H, W].
        - instance_masks (NDArrayUI8): Instance segmentation masks of shape
            [N, H, W].
        - panoptic_masks (NDArrayI64): Panoptic segmentation masks [H, W].

    Depth Estimation:
        - depth_maps (NDArrayF32): Depth maps of shape [H, W].

    Optical Flow:
        - optical_flows (NDArrayF32): Optical flow maps of shape [H, W, 2].

    Sensor Calibration:
        - intrinsics (NDArrayF32): Intrinsic sensor calibration. Shape [3, 3].
        - extrinsics (NDArrayF32): Extrinsic sensor calibration, transformation
            of sensor to world coordinate frame. Shape [4, 4].
        - axis_mode (AxisMode): Coordinate convention of the current sensor.
        - timestamp (int): Sensor timestamp in Unix format.

    3D Point Cloud Data:
        - points3d (NDArrayF32): 3D pointcloud data, assumed to be [N, 3] and
            in sensor frame.
        - colors3d (NDArrayF32): Associated color values for each point [N, 3].

    3D Point Cloud Annotations:
        - semantics3d (NDArrayI64): Semantic classes of 3D points [N, 1].
        - instances3d (NDArrayI64): Instance IDs of 3D points [N, 1].

    3D Object Detection:
        - boxes3d (NDArrayF32): 3D bounding boxes of shape [N, 10], each
            consists of center (XYZ), dimensions (WLH), and orientation
            quaternion (WXYZ).
        - boxes3d_classes (NDArrayI64): Associated semantic classes of 3D
            bounding boxes of shape [N,].
        - boxes3d_names (List[str]): Names of 3D bounding box classes, same
            order as `boxes3d_classes`.
        - boxes3d_track_ids (NDArrayI64): Associated tracking IDs of 3D
            bounding boxes of shape [N,].
        - boxes3d_velocities (NDArrayF32): Associated velocities of 3D bounding
            boxes of shape [N, 3], where each velocity is in the form of
            (vx, vy, vz).
    """

    # General Info
    sample_names = "sample_names"
    sequence_names = "sequence_names"
    frame_ids = "frame_ids"
    # Image Based Inputs
    images = "images"
    input_hw = "input_hw"
    original_images = "original_images"
    original_hw = "original_hw"
    # Image Classification
    categories = "categories"
    # 2D Object Detection
    boxes2d = "boxes2d"
    boxes2d_classes = "boxes2d_classes"
    boxes2d_names = "boxes2d_names"
    # 2D Object Tracking
    boxes2d_track_ids = "boxes2d_track_ids"
    # Segmentation
    masks = "masks"
    seg_masks = "seg_masks"
    instance_masks = "instance_masks"
    panoptic_masks = "panoptic_masks"
    # Depth Estimation
    depth_maps = "depth_maps"
    # Optical Flow
    optical_flows = "optical_flows"
    # Sensor Calibration
    intrinsics = "intrinsics"
    extrinsics = "extrinsics"
    axis_mode = "axis_mode"
    timestamp = "timestamp"
    # 3D Point Cloud Data
    points3d = "points3d"
    colors3d = "colors3d"
    # 3D Point Cloud Annotations
    semantics3d = "semantics3d"
    instances3d = "instances3d"
    # 3D Object Detection
    boxes3d = "boxes3d"
    boxes3d_classes = "boxes3d_classes"
    boxes3d_names = "boxes3d_names"
    boxes3d_track_ids = "boxes3d_track_ids"
    boxes3d_velocities = "boxes3d_velocities"