"""3D Multiple Object Detection Evaluator."""
import contextlib
import copy
import datetime
import io
import itertools
import json
import os
import time
from collections import defaultdict
import numpy as np
import pycocotools.mask as maskUtils
import torch
from pycocotools.cocoeval import COCOeval
from scipy.spatial.distance import cdist
from terminaltables import AsciiTable
from vis4d.common.array import array_to_numpy
from vis4d.common.distributed import all_gather_object_cpu
from vis4d.common.typing import (
ArrayLike,
DictStrAny,
GenericFunc,
MetricLogs,
NDArrayF32,
NDArrayI64,
)
from vis4d.data.const import AxisMode
from vis4d.eval.base import Evaluator
from vis4d.eval.coco.detect import xyxy_to_xywh
from vis4d.op.box.box3d import boxes3d_to_corners
from vis4d.op.geometry.rotation import quaternion_to_matrix
from opendet3d.data.datasets.coco3d import COCO3D
from opendet3d.op.box.box3d import box3d_overlap
from opendet3d.op.geometric.rotation import so3_relative_angle
class Detect3DEvaluator(Evaluator):
"""3D object detection evaluation with COCO format."""
def __init__(
self,
det_map: dict[str, int],
cat_map: dict[str, int],
annotation: str,
id2name: dict[int, str] | None = None,
per_class_eval: bool = True,
eval_prox: bool = False,
iou_type: str = "bbox",
num_columns: int = 6,
base_classes: list[str] | None = None,
) -> None:
"""Create an instance of the class."""
if id2name is None:
self.id2name = {v: k for k, v in det_map.items()}
else:
self.id2name = id2name
self.annotation = annotation
self.per_class_eval = per_class_eval
self.eval_prox = eval_prox
self.iou_type = iou_type
self.num_columns = num_columns
self.base_classes = base_classes
self.tp_errors = ["ATE", "AOE", "ASE"]
category_names = sorted(det_map, key=det_map.get)
with contextlib.redirect_stdout(io.StringIO()):
self._coco_gt = COCO3D([annotation], category_names)
self.cat_map = cat_map
self.bbox_2D_evals_per_cat_area: DictStrAny = {}
self.bbox_3D_evals_per_cat_area: DictStrAny = {}
self._predictions: list[DictStrAny] = []
def __repr__(self) -> str:
"""Returns the string representation of the object."""
return f"3D Object Detection Evaluator with {self.annotation}"
@property
def metrics(self) -> list[str]:
"""Supported metrics.
Returns:
list[str]: Metrics to evaluate.
"""
return ["2D", "3D"]
def gather(self, gather_func: GenericFunc) -> None:
"""Accumulate predictions across processes."""
all_preds = all_gather_object_cpu(
self._predictions, use_system_tmp=False
)
if all_preds is not None:
self._predictions = list(itertools.chain(*all_preds))
def reset(self) -> None:
"""Reset the saved predictions to start new round of evaluation."""
self._predictions.clear()
self.bbox_2D_evals_per_cat_area.clear()
self.bbox_3D_evals_per_cat_area.clear()
def process_batch(
self,
coco_image_id: list[int],
pred_boxes: list[ArrayLike],
pred_scores: list[ArrayLike],
pred_classes: list[ArrayLike],
pred_boxes3d: list[ArrayLike] | None = None,
) -> None:
"""Process sample and convert detections to coco format."""
for i, image_id in enumerate(coco_image_id):
boxes = array_to_numpy(
pred_boxes[i].to(torch.float32), n_dims=None, dtype=np.float32
)
scores = array_to_numpy(
pred_scores[i].to(torch.float32), n_dims=None, dtype=np.float32
)
classes = array_to_numpy(
pred_classes[i], n_dims=None, dtype=np.int64
)
if pred_boxes3d is not None:
boxes3d = array_to_numpy(
pred_boxes3d[i].to(torch.float32),
n_dims=None,
dtype=np.float32,
)
else:
boxes3d = None
self._predictions_to_coco(
image_id, boxes, boxes3d, scores, classes
)
def _predictions_to_coco(
self,
img_id: int,
boxes: NDArrayF32,
boxes3d: NDArrayF32 | None,
scores: NDArrayF32,
classes: NDArrayI64,
) -> None:
"""Convert predictions to COCO format."""
boxes_xyxy = copy.deepcopy(boxes)
boxes_xywh = xyxy_to_xywh(boxes_xyxy)
if boxes3d is not None:
# FIXME: Make axismode configurable
corners_3d = boxes3d_to_corners(
torch.from_numpy(boxes3d), AxisMode.OPENCV
)
for i, (box, box_score, box_class) in enumerate(
zip(boxes_xywh, scores, classes)
):
xywh = box.tolist()
result = {
"image_id": img_id,
"bbox": xywh,
"category_id": self.cat_map[self.id2name[box_class.item()]],
"score": box_score.item(),
}
# mapping to Omni3D format
if boxes3d is not None:
result["center_cam"] = boxes3d[i][:3].tolist()
# wlh to whl
result["dimensions"] = boxes3d[i][[3, 5, 4]].tolist()
result["R_cam"] = (
quaternion_to_matrix(torch.from_numpy(boxes3d[i][6:10]))
.numpy()
.tolist()
)
corners = corners_3d[i].numpy().tolist()
result["bbox3D"] = [
corners[6],
corners[4],
corners[0],
corners[2],
corners[7],
corners[5],
corners[1],
corners[3],
]
result["depth"] = boxes3d[i][2].item()
self._predictions.append(result)
def evaluate(self, metric: str) -> tuple[MetricLogs, str]:
"""Evaluate predictions."""
if metric == "2D":
metrics = ["AP", "AP50", "AP75", "AP95", "APs", "APm", "APl"]
else:
if self.iou_type == "bbox":
metrics = ["AP", "AP15", "AP25", "AP50", "APn", "APm", "APf"]
main_metric = "AP"
else:
metrics = ["AP", "ATE", "ASE", "AOE", "ODS"]
main_metric = "ODS"
if self.base_classes is not None:
metrics += [f"{main_metric}_Base", f"{main_metric}_Novel"]
if len(self._predictions) == 0:
return {m: 0.0 for m in metrics}, "No predictions to evaluate."
with contextlib.redirect_stdout(io.StringIO()):
coco_dt = self._coco_gt.loadRes(self._predictions)
assert coco_dt is not None
evaluator = Detect3Deval(
self._coco_gt,
coco_dt,
mode=metric,
eval_prox=self.eval_prox,
iou_type=self.iou_type,
)
evaluator.evaluate()
evaluator.accumulate()
if self.iou_type == "bbox":
log_str = "\n" + evaluator.summarize()
# precision: (iou, recall, cls, area range, max dets)
precisions = evaluator.eval["precision"]
assert len(self._coco_gt.getCatIds()) == precisions.shape[2]
if metric == "2D":
self.bbox_2D_evals_per_cat_area = evaluator.evals_per_cat_area
score_dict = dict(zip(metrics, evaluator.stats))
else:
if self.iou_type == "bbox":
self.bbox_3D_evals_per_cat_area = evaluator.evals_per_cat_area
score_dict = dict(zip(metrics, evaluator.stats))
else:
trans_tp_errors = evaluator.eval["trans_tp_errors"]
rot_tp_errors = evaluator.eval["rot_tp_errors"]
scale_tp_errors = evaluator.eval["scale_tp_errors"]
precision = precisions[:, :, :, 0, -1]
precision = precision[precision > -1]
if precision.size:
mAP = np.mean(precision).item()
else:
mAP = float("nan")
trans_tp = trans_tp_errors[:, :, :, 0, -1]
trans_tp = trans_tp[trans_tp > -1]
rot_tp = rot_tp_errors[:, :, :, 0, -1]
rot_tp = rot_tp[rot_tp > -1]
scale_tp = scale_tp_errors[:, :, :, 0, -1]
scale_tp = scale_tp[scale_tp > -1]
if trans_tp.size:
mATE = np.mean(trans_tp).item()
mAOE = np.mean(rot_tp).item()
mASE = np.mean(scale_tp).item()
                    # ODS averages AP (weighted by 3) with the three
                    # complemented TP errors.
                    mODS = (
                        3 * mAP + (1 - mATE) + (1 - mAOE) + (1 - mASE)
                    ) / 6
else:
mATE = float("nan")
mAOE = float("nan")
mASE = float("nan")
mODS = float("nan")
score_dict = {
"AP": mAP,
"ATE": mATE,
"ASE": mASE,
"AOE": mAOE,
"ODS": mODS,
}
log_str = "\nHigh-level metrics:"
for k, v in score_dict.items():
log_str += f"\n{k}: {v:.4f}"
if self.per_class_eval:
results_per_category = []
score_base_list = []
score_novel_list = []
for idx, cat_id in enumerate(self._coco_gt.getCatIds()):
# area range index 0: all area ranges
# max dets index -1: typically 100 per image
nm = self._coco_gt.loadCats(cat_id)[0]
precision = precisions[:, :, idx, 0, -1]
precision = precision[precision > -1]
if precision.size:
ap = np.mean(precision).item()
else:
ap = float("nan")
if self.iou_type == "dist":
trans_tp = trans_tp_errors[:, :, idx, 0, -1]
trans_tp = trans_tp[trans_tp > -1]
rot_tp = rot_tp_errors[:, :, idx, 0, -1]
rot_tp = rot_tp[rot_tp > -1]
scale_tp = scale_tp_errors[:, :, idx, 0, -1]
scale_tp = scale_tp[scale_tp > -1]
if trans_tp.size:
ate = np.mean(trans_tp).item()
aoe = np.mean(rot_tp).item()
ase = np.mean(scale_tp).item()
                        ods = (
                            3 * ap + (1 - ate) + (1 - aoe) + (1 - ase)
                        ) / 6
else:
ate = float("nan")
aoe = float("nan")
ase = float("nan")
ods = float("nan")
results_per_category.append(
(
                            nm["name"],
f"{ap:0.3f}",
f"{ate:0.3f}",
f"{ase:0.3f}",
f"{aoe:0.3f}",
f"{ods:0.3f}",
)
)
else:
results_per_category.append(
                        (nm["name"], f"{ap:0.3f}")
)
if self.base_classes is not None:
if self.iou_type == "dist":
score = ods
else:
score = ap
if nm["name"] in self.base_classes:
score_base_list.append(score)
else:
score_novel_list.append(score)
results_flatten = list(itertools.chain(*results_per_category))
if self.iou_type == "dist":
num_columns = 6
headers = ["category", "AP", "ATE", "ASE", "AOE", "ODS"]
else:
num_columns = min(
self.num_columns, len(results_per_category) * 2
)
headers = ["category", "AP"] * (num_columns // 2)
results = itertools.zip_longest(
*[results_flatten[i::num_columns] for i in range(num_columns)]
)
table_data = [headers] + list(results)
table = AsciiTable(table_data)
log_str = f"\n{table.table}\n{log_str}"
if self.base_classes is not None:
score_dict[f"{main_metric}_Base"] = np.mean(score_base_list).item()
score_dict[f"{main_metric}_Novel"] = np.mean(
score_novel_list
).item()
return score_dict, log_str
def save(
self, metric: str, output_dir: str, prefix: str | None = None
) -> None:
"""Save the results to json files."""
assert metric in self.metrics
if prefix is not None:
result_folder = os.path.join(output_dir, prefix)
os.makedirs(result_folder, exist_ok=True)
else:
result_folder = output_dir
result_file = os.path.join(
result_folder, f"detect_{metric}_results.json"
)
with open(result_file, mode="w", encoding="utf-8") as f:
json.dump(self._predictions, f)
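

# A minimal usage sketch (for illustration only; the annotation path, class
# maps, and data loader below are hypothetical, not part of this module):
#
#     evaluator = Detect3DEvaluator(
#         det_map={"chair": 0, "table": 1},
#         cat_map={"chair": 1, "table": 2},
#         annotation="data/coco3d_val.json",
#     )
#     for batch in data_loader:
#         evaluator.process_batch(
#             coco_image_id=batch["image_id"],
#             pred_boxes=batch["boxes2d"],    # list of (N, 4) xyxy tensors
#             pred_scores=batch["scores"],    # list of (N,) tensors
#             pred_classes=batch["classes"],  # list of (N,) int64 tensors
#             pred_boxes3d=batch["boxes3d"],  # list of (N, 10) tensors
#         )
#     evaluator.gather(all_gather_object_cpu)
#     scores_2d, log_2d = evaluator.evaluate("2D")
#     scores_3d, log_3d = evaluator.evaluate("3D")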
class Detect3Deval(COCOeval):
"""COCOeval Wrapper for 2D and 3D box evaluation.
Now it support bbox IoU matching only.
"""
def __init__(
self,
cocoGt=None,
cocoDt=None,
mode: str = "2D",
iou_type: str = "bbox",
eval_prox: bool = False,
):
"""Initialize Detect3Deval using coco APIs for Gt and Dt.
Args:
cocoGt: COCO object with ground truth annotations
cocoDt: COCO object with detection results
mode: (str) defines whether to evaluate 2D or 3D performance.
One of {"2D", "3D"}
eval_prox: (bool) if True, performs "Proximity Evaluation", i.e.
evaluates detections in the proximity of the ground truth2D
boxes. This is used for datasets which are not exhaustively
annotated.
"""
if mode not in {"2D", "3D"}:
raise Exception(f"{mode} mode is not supported")
self.mode = mode
self.iou_type = iou_type
self.eval_prox = eval_prox
self.cocoGt = cocoGt # ground truth COCO API
self.cocoDt = cocoDt # detections COCO API
# per-image per-category evaluation results [KxAxI] elements
self.evalImgs = defaultdict(list)
self.eval = {} # accumulated evaluation results
self._gts = defaultdict(list) # gt for evaluation
self._dts = defaultdict(list) # dt for evaluation
self.params = Detect3DParams(mode=mode, iouType=iou_type) # parameters
self._paramsEval = {} # parameters for evaluation
self.stats = [] # result summarization
self.ious = {} # ious between all gts and dts
if cocoGt is not None:
self.params.imgIds = sorted(cocoGt.getImgIds())
self.params.catIds = sorted(cocoGt.getCatIds())
self.evals_per_cat_area = None
def _prepare(self) -> None:
"""Prepare ._gts and ._dts for evaluation based on params."""
p = self.params
if p.useCats:
gts = self.cocoGt.loadAnns(
self.cocoGt.getAnnIds(imgIds=p.imgIds, catIds=p.catIds)
)
dts = self.cocoDt.loadAnns(
self.cocoDt.getAnnIds(imgIds=p.imgIds, catIds=p.catIds)
)
else:
gts = self.cocoGt.loadAnns(self.cocoGt.getAnnIds(imgIds=p.imgIds))
dts = self.cocoDt.loadAnns(self.cocoDt.getAnnIds(imgIds=p.imgIds))
# set ignore flag
ignore_flag = "ignore2D" if self.mode == "2D" else "ignore3D"
for gt in gts:
gt[ignore_flag] = gt[ignore_flag] if ignore_flag in gt else 0
self._gts = defaultdict(list) # gt for evaluation
self._dts = defaultdict(list) # dt for evaluation
for gt in gts:
self._gts[gt["image_id"], gt["category_id"]].append(gt)
for dt in dts:
self._dts[dt["image_id"], dt["category_id"]].append(dt)
self.evalImgs = defaultdict(
list
) # per-image per-category evaluation results
self.eval = {} # accumulated evaluation results
def accumulate(self, p=None) -> None:
"""Accumulate per image evaluation and store the result in self.eval.
Args:
p: input params for evaluation
"""
print("Accumulating evaluation results...")
assert self.evalImgs, "Please run evaluate() first"
tic = time.time()
# allows input customized parameters
if p is None:
p = self.params
p.catIds = p.catIds if p.useCats == 1 else [-1]
T = len(p.iouThrs)
R = len(p.recThrs)
K = len(p.catIds) if p.useCats else 1
A = len(p.areaRng)
M = len(p.maxDets)
precision = -np.ones(
(T, R, K, A, M)
) # -1 for the precision of absent categories
trans_tp_errors = -np.ones((T, R, K, A, M))
rot_tp_errors = -np.ones((T, R, K, A, M))
scale_tp_errors = -np.ones((T, R, K, A, M))
recall = -np.ones((T, K, A, M))
scores = -np.ones((T, R, K, A, M))
# create dictionary for future indexing
_pe = self._paramsEval
catIds = _pe.catIds if _pe.useCats else [-1]
setK = set(catIds)
setA = set(map(tuple, _pe.areaRng))
setM = set(_pe.maxDets)
setI = set(_pe.imgIds)
# get inds to evaluate
catid_list = [k for n, k in enumerate(p.catIds) if k in setK]
k_list = [n for n, k in enumerate(p.catIds) if k in setK]
m_list = [m for n, m in enumerate(p.maxDets) if m in setM]
        a_list = [
            n for n, a in enumerate(map(tuple, p.areaRng)) if a in setA
        ]
i_list = [n for n, i in enumerate(p.imgIds) if i in setI]
I0 = len(_pe.imgIds)
A0 = len(_pe.areaRng)
        has_precomputed_evals = self.evals_per_cat_area is not None
if has_precomputed_evals:
evals_per_cat_area = self.evals_per_cat_area
else:
evals_per_cat_area = {}
# retrieve E at each category, area range, and max number of detections
for k, (k0, catId) in enumerate(zip(k_list, catid_list)):
Nk = k0 * A0 * I0
for a, a0 in enumerate(a_list):
Na = a0 * I0
if has_precomputed_evals:
E = evals_per_cat_area[(catId, a)]
else:
E = [self.evalImgs[Nk + Na + i] for i in i_list]
                    E = [e for e in E if e is not None]
evals_per_cat_area[(catId, a)] = E
if len(E) == 0:
continue
for m, maxDet in enumerate(m_list):
dtScores = np.concatenate(
[e["dtScores"][0:maxDet] for e in E]
)
                    # Different sorting methods generate slightly different
                    # results; mergesort is used to be consistent with the
                    # Matlab implementation.
inds = np.argsort(-dtScores, kind="mergesort")
dtScoresSorted = dtScores[inds]
dtm = np.concatenate(
[e["dtMatches"][:, 0:maxDet] for e in E], axis=1
)[:, inds]
dtIg = np.concatenate(
[e["dtIgnore"][:, 0:maxDet] for e in E], axis=1
)[:, inds]
gtIg = np.concatenate([e["gtIgnore"] for e in E])
npig = np.count_nonzero(gtIg == 0)
if npig == 0:
continue
tps = np.logical_and(dtm, np.logical_not(dtIg))
fps = np.logical_and(
np.logical_not(dtm), np.logical_not(dtIg)
)
tp_sum = np.cumsum(tps, axis=1).astype(dtype=np.float64)
fp_sum = np.cumsum(fps, axis=1).astype(dtype=np.float64)
# Compute TP error
if self.iou_type == "dist":
tems = np.concatenate(
[e["dtTranslationError"][:, 0:maxDet] for e in E],
axis=1,
)[:, inds]
oems = np.concatenate(
[e["dtOrientationError"][:, 0:maxDet] for e in E],
axis=1,
)[:, inds]
sems = np.concatenate(
[e["dtScaleError"][:, 0:maxDet] for e in E], axis=1
)[:, inds]
for t, (tp, fp) in enumerate(zip(tp_sum, fp_sum)):
tp = np.array(tp)
fp = np.array(fp)
nd = len(tp)
rc = tp / npig
pr = tp / (fp + tp + np.spacing(1))
q = np.zeros((R,))
ss = np.zeros((R,))
tran_tp_error = np.ones((R,))
rot_tp_error = np.ones((R,))
scale_tp_error = np.ones((R,))
if nd:
recall[t, k, a, m] = rc[-1]
else:
recall[t, k, a, m] = 0
                        # NumPy element access is slow without Cython
                        # optimization; converting to Python lists gives a
                        # significant speed-up.
pr = pr.tolist()
q = q.tolist()
tran_tp_error = tran_tp_error.tolist()
rot_tp_error = rot_tp_error.tolist()
scale_tp_error = scale_tp_error.tolist()
for i in range(nd - 1, 0, -1):
if pr[i] > pr[i - 1]:
pr[i - 1] = pr[i]
inds = np.searchsorted(rc, p.recThrs, side="left")
try:
for ri, pi in enumerate(inds):
q[ri] = pr[pi]
ss[ri] = dtScoresSorted[pi]
if self.iou_type == "dist":
tran_tp_error[ri] = tems[t][pi]
rot_tp_error[ri] = oems[t][pi]
scale_tp_error[ri] = sems[t][pi]
                        except IndexError:
                            # Recall never reaches the remaining
                            # thresholds; keep the default values.
                            pass
precision[t, :, k, a, m] = np.array(q)
scores[t, :, k, a, m] = np.array(ss)
if self.iou_type == "dist":
trans_tp_errors[t, :, k, a, m] = np.array(
tran_tp_error
)
rot_tp_errors[t, :, k, a, m] = np.array(
rot_tp_error
)
scale_tp_errors[t, :, k, a, m] = np.array(
scale_tp_error
)
self.evals_per_cat_area = evals_per_cat_area
self.eval = {
"params": p,
"counts": [T, R, K, A, M],
"date": datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
"precision": precision,
"recall": recall,
"scores": scores,
"trans_tp_errors": trans_tp_errors,
"rot_tp_errors": rot_tp_errors,
"scale_tp_errors": scale_tp_errors,
}
toc = time.time()
print("DONE (t={:0.2f}s).".format(toc - tic))
def evaluate(self) -> None:
"""Run per image evaluation on given images.
It will store results (a list of dict) in self.evalImgs
"""
print("Running per image evaluation...")
p = self.params
print(f"Evaluate annotation type *{p.iouType}*")
tic = time.time()
p.imgIds = list(np.unique(p.imgIds))
if p.useCats:
p.catIds = list(np.unique(p.catIds))
p.maxDets = sorted(p.maxDets)
self.params = p
self._prepare()
catIds = p.catIds if p.useCats else [-1]
# loop through images, area range, max detection number
self.ious = {
(imgId, catId): self.computeIoU(imgId, catId)
for imgId in p.imgIds
for catId in catIds
}
maxDet = p.maxDets[-1]
self.evalImgs = [
self.evaluateImg(imgId, catId, areaRng, maxDet)
for catId in catIds
for areaRng in p.areaRng
for imgId in p.imgIds
]
self._paramsEval = copy.deepcopy(self.params)
toc = time.time()
print("DONE (t={:0.2f}s).".format(toc - tic))
    def computeIoU(
        self, imgId, catId
    ) -> tuple[NDArrayF32, NDArrayF32 | None]:
        """Compute IoUs (or center distances) between dts and gts.

        Detections are sorted by descending score and truncated to
        maxDets before matching.
        """
p = self.params
if p.useCats:
gt = self._gts[imgId, catId]
dt = self._dts[imgId, catId]
else:
gt = [_ for cId in p.catIds for _ in self._gts[imgId, cId]]
dt = [_ for cId in p.catIds for _ in self._dts[imgId, cId]]
        if len(gt) == 0 and len(dt) == 0:
            return [], None
inds = np.argsort([-d["score"] for d in dt], kind="mergesort")
dt = [dt[i] for i in inds]
if len(dt) > p.maxDets[-1]:
dt = dt[0 : p.maxDets[-1]]
if self.mode == "2D":
g = [g["bbox"] for g in gt]
d = [d["bbox"] for d in dt]
elif self.mode == "3D":
g = [g["bbox3D"] for g in gt]
d = [d["bbox3D"] for d in dt]
# compute iou between each dt and gt region
        # iscrowd is required by the builtin maskUtils, so we
        # pass a dummy buffer for it
iscrowd = [0 for _ in gt]
if self.mode == "2D":
ious = maskUtils.iou(d, g, iscrowd)
elif len(d) > 0 and len(g) > 0:
if p.iouType == "bbox":
dd = torch.tensor(d, dtype=torch.float32)
gg = torch.tensor(g, dtype=torch.float32)
ious = box3d_overlap(dd, gg).cpu().numpy()
else:
                # Center-distance matching: Euclidean distance between
                # predicted and ground truth camera-space centers.
                dd = [d["center_cam"] for d in dt]
                gg = [g["center_cam"] for g in gt]
                ious = cdist(dd, gg, metric="euclidean")
else:
ious = []
in_prox = None
if self.eval_prox:
g = [g["bbox"] for g in gt]
d = [d["bbox"] for d in dt]
            iscrowd = [0 for _ in gt]
            ious2d = maskUtils.iou(d, g, iscrowd)
            if isinstance(ious2d, list):
                in_prox = []
            else:
                in_prox = ious2d > p.proximity_thresh
return ious, in_prox
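
    # Note on the matrix returned above: in mode "2D" and for iouType
    # "bbox" it holds IoUs (higher is better); for iouType "dist" it holds
    # Euclidean distances between camera-space centers (lower is better),
    # which evaluateImg compares against a per-object, radius-scaled
    # threshold.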
def evaluateImg(self, imgId, catId, aRng, maxDet):
"""
Perform evaluation for single category and image
Returns:
dict (single image results)
"""
p = self.params
if p.useCats:
gt = self._gts[imgId, catId]
dt = self._dts[imgId, catId]
else:
gt = [_ for cId in p.catIds for _ in self._gts[imgId, cId]]
dt = [_ for cId in p.catIds for _ in self._dts[imgId, cId]]
if len(gt) == 0 and len(dt) == 0:
return None
flag_range = "area" if self.mode == "2D" else "depth"
flag_ignore = "ignore2D" if self.mode == "2D" else "ignore3D"
for g in gt:
if g[flag_ignore] or (
g[flag_range] < aRng[0] or g[flag_range] > aRng[1]
):
g["_ignore"] = 1
else:
g["_ignore"] = 0
# sort dt highest score first, sort gt ignore last
gtind = np.argsort([g["_ignore"] for g in gt], kind="mergesort")
gt = [gt[i] for i in gtind]
dtind = np.argsort([-d["score"] for d in dt], kind="mergesort")
dt = [dt[i] for i in dtind[0:maxDet]]
# load computed ious
ious = (
self.ious[imgId, catId][0][:, gtind]
if len(self.ious[imgId, catId][0]) > 0
else self.ious[imgId, catId][0]
)
if self.eval_prox:
in_prox = (
self.ious[imgId, catId][1][:, gtind]
if len(self.ious[imgId, catId][1]) > 0
else self.ious[imgId, catId][1]
)
T = len(p.iouThrs)
G = len(gt)
D = len(dt)
gtm = np.zeros((T, G))
dtm = np.zeros((T, D))
        tem = np.ones((T, D))  # Translation Error
        sem = np.ones((T, D))  # Scale Error
        oem = np.ones((T, D))  # Orientation Error
gtIg = np.array([g["_ignore"] for g in gt])
dtIg = np.zeros((T, D))
dist_thres = 1
if not len(ious) == 0:
for tind, t in enumerate(p.iouThrs):
for dind, d in enumerate(dt):
# information about best match so far (m=-1 -> unmatched)
iou = min([t, 1 - 1e-10])
m = -1
for gind, g in enumerate(gt):
# in case of proximity evaluation, if not in proximity continue
if self.eval_prox and not in_prox[dind, gind]:
continue
# if this gt already matched, continue
if gtm[tind, gind] > 0:
continue
                        # dts are matched to regular gts first; once we
                        # reach ignore gts (sorted last), stop searching
if m > -1 and gtIg[m] == 0 and gtIg[gind] == 1:
break
# continue to next gt unless better match made
if p.iouType == "bbox" and ious[dind, gind] < iou:
continue
                        if p.iouType == "dist":
                            # For center-distance matching, a dt may match
                            # a gt when the center distance is within the
                            # gt's half-diagonal scaled by the matching
                            # threshold.
                            gt_obj_radius = (
                                np.linalg.norm(np.array(g["dimensions"])) / 2
                            )
                            if ious[dind, gind] > gt_obj_radius * iou:
                                continue
                            dist_thres = gt_obj_radius * iou
# if match successful and best so far, store appropriately
iou = ious[dind, gind]
m = gind
# if match made store id of match for both dt and gt
if m == -1:
continue
dtIg[tind, dind] = gtIg[m]
dtm[tind, dind] = gt[m]["id"]
gtm[tind, m] = d["id"]
if p.iouType == "dist":
# Translation Error
                        tem[tind, dind] = np.linalg.norm(
                            np.array(d["center_cam"])
                            - np.array(gt[m]["center_cam"])
                        ) / dist_thres
# Orientation Error
oem[tind, dind] = (
so3_relative_angle(
torch.tensor(d["R_cam"])[None],
torch.tensor(gt[m]["R_cam"])[None],
cos_bound=1e-2,
eps=1e-2,
).item()
/ np.pi
)
# Scale Error
min_whl = np.minimum(
d["dimensions"], gt[m]["dimensions"]
)
volume_annotation = np.prod(gt[m]["dimensions"])
volume_result = np.prod(d["dimensions"])
intersection = np.prod(min_whl)
union = (
volume_annotation + volume_result - intersection
)
scale_iou = intersection / union
sem[tind, dind] = 1 - scale_iou
# set unmatched detections outside of area range to ignore
a = np.array(
[d[flag_range] < aRng[0] or d[flag_range] > aRng[1] for d in dt]
).reshape((1, len(dt)))
dtIg = np.logical_or(
dtIg, np.logical_and(dtm == 0, np.repeat(a, T, 0))
)
# in case of proximity evaluation, ignore detections which are far from gt regions
if self.eval_prox and len(in_prox) > 0:
dt_far = in_prox.any(1) == 0
dtIg = np.logical_or(
dtIg, np.repeat(dt_far.reshape((1, len(dt))), T, 0)
)
# store results for given image and category
return {
"image_id": imgId,
"category_id": catId,
"aRng": aRng,
"maxDet": maxDet,
"dtIds": [d["id"] for d in dt],
"gtIds": [g["id"] for g in gt],
"dtMatches": dtm,
"gtMatches": gtm,
"dtScores": [d["score"] for d in dt],
"gtIgnore": gtIg,
"dtIgnore": dtIg,
"dtTranslationError": tem,
"dtScaleError": sem,
"dtOrientationError": oem,
}
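
    # Worked example for the "dist" threshold used above (hypothetical
    # numbers): a gt with dimensions (2, 2, 2) has half-diagonal
    # sqrt(2^2 + 2^2 + 2^2) / 2 ~= 1.73, so at threshold t = 0.5 a
    # detection matches only if its center lies within ~0.87 units
    # (typically meters) of the gt center.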
def summarize(self):
"""
Compute and display summary metrics for evaluation results.
Note this functin can *only* be applied on the default parameter setting
"""
def _summarize(
mode, ap=1, iouThr=None, areaRng="all", maxDets=100, log_str=""
):
p = self.params
eval = self.eval
if mode == "2D":
if self.iou_type == "bbox":
iStr = " {:<18} {} @[ IoU={:<9} | area={:>6s} | maxDets={:>3d} ] = {:0.3f}"
else:
iStr = " {:<18} {} @[ Dist={:<9} | area={:>6s} | maxDets={:>3d} ] = {:0.3f}"
elif mode == "3D":
if self.iou_type == "bbox":
iStr = " {:<18} {} @[ IoU={:<9} | depth={:>6s} | maxDets={:>3d} ] = {:0.3f}"
else:
iStr = " {:<18} {} @[ Dist={:<9} | depth={:>6s} | maxDets={:>3d} ] = {:0.3f}"
titleStr = "Average Precision" if ap == 1 else "Average Recall"
typeStr = "(AP)" if ap == 1 else "(AR)"
iouStr = (
"{:0.2f}:{:0.2f}".format(p.iouThrs[0], p.iouThrs[-1])
if iouThr is None
else "{:0.2f}".format(iouThr)
)
aind = [
i for i, aRng in enumerate(p.areaRngLbl) if aRng == areaRng
]
mind = [i for i, mDet in enumerate(p.maxDets) if mDet == maxDets]
if ap == 1:
# dimension of precision: [TxRxKxAxM]
s = eval["precision"]
# IoU
if iouThr is not None:
t = np.where(np.isclose(iouThr, p.iouThrs.astype(float)))[
0
]
s = s[t]
s = s[:, :, :, aind, mind]
else:
# dimension of recall: [TxKxAxM]
s = eval["recall"]
                if iouThr is not None:
                    t = np.where(np.isclose(iouThr, p.iouThrs))[0]
s = s[t]
s = s[:, :, aind, mind]
if len(s[s > -1]) == 0:
mean_s = -1
else:
mean_s = np.mean(s[s > -1])
if log_str != "":
log_str += "\n"
log_str += "mode={} ".format(mode) + iStr.format(
titleStr, typeStr, iouStr, areaRng, maxDets, mean_s
)
return mean_s, log_str
def _summarizeDets(mode):
params = self.params
# Define the thresholds to be printed
if mode == "2D":
thres = [0.5, 0.75, 0.95]
else:
if self.iou_type == "bbox":
thres = [0.15, 0.25, 0.50]
else:
thres = [0.5, 0.75, 1.0]
stats = np.zeros((13,))
stats[0], log_str = _summarize(mode, 1)
stats[1], log_str = _summarize(
mode,
1,
iouThr=thres[0],
maxDets=params.maxDets[2],
log_str=log_str,
)
stats[2], log_str = _summarize(
mode,
1,
iouThr=thres[1],
maxDets=params.maxDets[2],
log_str=log_str,
)
stats[3], log_str = _summarize(
mode,
1,
iouThr=thres[2],
maxDets=params.maxDets[2],
log_str=log_str,
)
stats[4], log_str = _summarize(
mode,
1,
areaRng=params.areaRngLbl[1],
maxDets=params.maxDets[2],
log_str=log_str,
)
stats[5], log_str = _summarize(
mode,
1,
areaRng=params.areaRngLbl[2],
maxDets=params.maxDets[2],
log_str=log_str,
)
stats[6], log_str = _summarize(
mode,
1,
areaRng=params.areaRngLbl[3],
maxDets=params.maxDets[2],
log_str=log_str,
)
stats[7], log_str = _summarize(
mode, 0, maxDets=params.maxDets[0], log_str=log_str
)
stats[8], log_str = _summarize(
mode, 0, maxDets=params.maxDets[1], log_str=log_str
)
stats[9], log_str = _summarize(
mode, 0, maxDets=params.maxDets[2], log_str=log_str
)
stats[10], log_str = _summarize(
mode,
0,
areaRng=params.areaRngLbl[1],
maxDets=params.maxDets[2],
log_str=log_str,
)
stats[11], log_str = _summarize(
mode,
0,
areaRng=params.areaRngLbl[2],
maxDets=params.maxDets[2],
log_str=log_str,
)
stats[12], log_str = _summarize(
mode,
0,
areaRng=params.areaRngLbl[3],
maxDets=params.maxDets[2],
log_str=log_str,
)
return stats, log_str
if not self.eval:
raise Exception("Please run accumulate() first")
stats, log_str = _summarizeDets(self.mode)
self.stats = stats
return log_str
class Detect3DParams:
"""Params for the 3d detection evaluation API."""
def __init__(
self,
mode: str = "2D",
iouType: str = "bbox",
proximity_thresh: float = 0.3,
) -> None:
"""Create an instance of Detect3DParams.
Args:
mode: (str) defines whether to evaluate 2D or 3D performance.
iouType: (str) defines the type of IoU to be used for evaluation.
            proximity_thresh: (float) 2D IoU threshold that defines the
                neighborhood when evaluating on non-exhaustively annotated
                datasets.
"""
assert iouType in {"bbox", "dist"}, f"Invalid iouType {iouType}."
self.iouType = iouType
if mode == "2D":
self.setDet2DParams()
elif mode == "3D":
self.setDet3DParams()
else:
raise Exception(f"{mode} mode is not supported")
self.mode = mode
self.proximity_thresh = proximity_thresh
def setDet2DParams(self) -> None:
"""Set parameters for 2D detection evaluation."""
self.imgIds = []
self.catIds = []
        # np.arange causes trouble: the generated data points can be
        # slightly larger than the true values, so use np.linspace.
self.iouThrs = np.linspace(
0.5, 0.95, int(np.round((0.95 - 0.5) / 0.05)) + 1, endpoint=True
)
self.recThrs = np.linspace(
0.0, 1.00, int(np.round((1.00 - 0.0) / 0.01)) + 1, endpoint=True
)
self.maxDets = [1, 10, 100]
self.areaRng = [
[0**2, 1e5**2],
[0**2, 32**2],
[32**2, 96**2],
[96**2, 1e5**2],
]
self.areaRngLbl = ["all", "small", "medium", "large"]
self.useCats = 1
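
    # With the grids above, 2D evaluation follows the standard COCO
    # protocol: iouThrs = [0.50, 0.55, ..., 0.95] (10 values) and
    # recThrs = [0.00, 0.01, ..., 1.00] (101 values).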
def setDet3DParams(self) -> None:
"""Set parameters for 3D detection evaluation."""
self.imgIds = []
self.catIds = []
        # np.arange causes trouble: the generated data points can be
        # slightly larger than the true values, so use np.linspace.
if self.iouType == "bbox":
self.iouThrs = np.linspace(
0.05,
0.5,
int(np.round((0.5 - 0.05) / 0.05)) + 1,
endpoint=True,
)
else:
self.iouThrs = np.linspace(
0.5, 1.0, int(np.round((1.00 - 0.5) / 0.05)) + 1, endpoint=True
)
self.recThrs = np.linspace(
0.0, 1.00, int(np.round((1.00 - 0.0) / 0.01)) + 1, endpoint=True
)
self.maxDets = [1, 10, 100]
self.areaRng = [[0, 1e5], [0, 10], [10, 35], [35, 1e5]]
self.areaRngLbl = ["all", "near", "medium", "far"]
self.useCats = 1
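
    # With the grids above, 3D "bbox" evaluation sweeps 3D-IoU thresholds
    # [0.05, 0.10, ..., 0.50] (10 values), while "dist" sweeps radius
    # scales [0.50, 0.55, ..., 1.00] (11 values). Depth ranges replace
    # COCO's area ranges: near (< 10), medium (10-35), and far (> 35).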