Spaces:

3dlg-hcvc
/

opdmulti-demo

Sleeping

App Files Files Community

opdmulti-demo / utilities.py

atwang

semi-working demo for one part

5ceacf4 about 2 years ago

raw

history blame

3.65 kB

	import numpy as np
	import pycocotools.mask as mask_util
	from detectron2.structures import BoxMode


	# MotionNet: based on instances_to_coco_json and relevant codes in densepose
	def prediction_to_json(instances, img_id: str):
	    """Convert model predictions for one image into a list of JSON-ready dicts.

	    Boxes are converted from ``XYXY_ABS`` to ``XYWH_ABS`` and every predicted
	    mask is RLE-encoded with pycocotools, so the returned records can be kept
	    for a whole dataset without holding dense masks in memory.

	    Args:
	        instances (Instances): the output of the model. It must provide
	            ``pred_boxes``, ``scores``, ``pred_classes``, ``pred_masks`` and
	            the motion fields ``mtype``, ``morigin``, ``maxis``, ``mstate``
	            and ``mstatemax``. ``pdim``/``ptrans``/``prot`` and
	            ``mextrinsic`` are optional.
	            NOTE(review): ``pred_boxes.tensor.numpy()`` is called directly,
	            so the tensors presumably have to live on the CPU - confirm
	            against the caller.
	        img_id (str): the image id in COCO, copied into every record.

	    Returns:
	        list[dict]: one dict per instance with the keys ``image_id``,
	        ``category_id``, ``bbox``, ``score``, ``segmentation`` and the motion
	        fields. When ``pdim`` is present the pose fields are added (and
	        ``mextrinsic`` is not); otherwise ``mextrinsic`` is added when
	        present.
	    """
	    boxes = BoxMode.convert(
	        instances.pred_boxes.tensor.numpy(), BoxMode.XYXY_ABS, BoxMode.XYWH_ABS
	    ).tolist()
	    scores = instances.scores.tolist()
	    classes = instances.pred_classes.tolist()

	    # Decide once which extra fields go into every record, in output order.
	    if instances.has("pdim"):
	        # Only emit the pose fields that actually exist: selecting this
	        # layout on "pdim" alone and then reading "ptrans"/"prot"
	        # unconditionally would fail on a partially populated Instances.
	        field_names = [
	            name for name in ("pdim", "ptrans", "prot") if instances.has(name)
	        ]
	        field_names += ["mtype", "morigin", "maxis", "mstate", "mstatemax"]
	    elif instances.has("mextrinsic"):
	        field_names = [
	            "mtype", "morigin", "maxis", "mextrinsic", "mstate", "mstatemax",
	        ]
	    else:
	        field_names = ["mtype", "morigin", "maxis", "mstate", "mstatemax"]
	    columns = {name: getattr(instances, name).tolist() for name in field_names}

	    # MotionNet has masks in the annotation.
	    # Use RLE to encode the masks, because dense masks are large and the
	    # caller may keep the outputs of an entire dataset.
	    rles = []
	    for mask in instances.pred_masks:
	        rle = mask_util.encode(
	            np.array(mask[:, :, None], order="F", dtype="uint8")
	        )[0]
	        # "counts" is a byte-stream produced by mask_util. Python 3's json
	        # writer cannot serialize bytes, so decode it; utf-8 works (which is
	        # also what pycocotools/_mask.pyx does).
	        rle["counts"] = rle["counts"].decode("utf-8")
	        rles.append(rle)

	    results = []
	    for k, (category, box, score, rle) in enumerate(
	        zip(classes, boxes, scores, rles)
	    ):
	        result = {
	            "image_id": img_id,
	            "category_id": category,
	            "bbox": box,
	            "score": score,
	            "segmentation": rle,
	        }
	        for name in field_names:
	            result[name] = columns[name][k]
	        results.append(result)
	    return results