Spaces:
Sleeping
Sleeping
| import numpy as np | |
| import pycocotools.mask as mask_util | |
| from detectron2.structures import BoxMode | |
| # MotionNet: based on instances_to_coco_json and the related code in DensePose. | |
def prediction_to_json(instances, img_id: str):
    """
    Convert the MotionNet predictions for one image into per-instance dicts.

    Args:
        instances (Instances): the output of the model. It must provide
            ``pred_boxes``, ``scores``, ``pred_classes``, ``pred_masks`` and
            the motion fields ``mtype``, ``morigin``, ``maxis``, ``mstate``
            and ``mstatemax``. The pose fields ``pdim``/``ptrans``/``prot``
            and the ``mextrinsic`` field are optional. ``.numpy()`` is called
            directly on the box tensor, so the predictions are presumably
            expected to live on the CPU already (TODO confirm with callers).
        img_id (str): the image id in COCO

    Returns:
        list[dict]: the results in densepose evaluation format, one dict per
            instance. Every dict holds ``image_id``, ``category_id``, ``bbox``
            (converted from XYXY_ABS to XYWH_ABS), ``score``, ``segmentation``
            (RLE with ``counts`` decoded to ``str``) and the motion fields.
            When ``pdim`` is present the dict also carries ``pdim``,
            ``ptrans`` and ``prot``; otherwise, when ``mextrinsic`` is
            present, it carries ``mextrinsic``. An empty list is returned
            when there are no instances.

    Raises:
        ValueError: if ``pdim`` is present but ``ptrans`` or ``prot`` is not.
    """
    num_instances = len(instances)
    if num_instances == 0:
        # Nothing to serialize; skip the tensor conversions entirely.
        return []

    boxes = instances.pred_boxes.tensor.numpy()
    boxes = BoxMode.convert(boxes, BoxMode.XYXY_ABS, BoxMode.XYWH_ABS)

    # MotionNet has masks in the annotation.
    # Use RLE to encode the masks, because they are too large and take memory
    # since this evaluator stores outputs of the entire dataset.
    rles = [
        mask_util.encode(np.array(mask[:, :, None], order="F", dtype="uint8"))[0]
        for mask in instances.pred_masks
    ]
    for rle in rles:
        # "counts" is an array encoded by mask_util as a byte-stream. Python3's
        # json writer which always produces strings cannot serialize a
        # bytestream unless you decode it. Thankfully, utf-8 works out (which
        # is also what the pycocotools/_mask.pyx does).
        rle["counts"] = rle["counts"].decode("utf-8")

    # Decide once which MotionNet fields are emitted, in output key order.
    # The pose layout takes precedence over the extrinsic layout, so
    # "mextrinsic" is not emitted when "pdim" is present.
    if instances.has("pdim"):
        missing = [name for name in ("ptrans", "prot") if not instances.has(name)]
        if missing:
            # Fail with a clear message instead of an unbound local later on.
            raise ValueError(
                "instances has 'pdim' but is missing: " + ", ".join(missing)
            )
        extra_fields = (
            "pdim", "ptrans", "prot",
            "mtype", "morigin", "maxis", "mstate", "mstatemax",
        )
    elif instances.has("mextrinsic"):
        extra_fields = (
            "mtype", "morigin", "maxis", "mextrinsic", "mstate", "mstatemax",
        )
    else:
        extra_fields = ("mtype", "morigin", "maxis", "mstate", "mstatemax")

    # One list per output key; entry k of each list belongs to instance k.
    columns = {
        "category_id": instances.pred_classes.tolist(),
        "bbox": boxes.tolist(),
        "score": instances.scores.tolist(),
        "segmentation": rles,
    }
    for name in extra_fields:
        columns[name] = getattr(instances, name).tolist()

    results = []
    for k in range(num_instances):
        result = {"image_id": img_id}
        for name, values in columns.items():
            result[name] = values[k]
        results.append(result)
    return results