Spaces:

Pinwheel
/

GLIP-BLIP-Object-Detection-VQA

Runtime error

App Files Files Community

GLIP-BLIP-Object-Detection-VQA / maskrcnn_benchmark /data /datasets /lvis.py

Pinwheel

HF Demo

128757a about 3 years ago

raw

history blame contribute delete

9.14 kB

	# Copyright (c) Aishwarya Kamath & Nicolas Carion. Licensed under the Apache License 2.0. All Rights Reserved
	# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
	import json
	import os
	import time
	from collections import defaultdict

	import pycocotools.mask as mask_utils
	import torchvision
	from PIL import Image

	# from .coco import ConvertCocoPolysToMask, make_coco_transforms
	from .modulated_coco import ConvertCocoPolysToMask


	def _isArrayLike(obj):
	return hasattr(obj, "__iter__") and hasattr(obj, "__len__")


	class LVIS:
	    """In-memory index over an LVIS-style annotation file.

	    After loading, the following lookups are available:
	      * ``anns``: annotation id -> annotation dict
	      * ``cats``: category id -> category dict
	      * ``imgs``: image id -> image dict
	      * ``img_ann_map``: image id -> list of annotation dicts
	      * ``cat_img_map``: category id -> list of image ids (one entry per
	        annotation, so an image id may repeat)
	    """

	    def __init__(self, annotation_path=None):
	        """Class for reading and visualizing annotations.
	        Args:
	            annotation_path (str): location of annotation file. When None,
	                an empty index is created and nothing is read from disk.
	        """
	        self.anns = {}
	        self.cats = {}
	        self.imgs = {}
	        self.img_ann_map = defaultdict(list)
	        self.cat_img_map = defaultdict(list)
	        self.dataset = {}

	        if annotation_path is not None:
	            print("Loading annotations.")

	            tic = time.time()
	            self.dataset = self._load_json(annotation_path)
	            print("Done (t={:0.2f}s)".format(time.time() - tic))

	            # Kept as an assert so the exception type seen by callers is unchanged.
	            assert isinstance(self.dataset, dict), "Annotation file format {} not supported.".format(
	                type(self.dataset)
	            )
	            self._create_index()

	    def _load_json(self, path):
	        """Read and parse the JSON file at ``path``."""
	        # JSON is UTF-8 by specification; do not depend on the platform default.
	        with open(path, "r", encoding="utf-8") as f:
	            return json.load(f)

	    def _create_index(self):
	        """(Re)build every lookup table from ``self.dataset``.

	        Expects ``self.dataset`` to contain the keys ``"annotations"``,
	        ``"images"`` and ``"categories"``.
	        """
	        print("Creating index.")

	        self.img_ann_map = defaultdict(list)
	        self.cat_img_map = defaultdict(list)

	        self.anns = {}
	        self.cats = {}
	        self.imgs = {}

	        # A single pass over the annotations fills all annotation-derived maps.
	        for ann in self.dataset["annotations"]:
	            self.img_ann_map[ann["image_id"]].append(ann)
	            self.anns[ann["id"]] = ann
	            self.cat_img_map[ann["category_id"]].append(ann["image_id"])

	        for img in self.dataset["images"]:
	            self.imgs[img["id"]] = img

	        for cat in self.dataset["categories"]:
	            self.cats[cat["id"]] = cat

	        print("Index created.")

	    def get_ann_ids(self, img_ids=None, cat_ids=None, area_rng=None):
	        """Get ann ids that satisfy given filter conditions.
	        Args:
	            img_ids (int array): get anns for given imgs; a single id is
	                also accepted
	            cat_ids (int array): get anns for given cats; a single id is
	                also accepted
	            area_rng (float array): get anns for a given area range,
	                e.g [0, inf]. Both bounds are exclusive.
	        Returns:
	            ids (int array): integer array of ann ids
	        """
	        if img_ids is not None:
	            img_ids = img_ids if _isArrayLike(img_ids) else [img_ids]
	        if cat_ids is not None:
	            cat_ids = cat_ids if _isArrayLike(cat_ids) else [cat_ids]

	        if img_ids is not None:
	            anns = []
	            for img_id in img_ids:
	                # .get() keeps unknown ids from inserting empty entries
	                # into the defaultdict index.
	                anns.extend(self.img_ann_map.get(img_id, ()))
	        else:
	            anns = self.dataset["annotations"]

	        # return early if no more filtering required
	        if cat_ids is None and area_rng is None:
	            return [_ann["id"] for _ann in anns]

	        # cat_ids may legitimately be None here (area-only filtering);
	        # in that case every category is accepted.
	        wanted_cats = None if cat_ids is None else set(cat_ids)

	        if area_rng is None:
	            area_rng = [0, float("inf")]
	        min_area, max_area = area_rng[0], area_rng[1]

	        return [
	            _ann["id"]
	            for _ann in anns
	            if (wanted_cats is None or _ann["category_id"] in wanted_cats)
	            and _ann["area"] > min_area
	            and _ann["area"] < max_area
	        ]

	    def get_cat_ids(self):
	        """Get all category ids.
	        Returns:
	            ids (int array): integer array of category ids
	        """
	        return list(self.cats.keys())

	    def get_img_ids(self):
	        """Get all img ids.
	        Returns:
	            ids (int array): integer array of image ids
	        """
	        return list(self.imgs.keys())

	    def _load_helper(self, _dict, ids):
	        """Look up ``ids`` in ``_dict``.

	        ``ids`` may be None (return every value), an array-like of keys, or
	        a single key. Always returns a list. Unknown keys raise KeyError.
	        """
	        if ids is None:
	            return list(_dict.values())
	        elif _isArrayLike(ids):
	            return [_dict[item_id] for item_id in ids]
	        else:
	            return [_dict[ids]]

	    def load_anns(self, ids=None):
	        """Load anns with the specified ids. If ids=None load all anns.
	        Args:
	            ids (int array): integer array of annotation ids
	        Returns:
	            anns (dict array) : loaded annotation objects
	        """
	        return self._load_helper(self.anns, ids)

	    def load_cats(self, ids=None):
	        """Load categories with the specified ids. If ids=None load all
	        categories.
	        Args:
	            ids (int array): integer array of category ids
	        Returns:
	            cats (dict array) : loaded category dicts
	        """
	        return self._load_helper(self.cats, ids)

	    def load_imgs(self, ids=None):
	        """Load images with the specified ids. If ids=None load all images.
	        Args:
	            ids (int array): integer array of image ids
	        Returns:
	            imgs (dict array) : loaded image dicts
	        """
	        return self._load_helper(self.imgs, ids)

	    def download(self, save_dir, img_ids=None):
	        """Download images from the URL stored in each image's ``coco_url``.

	        Files that already exist under ``save_dir`` are not fetched again.
	        Args:
	            save_dir (str): dir to save downloaded images
	            img_ids (int array): img ids of images to download; None means
	                all images
	        """
	        # Imported lazily so the module does not need urllib unless
	        # a download is actually requested.
	        from urllib.request import urlretrieve

	        imgs = self.load_imgs(img_ids)

	        os.makedirs(save_dir, exist_ok=True)

	        for img in imgs:
	            file_name = os.path.join(save_dir, img["file_name"])
	            if not os.path.exists(file_name):
	                urlretrieve(img["coco_url"], file_name)

	    def ann_to_rle(self, ann):
	        """Convert annotation which can be polygons, uncompressed RLE to RLE.
	        Args:
	            ann (dict) : annotation object; its ``image_id`` must be present
	                in ``self.imgs`` so the image height/width can be read
	        Returns:
	            ann (rle)
	        """
	        img_data = self.imgs[ann["image_id"]]
	        h, w = img_data["height"], img_data["width"]
	        segm = ann["segmentation"]
	        if isinstance(segm, list):
	            # polygon -- a single object might consist of multiple parts
	            # we merge all parts into one mask rle code
	            rles = mask_utils.frPyObjects(segm, h, w)
	            rle = mask_utils.merge(rles)
	        elif isinstance(segm["counts"], list):
	            # uncompressed RLE
	            rle = mask_utils.frPyObjects(segm, h, w)
	        else:
	            # already RLE: returned as stored
	            rle = segm
	        return rle

	    def ann_to_mask(self, ann):
	        """Convert annotation which can be polygons, uncompressed RLE, or RLE
	        to binary mask.
	        Args:
	            ann (dict) : annotation object
	        Returns:
	            binary mask (numpy 2D array)
	        """
	        rle = self.ann_to_rle(ann)
	        return mask_utils.decode(rle)


	class LvisDetectionBase(torchvision.datasets.VisionDataset):
	    """Dataset yielding ``(image, annotations)`` pairs from an LVIS annotation file.

	    Images are read from disk under ``root``; annotations come from the
	    ``LVIS`` index built over ``annFile``.
	    """

	    def __init__(self, root, annFile, transform=None, target_transform=None, transforms=None):
	        """
	        Args:
	            root (str): directory the image paths are resolved against
	            annFile (str): location of the LVIS annotation file
	            transform, target_transform, transforms: forwarded unchanged to
	                ``torchvision.datasets.VisionDataset``
	        """
	        super().__init__(root, transforms, transform, target_transform)
	        self.lvis = LVIS(annFile)
	        # Sorted so that a dataset index always maps to the same image id.
	        self.ids = sorted(self.lvis.imgs.keys())

	    def __getitem__(self, index):
	        """
	        Args:
	            index (int): position in ``self.ids``
	        Returns:
	            tuple: Tuple (image, target). target is the list of annotation
	            dicts returned by ``LVIS.load_anns`` for that image, after
	            ``self.transforms`` has been applied when it is set.
	        """
	        img_id = self.ids[index]
	        target = self.lvis.load_anns(self.lvis.get_ann_ids(img_ids=img_id))

	        # The on-disk location is the last two components of the image's
	        # ``coco_url`` (presumably "<split dir>/<file name>" -- confirm
	        # against the dataset layout).
	        url_parts = self.lvis.load_imgs(img_id)[0]["coco_url"].split("/")
	        path = "/".join(url_parts[-2:])

	        img = Image.open(os.path.join(self.root, path)).convert("RGB")
	        if self.transforms is not None:
	            img, target = self.transforms(img, target)

	        return img, target

	    def __len__(self):
	        """Number of images in the dataset."""
	        return len(self.ids)


	class LvisDetection(LvisDetectionBase):
	    """LVIS detection dataset whose targets are run through ``ConvertCocoPolysToMask``."""

	    def __init__(self, img_folder, ann_file, transforms, return_masks=False, **kwargs):
	        """
	        Args:
	            img_folder (str): root directory of the images
	            ann_file (str): location of the LVIS annotation file
	            transforms (callable or None): applied to the image only, after
	                target preparation
	            return_masks (bool): forwarded to ``ConvertCocoPolysToMask``
	            **kwargs: accepted and ignored
	        """
	        # The base class is deliberately given no transforms; this class
	        # applies its own ``_transforms`` in ``__getitem__``.
	        super().__init__(img_folder, ann_file)
	        self.ann_file = ann_file
	        self._transforms = transforms
	        self.prepare = ConvertCocoPolysToMask(return_masks)

	    def __getitem__(self, idx):
	        """Return ``(image, target, idx)`` for the sample at position ``idx``."""
	        img, raw_anns = super().__getitem__(idx)
	        packed = {"image_id": self.ids[idx], "annotations": raw_anns}
	        img, target = self.prepare(img, packed)
	        if self._transforms is not None:
	            img = self._transforms(img)
	        return img, target, idx

	    def get_raw_image(self, idx):
	        """Return only the image the base class produces for ``idx``."""
	        img, _ = super().__getitem__(idx)
	        return img

	    def categories(self):
	        """Map every category id to its name, with ids in ascending order."""
	        id2cat = {c["id"]: c for c in self.lvis.dataset["categories"]}
	        return {cat_id: id2cat[cat_id]['name'] for cat_id in sorted(id2cat)}