# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
"""
Miscellaneous utility functions
"""
import torch


def cat(tensors, dim=0):
    """
    Efficient version of torch.cat that avoids a copy if there is only a single element in the list.
    """
    assert isinstance(tensors, (list, tuple))
    if len(tensors) == 1:
        return tensors[0]
    return torch.cat(tensors, dim)
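
# Usage sketch (tensors below are made up for illustration): a single-element
# list is returned as-is, so no copy is made; longer lists fall back to torch.cat.
#   >>> a, b = torch.zeros(2, 3), torch.ones(2, 3)
#   >>> cat([a]) is a              # True -- no copy
#   >>> cat([a, b], dim=0).shape   # torch.Size([4, 3])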


def permute_and_flatten(layer, N, A, C, H, W):
    # Reshape a (N, A*C, H, W) prediction map into (N, H*W*A, C), so that the
    # per-location, per-anchor predictions line up with the concatenated labels.
    layer = layer.view(N, -1, C, H, W)
    layer = layer.permute(0, 3, 4, 1, 2)
    layer = layer.reshape(N, -1, C)
    return layer
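
# Shape sketch (values assumed for illustration, not from the original source):
# with N=2 images, A=3 anchors, C=80 classes and an 8x8 feature map, a
# (2, 240, 8, 8) input becomes (2, 8*8*3, 80) == (2, 192, 80), i.e. one row of
# C scores per (location, anchor) pair.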


def concat_box_prediction_layers(box_regression, box_cls=None, token_logits=None):
    """
    Flatten per-level box regression, classification and (optionally) token
    predictions so they line up with labels computed over all feature levels
    concatenated.
    """
    box_regression_flattened = []
    box_cls_flattened = []
    token_logit_flattened = []
    # For each feature level, permute the outputs so that they are in the
    # same format as the labels. Note that the labels are computed for
    # all feature levels concatenated, so we keep the same representation
    # for the objectness and the box_regression.
    for box_cls_per_level, box_regression_per_level in zip(
        box_cls, box_regression
    ):
        N, AxC, H, W = box_cls_per_level.shape
        Ax4 = box_regression_per_level.shape[1]
        A = Ax4 // 4
        C = AxC // A
        box_cls_per_level = permute_and_flatten(
            box_cls_per_level, N, A, C, H, W
        )
        box_cls_flattened.append(box_cls_per_level)

        box_regression_per_level = permute_and_flatten(
            box_regression_per_level, N, A, 4, H, W
        )
        box_regression_flattened.append(box_regression_per_level)

    if token_logits is not None:
        # A (anchors per location) is taken from the last classification level
        # above and is assumed to be the same across all feature levels.
        for token_logit_per_level in token_logits:
            N, AxT, H, W = token_logit_per_level.shape
            T = AxT // A
            token_logit_per_level = permute_and_flatten(
                token_logit_per_level, N, A, T, H, W
            )
            token_logit_flattened.append(token_logit_per_level)

    # Concatenate on the first dimension (representing the feature levels), to
    # take into account the way the labels were generated (with all feature maps
    # being concatenated as well).
    box_cls = cat(box_cls_flattened, dim=1).reshape(-1, C)
    box_regression = cat(box_regression_flattened, dim=1).reshape(-1, 4)

    token_logits_stacked = None
    if token_logits is not None:
        token_logits_stacked = cat(token_logit_flattened, dim=1)

    return box_regression, box_cls, token_logits_stacked
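
# Rough shape walkthrough (illustrative numbers, not from the original source):
# with N=2, A=3, C=80 and two levels of size 8x8 and 4x4, the box_cls inputs are
# (2, 240, 8, 8) and (2, 240, 4, 4); after flattening and concatenating, the
# returned box_cls is (2 * (64 + 16) * 3, 80) == (480, 80) and box_regression
# is (480, 4), while token logits keep the batch dimension: (2, 240, T).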


def round_channels(channels, divisor=8):
    """Round a channel count to the nearest multiple of ``divisor``, never
    returning less than ``divisor`` and never shrinking the requested count
    by more than 10%."""
    rounded_channels = max(int(channels + divisor / 2.0) // divisor * divisor, divisor)
    if float(rounded_channels) < 0.9 * channels:
        rounded_channels += divisor
    return rounded_channels
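

if __name__ == "__main__":
    # Minimal smoke test, not part of the original file: the sizes below
    # (2 images, 3 anchors, 80 classes, a 256-token vocabulary, two feature
    # levels) are made up purely for illustration.
    N, A, C, T = 2, 3, 80, 256
    levels = [(8, 8), (4, 4)]
    box_cls = [torch.randn(N, A * C, h, w) for h, w in levels]
    box_regression = [torch.randn(N, A * 4, h, w) for h, w in levels]
    token_logits = [torch.randn(N, A * T, h, w) for h, w in levels]

    regr, cls_scores, tok = concat_box_prediction_layers(
        box_regression, box_cls, token_logits
    )
    # One row per (image, location, anchor): 2 * (64 + 16) * 3 = 480 rows.
    print(regr.shape, cls_scores.shape, tok.shape)  # (480, 4), (480, 80), (2, 240, 256)

    # round_channels snaps to the nearest multiple of 8 without dropping more
    # than 10% below the requested count, e.g. 30 -> 32 and 3 -> 8.
    print(round_channels(30), round_channels(3))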