Multimodal-CoT

Runtime error

App Files Files Community

Multimodal-CoT / timm /models /layers /split_attn.py

victor010

Duplicate from cooelf/Multimodal-CoT

160d3b1 over 2 years ago

raw

history blame contribute delete

3.09 kB

	""" Split Attention Conv2d (for ResNeSt Models)

	Paper: `ResNeSt: Split-Attention Networks` - https://arxiv.org/abs/2004.08955

	Adapted from original PyTorch impl at https://github.com/zhanghang1989/ResNeSt

	Modified for torchscript compat, performance, and consistency with timm by Ross Wightman
	"""
	import torch
	import torch.nn.functional as F
	from torch import nn

	from .helpers import make_divisible


	class RadixSoftmax(nn.Module):
	    """Normalise attention logits across the radix splits.

	    With ``radix > 1`` the input is viewed as
	    ``(batch, cardinality, radix, -1)``, the cardinality and radix axes are
	    swapped, a softmax is taken over the radix axis and the result is
	    flattened to ``(batch, -1)``. Otherwise an element-wise sigmoid is
	    applied and the input shape is kept.
	    """

	    def __init__(self, radix, cardinality):
	        """Store the number of radix splits and the cardinality (group count)."""
	        super().__init__()
	        self.radix = radix
	        self.cardinality = cardinality

	    def forward(self, x):
	        """Return the normalised attention weights for ``x``."""
	        # Single split: there is nothing to compete with, so gate with a sigmoid.
	        if self.radix <= 1:
	            return torch.sigmoid(x)

	        num_samples = x.size(0)
	        # (batch, cardinality, radix, rest) -> (batch, radix, cardinality, rest)
	        per_split = x.view(num_samples, self.cardinality, self.radix, -1).transpose(1, 2)
	        # dim=1 is the radix axis after the transpose above.
	        weights = F.softmax(per_split, dim=1)
	        return weights.reshape(num_samples, -1)


	class SplitAttn(nn.Module):
	    """Split-Attention (aka Splat) convolution block.

	    A grouped convolution produces ``radix`` feature splits. The splits are
	    summed, average-pooled over the spatial dims and passed through two 1x1
	    convolutions to obtain per-split channel attention, which is used to
	    form a weighted sum of the splits.

	    Args:
	        in_channels: Input channels of the main convolution.
	        out_channels: Output channels; falls back to ``in_channels`` when falsy.
	        kernel_size: Kernel size of the main convolution.
	        stride: Stride of the main convolution.
	        padding: Padding of the main convolution; ``kernel_size // 2`` when None.
	        dilation: Dilation of the main convolution.
	        groups: Group count. The main convolution uses ``groups * radix``
	            groups; both 1x1 convolutions and the radix softmax use ``groups``.
	        bias: Whether the main convolution has a bias term.
	        radix: Number of feature splits.
	        rd_ratio: Reduction ratio used to size the attention bottleneck when
	            ``rd_channels`` is None.
	        rd_channels: Explicit bottleneck width per split; multiplied by
	            ``radix`` when given.
	        rd_divisor: ``divisor`` argument handed to ``make_divisible`` when
	            ``rd_channels`` is None.
	        act_layer: Activation class, instantiated with ``inplace=True``.
	        norm_layer: Optional norm factory called with a channel count;
	            ``nn.Identity`` is used when falsy.
	        drop_block: Optional callable applied after the first norm and before
	            the first activation.
	        **kwargs: Forwarded to the main ``nn.Conv2d``.
	    """

	    def __init__(self, in_channels, out_channels=None, kernel_size=3, stride=1, padding=None,
	                 dilation=1, groups=1, bias=False, radix=2, rd_ratio=0.25, rd_channels=None, rd_divisor=8,
	                 act_layer=nn.ReLU, norm_layer=None, drop_block=None, **kwargs):
	        super().__init__()
	        if not out_channels:
	            out_channels = in_channels
	        self.radix = radix
	        self.drop_block = drop_block

	        # Width after the main conv: one out_channels-wide slice per split.
	        mid_chs = out_channels * radix
	        if rd_channels is not None:
	            attn_chs = rd_channels * radix
	        else:
	            attn_chs = make_divisible(in_channels * radix * rd_ratio, min_value=32, divisor=rd_divisor)
	        if padding is None:
	            padding = kernel_size // 2

	        def build_norm(num_chs):
	            # Fall back to a no-op when no norm layer was requested.
	            return norm_layer(num_chs) if norm_layer else nn.Identity()

	        # NOTE: submodules are created in the same order as their attributes
	        # are used in forward(); keep it so parameter registration is stable.
	        self.conv = nn.Conv2d(
	            in_channels,
	            mid_chs,
	            kernel_size=kernel_size,
	            stride=stride,
	            padding=padding,
	            dilation=dilation,
	            groups=groups * radix,
	            bias=bias,
	            **kwargs,
	        )
	        self.bn0 = build_norm(mid_chs)
	        self.act0 = act_layer(inplace=True)
	        self.fc1 = nn.Conv2d(out_channels, attn_chs, kernel_size=1, groups=groups)
	        self.bn1 = build_norm(attn_chs)
	        self.act1 = act_layer(inplace=True)
	        self.fc2 = nn.Conv2d(attn_chs, mid_chs, kernel_size=1, groups=groups)
	        self.rsoftmax = RadixSoftmax(radix, groups)

	    def forward(self, x):
	        """Apply the split-attention block to ``x`` and return a contiguous tensor."""
	        feats = self.bn0(self.conv(x))
	        if self.drop_block is not None:
	            feats = self.drop_block(feats)
	        feats = self.act0(feats)

	        batch, total_chs, height, width = feats.shape
	        if self.radix > 1:
	            # Expose the radix axis: (B, radix, C, H, W), then fuse the splits.
	            feats = feats.reshape((batch, self.radix, total_chs // self.radix, height, width))
	            pooled = feats.sum(dim=1)
	        else:
	            pooled = feats
	        # Global average pool over the two spatial dims.
	        pooled = pooled.mean((2, 3), keepdim=True)

	        # Bottleneck: fc1 -> norm -> act -> fc2 yields the attention logits.
	        logits = self.fc2(self.act1(self.bn1(self.fc1(pooled))))
	        attn = self.rsoftmax(logits).view(batch, -1, 1, 1)

	        if self.radix > 1:
	            attn = attn.reshape((batch, self.radix, total_chs // self.radix, 1, 1))
	            out = (feats * attn).sum(dim=1)
	        else:
	            out = feats * attn
	        return out.contiguous()