# Copyright (c) OpenMMLab. All rights reserved.
import torch
import torch.nn as nn
from mmcv.cnn import ConvModule, DepthwiseSeparableConvModule
from mmengine.model import BaseModule
from mmengine.utils import digit_version
from torch import Tensor

from mmpose.utils.typing import ConfigType, OptConfigType, OptMultiConfig

class ChannelAttention(BaseModule):
    """Channel attention Module.

    Args:
        channels (int): The input (and output) channels of the attention
            layer.
        init_cfg (dict or list[dict], optional): Initialization config dict.
            Defaults to None.
    """

    def __init__(self, channels: int, init_cfg: OptMultiConfig = None) -> None:
        super().__init__(init_cfg=init_cfg)
        self.global_avgpool = nn.AdaptiveAvgPool2d(1)
        self.fc = nn.Conv2d(channels, channels, 1, 1, 0, bias=True)
        # ``Hardsigmoid`` only accepts the ``inplace`` argument from
        # torch 1.7.0 onwards.
        if digit_version(torch.__version__) < (1, 7, 0):
            self.act = nn.Hardsigmoid()
        else:
            self.act = nn.Hardsigmoid(inplace=True)

    def forward(self, x: Tensor) -> Tensor:
        """Forward function for ChannelAttention."""
        # Keep the global pooling out of autocast for numerical stability
        # under mixed-precision training.
        with torch.cuda.amp.autocast(enabled=False):
            out = self.global_avgpool(x)
        out = self.fc(out)
        out = self.act(out)
        return x * out
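
# Illustrative usage sketch (not part of the original file); the channel count
# and input shape below are assumptions for demonstration only. The module is
# a squeeze-and-excitation style gate: global average pooling, a 1x1 conv and
# a Hardsigmoid produce per-channel weights that rescale the input, so the
# output shape matches the input shape.
#
#   >>> import torch
#   >>> attn = ChannelAttention(channels=64)
#   >>> x = torch.rand(2, 64, 32, 32)
#   >>> attn(x).shape
#   torch.Size([2, 64, 32, 32])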

class DarknetBottleneck(BaseModule):
    """The basic bottleneck block used in Darknet.

    Each ResBlock consists of two ConvModules and the input is added to the
    final output. Each ConvModule is composed of Conv, BN, and an activation
    layer (Swish by default). The first convolution uses a 1x1 kernel and the
    second a 3x3 kernel.

    Args:
        in_channels (int): The input channels of this Module.
        out_channels (int): The output channels of this Module.
        expansion (float): Expand ratio of the hidden channels.
            Defaults to 0.5.
        add_identity (bool): Whether to add identity to the out. Only works
            when in_channels == out_channels. Defaults to True.
        use_depthwise (bool): Whether to use depthwise separable convolution.
            Defaults to False.
        conv_cfg (dict): Config dict for convolution layer. Defaults to None,
            which means using conv2d.
        norm_cfg (dict): Config dict for normalization layer.
            Defaults to dict(type='BN', momentum=0.03, eps=0.001).
        act_cfg (dict): Config dict for activation layer.
            Defaults to dict(type='Swish').
    """

    def __init__(self,
                 in_channels: int,
                 out_channels: int,
                 expansion: float = 0.5,
                 add_identity: bool = True,
                 use_depthwise: bool = False,
                 conv_cfg: OptConfigType = None,
                 norm_cfg: ConfigType = dict(
                     type='BN', momentum=0.03, eps=0.001),
                 act_cfg: ConfigType = dict(type='Swish'),
                 init_cfg: OptMultiConfig = None) -> None:
        super().__init__(init_cfg=init_cfg)
        hidden_channels = int(out_channels * expansion)
        conv = DepthwiseSeparableConvModule if use_depthwise else ConvModule
        self.conv1 = ConvModule(
            in_channels,
            hidden_channels,
            1,
            conv_cfg=conv_cfg,
            norm_cfg=norm_cfg,
            act_cfg=act_cfg)
        self.conv2 = conv(
            hidden_channels,
            out_channels,
            3,
            stride=1,
            padding=1,
            conv_cfg=conv_cfg,
            norm_cfg=norm_cfg,
            act_cfg=act_cfg)
        self.add_identity = \
            add_identity and in_channels == out_channels

    def forward(self, x: Tensor) -> Tensor:
        """Forward function."""
        identity = x
        out = self.conv1(x)
        out = self.conv2(out)

        if self.add_identity:
            return out + identity
        else:
            return out
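
# Illustrative usage sketch (not part of the original file); the channel
# counts and input shape are assumptions. With in_channels == out_channels
# (and add_identity=True, the default) the block is residual: a 1x1 conv down
# to hidden_channels = int(out_channels * expansion), a 3x3 conv back up, and
# the identity added on top.
#
#   >>> import torch
#   >>> block = DarknetBottleneck(in_channels=32, out_channels=32)
#   >>> x = torch.rand(1, 32, 16, 16)
#   >>> block(x).shape
#   torch.Size([1, 32, 16, 16])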

class CSPNeXtBlock(BaseModule):
    """The basic bottleneck block used in CSPNeXt.

    Args:
        in_channels (int): The input channels of this Module.
        out_channels (int): The output channels of this Module.
        expansion (float): Expand ratio of the hidden channel.
            Defaults to 0.5.
        add_identity (bool): Whether to add identity to the out. Only works
            when in_channels == out_channels. Defaults to True.
        use_depthwise (bool): Whether to use depthwise separable convolution.
            Defaults to False.
        kernel_size (int): The kernel size of the second convolution layer.
            Defaults to 5.
        conv_cfg (dict): Config dict for convolution layer. Defaults to None,
            which means using conv2d.
        norm_cfg (dict): Config dict for normalization layer.
            Defaults to dict(type='BN', momentum=0.03, eps=0.001).
        act_cfg (dict): Config dict for activation layer.
            Defaults to dict(type='SiLU').
        init_cfg (:obj:`ConfigDict` or dict or list[dict] or
            list[:obj:`ConfigDict`], optional): Initialization config dict.
            Defaults to None.
    """

    def __init__(self,
                 in_channels: int,
                 out_channels: int,
                 expansion: float = 0.5,
                 add_identity: bool = True,
                 use_depthwise: bool = False,
                 kernel_size: int = 5,
                 conv_cfg: OptConfigType = None,
                 norm_cfg: ConfigType = dict(
                     type='BN', momentum=0.03, eps=0.001),
                 act_cfg: ConfigType = dict(type='SiLU'),
                 init_cfg: OptMultiConfig = None) -> None:
        super().__init__(init_cfg=init_cfg)
        hidden_channels = int(out_channels * expansion)
        conv = DepthwiseSeparableConvModule if use_depthwise else ConvModule
        self.conv1 = conv(
            in_channels,
            hidden_channels,
            3,
            stride=1,
            padding=1,
            norm_cfg=norm_cfg,
            act_cfg=act_cfg)
        # The second conv is always depthwise separable, regardless of
        # ``use_depthwise``.
        self.conv2 = DepthwiseSeparableConvModule(
            hidden_channels,
            out_channels,
            kernel_size,
            stride=1,
            padding=kernel_size // 2,
            conv_cfg=conv_cfg,
            norm_cfg=norm_cfg,
            act_cfg=act_cfg)
        self.add_identity = \
            add_identity and in_channels == out_channels

    def forward(self, x: Tensor) -> Tensor:
        """Forward function."""
        identity = x
        out = self.conv1(x)
        out = self.conv2(out)

        if self.add_identity:
            return out + identity
        else:
            return out
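
# Illustrative usage sketch (not part of the original file); the channel
# counts and kernel_size are assumptions. Compared to DarknetBottleneck, the
# first conv is 3x3 and the second is always a depthwise separable conv with
# a large kernel (5x5 by default), which widens the receptive field at a low
# parameter cost.
#
#   >>> import torch
#   >>> block = CSPNeXtBlock(in_channels=64, out_channels=64, kernel_size=5)
#   >>> x = torch.rand(1, 64, 16, 16)
#   >>> block(x).shape
#   torch.Size([1, 64, 16, 16])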

class CSPLayer(BaseModule):
    """Cross Stage Partial Layer.

    Args:
        in_channels (int): The input channels of the CSP layer.
        out_channels (int): The output channels of the CSP layer.
        expand_ratio (float): Ratio to adjust the number of channels of the
            hidden layer. Defaults to 0.5.
        num_blocks (int): Number of blocks. Defaults to 1.
        add_identity (bool): Whether to add identity in blocks.
            Defaults to True.
        use_cspnext_block (bool): Whether to use CSPNeXt block.
            Defaults to False.
        use_depthwise (bool): Whether to use depthwise separable convolution
            in blocks. Defaults to False.
        channel_attention (bool): Whether to add channel attention in each
            stage. Defaults to False.
        conv_cfg (dict, optional): Config dict for convolution layer.
            Defaults to None, which means using conv2d.
        norm_cfg (dict): Config dict for normalization layer.
            Defaults to dict(type='BN', momentum=0.03, eps=0.001).
        act_cfg (dict): Config dict for activation layer.
            Defaults to dict(type='Swish').
        init_cfg (:obj:`ConfigDict` or dict or list[dict] or
            list[:obj:`ConfigDict`], optional): Initialization config dict.
            Defaults to None.
    """

    def __init__(self,
                 in_channels: int,
                 out_channels: int,
                 expand_ratio: float = 0.5,
                 num_blocks: int = 1,
                 add_identity: bool = True,
                 use_depthwise: bool = False,
                 use_cspnext_block: bool = False,
                 channel_attention: bool = False,
                 conv_cfg: OptConfigType = None,
                 norm_cfg: ConfigType = dict(
                     type='BN', momentum=0.03, eps=0.001),
                 act_cfg: ConfigType = dict(type='Swish'),
                 init_cfg: OptMultiConfig = None) -> None:
        super().__init__(init_cfg=init_cfg)
        block = CSPNeXtBlock if use_cspnext_block else DarknetBottleneck
        mid_channels = int(out_channels * expand_ratio)
        self.channel_attention = channel_attention
        self.main_conv = ConvModule(
            in_channels,
            mid_channels,
            1,
            conv_cfg=conv_cfg,
            norm_cfg=norm_cfg,
            act_cfg=act_cfg)
        self.short_conv = ConvModule(
            in_channels,
            mid_channels,
            1,
            conv_cfg=conv_cfg,
            norm_cfg=norm_cfg,
            act_cfg=act_cfg)
        self.final_conv = ConvModule(
            2 * mid_channels,
            out_channels,
            1,
            conv_cfg=conv_cfg,
            norm_cfg=norm_cfg,
            act_cfg=act_cfg)

        self.blocks = nn.Sequential(*[
            block(
                mid_channels,
                mid_channels,
                1.0,
                add_identity,
                use_depthwise,
                conv_cfg=conv_cfg,
                norm_cfg=norm_cfg,
                act_cfg=act_cfg) for _ in range(num_blocks)
        ])
        if channel_attention:
            self.attention = ChannelAttention(2 * mid_channels)

    def forward(self, x: Tensor) -> Tensor:
        """Forward function."""
        x_short = self.short_conv(x)

        x_main = self.main_conv(x)
        x_main = self.blocks(x_main)

        x_final = torch.cat((x_main, x_short), dim=1)

        if self.channel_attention:
            x_final = self.attention(x_final)
        return self.final_conv(x_final)
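
# Illustrative usage sketch (not part of the original file); the channel
# counts and num_blocks are assumptions. The input is projected by two 1x1
# convs into branches of mid_channels = int(out_channels * expand_ratio)
# each; the main branch runs through the bottleneck blocks, the two branches
# are concatenated (2 * mid_channels), optionally re-weighted by
# ChannelAttention, and fused by a final 1x1 conv to out_channels.
#
#   >>> import torch
#   >>> layer = CSPLayer(in_channels=64, out_channels=128, num_blocks=2)
#   >>> x = torch.rand(1, 64, 32, 32)
#   >>> layer(x).shape
#   torch.Size([1, 128, 32, 32])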