| """DLA base model.""" | |
| from __future__ import annotations | |
| import math | |
| from collections.abc import Sequence | |
| import torch | |
| from torch import Tensor, nn | |
| from torch.utils.checkpoint import checkpoint | |
| from vis4d.common.ckpt import load_model_checkpoint | |
| from .base import BaseModel | |
| BN_MOMENTUM = 0.1 | |
| DLA_MODEL_PREFIX = "http://dl.yf.io/dla/models/imagenet" | |
| DLA_MODEL_MAPPING = { | |
| "dla34": "dla34-ba72cf86.pth", | |
| "dla46_c": "dla46_c-2bfd52c3.pth", | |
| "dla46x_c": "dla46x_c-d761bae7.pth", | |
| "dla60x_c": "dla60x_c-b870c45c.pth", | |
| "dla60": "dla60-24839fc4.pth", | |
| "dla60x": "dla60x-d15cacda.pth", | |
| "dla102": "dla102-d94d9790.pth", | |
| "dla102x": "dla102x-ad62be81.pth", | |
| "dla102x2": "dla102x2-262837b6.pth", | |
| "dla169": "dla169-0914e092.pth", | |
| } | |
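
# Pretrained checkpoint URLs are assembled as
# f"{DLA_MODEL_PREFIX}/{DLA_MODEL_MAPPING[name]}" in DLA.__init__ below.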

DLA_ARCH_SETTINGS = {  # pylint: disable=consider-using-namedtuple-or-dataclass
    "dla34": (
        (1, 1, 1, 2, 2, 1),
        (16, 32, 64, 128, 256, 512),
        False,
        "BasicBlock",
    ),
    "dla46_c": (
        (1, 1, 1, 2, 2, 1),
        (16, 32, 64, 64, 128, 256),
        False,
        "Bottleneck",
    ),
    "dla46x_c": (
        (1, 1, 1, 2, 2, 1),
        (16, 32, 64, 64, 128, 256),
        False,
        "BottleneckX",
    ),
    "dla60x_c": (
        (1, 1, 1, 2, 3, 1),
        (16, 32, 64, 64, 128, 256),
        False,
        "BottleneckX",
    ),
    "dla60": (
        (1, 1, 1, 2, 3, 1),
        (16, 32, 128, 256, 512, 1024),
        False,
        "Bottleneck",
    ),
    "dla60x": (
        (1, 1, 1, 2, 3, 1),
        (16, 32, 128, 256, 512, 1024),
        False,
        "BottleneckX",
    ),
    "dla102": (
        (1, 1, 1, 3, 4, 1),
        (16, 32, 128, 256, 512, 1024),
        True,
        "Bottleneck",
    ),
    "dla102x": (
        (1, 1, 1, 3, 4, 1),
        (16, 32, 128, 256, 512, 1024),
        True,
        "BottleneckX",
    ),
    "dla102x2": (
        (1, 1, 1, 3, 4, 1),
        (16, 32, 128, 256, 512, 1024),
        True,
        "BottleneckX",
    ),
    "dla169": (
        (1, 1, 2, 3, 5, 1),
        (16, 32, 128, 256, 512, 1024),
        True,
        "Bottleneck",
    ),
}
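
# Each DLA_ARCH_SETTINGS entry unpacks as (levels, channels,
# residual_root, block): the number of blocks per stage (level0..level5),
# the output channels per stage, whether Root nodes use a residual
# connection, and the residual block type used inside the Trees.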

class BasicBlock(nn.Module):
    """BasicBlock."""

    def __init__(
        self,
        inplanes: int,
        planes: int,
        stride: int = 1,
        dilation: int = 1,
        with_cp: bool = False,
    ) -> None:
        """Creates an instance of the class."""
        super().__init__()
        self.conv1 = nn.Conv2d(
            inplanes,
            planes,
            kernel_size=3,
            stride=stride,
            padding=dilation,
            bias=False,
            dilation=dilation,
        )
        self.bn1 = nn.BatchNorm2d(planes, momentum=BN_MOMENTUM)
        self.relu = nn.ReLU(inplace=True)
        self.conv2 = nn.Conv2d(
            planes,
            planes,
            kernel_size=3,
            stride=1,
            padding=dilation,
            bias=False,
            dilation=dilation,
        )
        self.bn2 = nn.BatchNorm2d(planes, momentum=BN_MOMENTUM)
        self.stride = stride
        self.with_cp = with_cp

    def forward(
        self, input_x: Tensor, residual: None | Tensor = None
    ) -> Tensor:
        """Forward."""

        def _inner_forward(
            input_x: Tensor, residual: None | Tensor = None
        ) -> Tensor:
            if residual is None:
                residual = input_x

            out = self.conv1(input_x)
            out = self.bn1(out)
            out = self.relu(out)

            out = self.conv2(out)
            out = self.bn2(out)
            out += residual
            return out

        if self.with_cp and input_x.requires_grad:
            out = checkpoint(
                _inner_forward, input_x, residual, use_reentrant=True
            )
        else:
            out = _inner_forward(input_x, residual)
        out = self.relu(out)
        return out
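
# NOTE: Bottleneck and BottleneckX below follow the same forward pattern
# as BasicBlock: when `with_cp` is set and the input requires gradients,
# the block body runs under torch.utils.checkpoint, trading recomputation
# during backward for activation memory; the final ReLU is applied
# outside the checkpointed region.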

class Bottleneck(nn.Module):
    """Bottleneck."""

    expansion = 2

    def __init__(
        self,
        inplanes: int,
        planes: int,
        stride: int = 1,
        dilation: int = 1,
        with_cp: bool = False,
    ) -> None:
        """Creates an instance of the class."""
        super().__init__()
        expansion = Bottleneck.expansion
        bottle_planes = planes // expansion
        self.conv1 = nn.Conv2d(
            inplanes, bottle_planes, kernel_size=1, bias=False
        )
        self.bn1 = nn.BatchNorm2d(bottle_planes, momentum=BN_MOMENTUM)
        self.conv2 = nn.Conv2d(
            bottle_planes,
            bottle_planes,
            kernel_size=3,
            stride=stride,
            padding=dilation,
            bias=False,
            dilation=dilation,
        )
        self.bn2 = nn.BatchNorm2d(bottle_planes, momentum=BN_MOMENTUM)
        self.conv3 = nn.Conv2d(
            bottle_planes, planes, kernel_size=1, bias=False
        )
        self.bn3 = nn.BatchNorm2d(planes, momentum=BN_MOMENTUM)
        self.relu = nn.ReLU(inplace=True)
        self.stride = stride
        self.with_cp = with_cp

    def forward(
        self, input_x: Tensor, residual: None | Tensor = None
    ) -> Tensor:
        """Forward."""

        def _inner_forward(
            input_x: Tensor, residual: None | Tensor = None
        ) -> Tensor:
            if residual is None:
                residual = input_x

            out = self.conv1(input_x)
            out = self.bn1(out)
            out = self.relu(out)

            out = self.conv2(out)
            out = self.bn2(out)
            out = self.relu(out)

            out = self.conv3(out)
            out = self.bn3(out)
            out += residual
            return out

        if self.with_cp and input_x.requires_grad:
            out = checkpoint(
                _inner_forward, input_x, residual, use_reentrant=True
            )
        else:
            out = _inner_forward(input_x, residual)
        out = self.relu(out)
        return out

class BottleneckX(nn.Module):
    """BottleneckX."""

    expansion = 2
    cardinality = 32

    def __init__(
        self,
        inplanes: int,
        planes: int,
        stride: int = 1,
        dilation: int = 1,
        with_cp: bool = False,
    ) -> None:
        """Creates an instance of the class."""
        super().__init__()
        cardinality = BottleneckX.cardinality
        bottle_planes = planes * cardinality // 32
        self.conv1 = nn.Conv2d(
            inplanes, bottle_planes, kernel_size=1, bias=False
        )
        self.bn1 = nn.BatchNorm2d(bottle_planes, momentum=BN_MOMENTUM)
        self.conv2 = nn.Conv2d(
            bottle_planes,
            bottle_planes,
            kernel_size=3,
            stride=stride,
            padding=dilation,
            bias=False,
            dilation=dilation,
            groups=cardinality,
        )
        self.bn2 = nn.BatchNorm2d(bottle_planes, momentum=BN_MOMENTUM)
        self.conv3 = nn.Conv2d(
            bottle_planes, planes, kernel_size=1, bias=False
        )
        self.bn3 = nn.BatchNorm2d(planes, momentum=BN_MOMENTUM)
        self.relu = nn.ReLU(inplace=True)
        self.stride = stride
        self.with_cp = with_cp

    def forward(
        self, input_x: Tensor, residual: None | Tensor = None
    ) -> Tensor:
        """Forward."""

        def _inner_forward(
            input_x: Tensor, residual: None | Tensor = None
        ) -> Tensor:
            if residual is None:
                residual = input_x

            out = self.conv1(input_x)
            out = self.bn1(out)
            out = self.relu(out)

            out = self.conv2(out)
            out = self.bn2(out)
            out = self.relu(out)

            out = self.conv3(out)
            out = self.bn3(out)
            out += residual
            return out

        if self.with_cp and input_x.requires_grad:
            out = checkpoint(
                _inner_forward, input_x, residual, use_reentrant=True
            )
        else:
            out = _inner_forward(input_x, residual)
        out = self.relu(out)
        return out
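
# NOTE: with the default cardinality of 32, bottle_planes == planes for
# the grouped 3x3 convolution above; "dla102x2" raises the class
# attribute to 64 (see DLA.__init__), doubling the bottleneck width.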

class Root(nn.Module):
    """Root."""

    def __init__(
        self,
        in_channels: int,
        out_channels: int,
        kernel_size: int,
        residual: bool,
        with_cp: bool = False,
    ) -> None:
        """Creates an instance of the class."""
        super().__init__()
        self.conv = nn.Conv2d(
            in_channels,
            out_channels,
            1,
            stride=1,
            bias=False,
            padding=(kernel_size - 1) // 2,
        )
        self.bn = nn.BatchNorm2d(  # pylint: disable=invalid-name
            out_channels, momentum=BN_MOMENTUM
        )
        self.relu = nn.ReLU(inplace=True)
        self.residual = residual
        self.with_cp = with_cp

    def forward(self, *input_x: Tensor) -> Tensor:
        """Forward."""

        def _inner_forward(*input_x: Tensor) -> Tensor:
            feats = self.conv(torch.cat(input_x, 1))
            feats = self.bn(feats)
            if self.residual:
                feats += input_x[0]
            return feats

        if self.with_cp and input_x[0].requires_grad:
            feats = checkpoint(_inner_forward, *input_x, use_reentrant=True)
        else:
            feats = _inner_forward(*input_x)
        feats = self.relu(feats)
        return feats

class Tree(nn.Module):
    """Tree."""

    def __init__(  # pylint: disable=too-many-arguments
        self,
        levels: int,
        block: str,
        in_channels: int,
        out_channels: int,
        stride: int = 1,
        level_root: bool = False,
        root_dim: int = 0,
        root_kernel_size: int = 1,
        dilation: int = 1,
        root_residual: bool = False,
        with_cp: bool = False,
    ) -> None:
        """Creates an instance of the class."""
        super().__init__()
        if block == "BasicBlock":
            block_c = BasicBlock
        elif block == "Bottleneck":
            block_c = Bottleneck  # type: ignore
        elif block == "BottleneckX":
            block_c = BottleneckX  # type: ignore
        else:
            raise ValueError(f"Block={block} not yet supported in DLA!")

        if root_dim == 0:
            root_dim = 2 * out_channels
        if level_root:
            root_dim += in_channels
        if levels == 1:
            self.tree1: Tree | BasicBlock = block_c(
                in_channels,
                out_channels,
                stride,
                dilation=dilation,
                with_cp=with_cp,
            )
            self.tree2: Tree | BasicBlock = block_c(
                out_channels,
                out_channels,
                1,
                dilation=dilation,
                with_cp=with_cp,
            )
            self.root = Root(
                root_dim,
                out_channels,
                root_kernel_size,
                root_residual,
                with_cp=with_cp,
            )
        else:
            self.tree1 = Tree(
                levels - 1,
                block,
                in_channels,
                out_channels,
                stride,
                root_dim=0,
                root_kernel_size=root_kernel_size,
                dilation=dilation,
                root_residual=root_residual,
                with_cp=with_cp,
            )
            self.tree2 = Tree(
                levels - 1,
                block,
                out_channels,
                out_channels,
                root_dim=root_dim + out_channels,
                root_kernel_size=root_kernel_size,
                dilation=dilation,
                root_residual=root_residual,
                with_cp=with_cp,
            )
        self.level_root = level_root
        self.root_dim = root_dim
        self.downsample = None
        self.project = None
        self.levels = levels
        if stride > 1:
            self.downsample = nn.MaxPool2d(stride, stride=stride)
        if in_channels != out_channels and levels == 1:
            # NOTE the official impl/weights have project layers in levels > 1
            # case that are never used, hence 'levels == 1' is added but
            # pretrained models will need strict=False while loading.
            self.project = nn.Sequential(
                nn.Conv2d(
                    in_channels,
                    out_channels,
                    kernel_size=1,
                    stride=1,
                    bias=False,
                ),
                nn.BatchNorm2d(out_channels),
            )

    def forward(
        self,
        input_x: Tensor,
        residual: None | Tensor = None,
        children: None | list[Tensor] = None,
    ) -> Tensor:
        """Forward."""
        children = [] if children is None else children
        bottom = self.downsample(input_x) if self.downsample else input_x
        residual = self.project(bottom) if self.project else bottom
        if self.level_root:
            children.append(bottom)
        input_x1 = self.tree1(input_x, residual)
        if self.levels == 1:
            input_x2 = self.tree2(input_x1)
            input_x = self.root(input_x2, input_x1, *children)
        else:
            children.append(input_x1)
            input_x = self.tree2(input_x1, children=children)
        return input_x
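
# A Tree of depth `levels` aggregates two subtrees through a Root node:
# for levels == 1 the two residual blocks feed the Root directly, while
# deeper trees accumulate intermediate features in `children` so that
# the Root of the innermost tree2 fuses them all, which is why tree2's
# root_dim grows by out_channels at each recursion level.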

class DLA(BaseModel):
    """DLA base model."""

    def __init__(
        self,
        name: str,
        out_indices: Sequence[int] = (0, 1, 2, 3),
        with_cp: bool = False,
        pretrained: bool = False,
        weights: None | str = None,
    ) -> None:
        """Creates an instance of the class."""
        super().__init__()
        assert name in DLA_ARCH_SETTINGS, f"{name} is not supported!"
        levels, channels, residual_root, block = DLA_ARCH_SETTINGS[name]
        if name == "dla102x2":  # pragma: no cover
            BottleneckX.cardinality = 64
        self.base_layer = nn.Sequential(
            nn.Conv2d(
                3, channels[0], kernel_size=7, stride=1, padding=3, bias=False
            ),
            nn.BatchNorm2d(channels[0], momentum=BN_MOMENTUM),
            nn.ReLU(inplace=True),
        )
        self.level0 = self._make_conv_level(
            channels[0], channels[0], levels[0]
        )
        self.level1 = self._make_conv_level(
            channels[0], channels[1], levels[1], stride=2
        )
        self.level2 = Tree(
            levels[2],
            block,
            channels[1],
            channels[2],
            2,
            level_root=False,
            root_residual=residual_root,
            with_cp=with_cp,
        )
        self.level3 = Tree(
            levels[3],
            block,
            channels[2],
            channels[3],
            2,
            level_root=True,
            root_residual=residual_root,
            with_cp=with_cp,
        )
        self.level4 = Tree(
            levels[4],
            block,
            channels[3],
            channels[4],
            2,
            level_root=True,
            root_residual=residual_root,
            with_cp=with_cp,
        )
        self.level5 = Tree(
            levels[5],
            block,
            channels[4],
            channels[5],
            2,
            level_root=True,
            root_residual=residual_root,
            with_cp=with_cp,
        )
        self.out_indices = out_indices
        self._out_channels = [channels[i + 2] for i in out_indices]

        if pretrained:
            if weights is None:  # pragma: no cover
                weights = f"{DLA_MODEL_PREFIX}/{DLA_MODEL_MAPPING[name]}"
            load_model_checkpoint(self, weights)
        else:
            self._init_weights()

    def _init_weights(self) -> None:
        """Initialize module weights."""
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
                m.weight.data.normal_(0, math.sqrt(2.0 / n))
            elif isinstance(m, nn.BatchNorm2d):
                m.weight.data.fill_(1)
                m.bias.data.zero_()

    def _make_conv_level(
        self,
        inplanes: int,
        planes: int,
        convs: int,
        stride: int = 1,
        dilation: int = 1,
    ) -> nn.Sequential:
        """Build convolutional level."""
        modules = []
        for i in range(convs):
            modules.extend(
                [
                    nn.Conv2d(
                        inplanes,
                        planes,
                        kernel_size=3,
                        stride=stride if i == 0 else 1,
                        padding=dilation,
                        bias=False,
                        dilation=dilation,
                    ),
                    nn.BatchNorm2d(planes, momentum=BN_MOMENTUM),
                    nn.ReLU(inplace=True),
                ]
            )
            inplanes = planes
        return nn.Sequential(*modules)

    def forward(self, images: Tensor) -> list[Tensor]:
        """DLA forward.

        Args:
            images (Tensor[N, C, H, W]): Image input to process. Expected
                to be of type float32 with values ranging from 0 to 255.

        Returns:
            fp (list[Tensor]): The output feature pyramid. The list index
                represents the level, which has a downsampling ratio of
                2^index.
        """
        input_x = self.base_layer(images)
        outs = [images, images]
        for i in range(6):
            input_x = getattr(self, f"level{i}")(input_x)
            if i - 2 in self.out_indices:
                outs.append(input_x)
        return outs

    def out_channels(self) -> list[int]:
        """Get the number of channels for each level of the feature pyramid.

        Returns:
            list[int]: Number of channels per level.
        """
        return [3, 3] + self._out_channels
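

if __name__ == "__main__":
    # Minimal smoke test. This is an illustrative sketch, not part of
    # the original module; run it within the package so the relative
    # import of BaseModel resolves. It builds a randomly initialized
    # DLA-34 and prints the feature pyramid shapes.
    dla34 = DLA("dla34", pretrained=False)
    dummy = torch.rand(1, 3, 64, 64) * 255.0
    for level, feat in enumerate(dla34(dummy)):
        print(level, tuple(feat.shape))
    # Expected: levels 0 and 1 echo the 3-channel input, followed by
    # 64/128/256/512 channels at strides 4/8/16/32 for the default
    # out_indices=(0, 1, 2, 3).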