# Copyright (c) OpenMMLab. All rights reserved.
from mmcv.cnn import build_conv_layer, build_norm_layer

from ..builder import BACKBONES
from .resnet import Bottleneck as _Bottleneck
from .resnet import ResLayer, ResNet


class Bottleneck(_Bottleneck):
    """ResNeXt flavour of the bottleneck block (grouped 3x3 convolution).

    After the parent constructor has run, the middle channel width is
    rescaled from ``groups`` and ``width_per_group`` (when ``groups != 1``)
    and ``conv1``-``conv3`` together with the three norm layers are created
    again so that they use the resulting width.

    Args:
        in_channels (int): Input channels of this block.
        out_channels (int): Output channels of this block.
        base_channels (int): Reference width used to rescale the middle
            channels. When ``groups != 1`` the parent's ``mid_channels``
            must be divisible by it, and the new width is
            ``groups * width_per_group * mid_channels // base_channels``.
            Default: 64
        groups (int): Groups of conv2. Default: 32
        width_per_group (int): Width per group of conv2. 64x4d indicates
            ``groups=64, width_per_group=4`` and 32x8d indicates
            ``groups=32, width_per_group=8``. Default: 4
        stride (int): stride of the block. Default: 1
        dilation (int): dilation of convolution. Default: 1
        downsample (nn.Module): downsample operation on identity branch.
            Default: None
        style (str): `pytorch` or `caffe`. If set to "pytorch", the
            stride-two layer is the 3x3 conv layer, otherwise the
            stride-two layer is the first 1x1 conv layer.
        conv_cfg (dict): dictionary to construct and config conv layer.
            Default: None
        norm_cfg (dict): dictionary to construct and config norm layer.
            Default: dict(type='BN')
        with_cp (bool): Use checkpoint or not. Using checkpoint will save
            some memory while slowing down the training speed.
    """

    def __init__(self,
                 in_channels,
                 out_channels,
                 base_channels=64,
                 groups=32,
                 width_per_group=4,
                 **kwargs):
        super().__init__(in_channels, out_channels, **kwargs)
        self.groups = groups
        self.width_per_group = width_per_group

        # A plain ResNet bottleneck takes its middle width from expansion
        # and out_channels. For ResNeXt the width instead follows from
        # groups, width_per_group and the stage the block belongs to, so
        # the value left by the parent constructor is rescaled here.
        if groups != 1:
            assert self.mid_channels % base_channels == 0
            self.mid_channels = (
                groups * width_per_group * self.mid_channels // base_channels)

        inner_width = self.mid_channels
        outer_width = self.out_channels

        # All three norm layers are built before any conv layer; only their
        # names are stored now, registration happens next to each conv.
        first_name, first_norm = build_norm_layer(
            self.norm_cfg, inner_width, postfix=1)
        self.norm1_name = first_name
        second_name, second_norm = build_norm_layer(
            self.norm_cfg, inner_width, postfix=2)
        self.norm2_name = second_name
        third_name, third_norm = build_norm_layer(
            self.norm_cfg, outer_width, postfix=3)
        self.norm3_name = third_name

        # 1x1 conv: in_channels -> middle width.
        self.conv1 = build_conv_layer(
            self.conv_cfg,
            self.in_channels,
            inner_width,
            kernel_size=1,
            stride=self.conv1_stride,
            bias=False)
        self.add_module(self.norm1_name, first_norm)

        # 3x3 grouped conv at the middle width; padding equals dilation.
        self.conv2 = build_conv_layer(
            self.conv_cfg,
            inner_width,
            inner_width,
            kernel_size=3,
            stride=self.conv2_stride,
            padding=self.dilation,
            dilation=self.dilation,
            groups=groups,
            bias=False)
        self.add_module(self.norm2_name, second_norm)

        # 1x1 conv: middle width -> out_channels.
        self.conv3 = build_conv_layer(
            self.conv_cfg,
            inner_width,
            outer_width,
            kernel_size=1,
            bias=False)
        self.add_module(self.norm3_name, third_norm)


@BACKBONES.register_module()
class ResNeXt(ResNet):
    """ResNeXt backbone.

    Please refer to the `paper <https://arxiv.org/abs/1611.05431>`__ for
    details.

    Args:
        depth (int): Network depth, from {50, 101, 152}.
        groups (int): Groups of conv2 in Bottleneck. Default: 32.
        width_per_group (int): Width per group of conv2 in Bottleneck.
            Default: 4.
        in_channels (int): Number of input image channels. Default: 3.
        stem_channels (int): Output channels of the stem layer. Default: 64.
        num_stages (int): Stages of the network. Default: 4.
        strides (Sequence[int]): Strides of the first block of each stage.
            Default: ``(1, 2, 2, 2)``.
        dilations (Sequence[int]): Dilation of each stage.
            Default: ``(1, 1, 1, 1)``.
        out_indices (Sequence[int]): Output from which stages. If only one
            stage is specified, a single tensor (feature map) is returned,
            otherwise multiple stages are specified, a tuple of tensors will
            be returned. Default: ``(3, )``.
        style (str): `pytorch` or `caffe`. If set to "pytorch", the
            stride-two layer is the 3x3 conv layer, otherwise the
            stride-two layer is the first 1x1 conv layer.
        deep_stem (bool): Replace 7x7 conv in input stem with 3 3x3 conv.
            Default: False.
        avg_down (bool): Use AvgPool instead of stride conv when
            downsampling in the bottleneck. Default: False.
        frozen_stages (int): Stages to be frozen (stop grad and set eval
            mode). -1 means not freezing any parameters. Default: -1.
        conv_cfg (dict | None): The config dict for conv layers.
            Default: None.
        norm_cfg (dict): The config dict for norm layers.
        norm_eval (bool): Whether to set norm layers to eval mode, namely,
            freeze running stats (mean and var). Note: Effect on Batch Norm
            and its variants only. Default: False.
        with_cp (bool): Use checkpoint or not. Using checkpoint will save
            some memory while slowing down the training speed.
            Default: False.
        zero_init_residual (bool): Whether to use zero init for last norm
            layer in resblocks to let them behave as identity.
            Default: True.

    Example:
        >>> from mmpose.models import ResNeXt
        >>> import torch
        >>> self = ResNeXt(depth=50, out_indices=(0, 1, 2, 3))
        >>> self.eval()
        >>> inputs = torch.rand(1, 3, 32, 32)
        >>> level_outputs = self.forward(inputs)
        >>> for level_out in level_outputs:
        ...     print(tuple(level_out.shape))
        (1, 256, 8, 8)
        (1, 512, 4, 4)
        (1, 1024, 2, 2)
        (1, 2048, 1, 1)
    """

    # depth -> (block class, number of blocks in each stage)
    arch_settings = {
        50: (Bottleneck, (3, 4, 6, 3)),
        101: (Bottleneck, (3, 4, 23, 3)),
        152: (Bottleneck, (3, 8, 36, 3)),
    }

    def __init__(self, depth, groups=32, width_per_group=4, **kwargs):
        # Stored before the parent constructor runs; presumably
        # ResNet.__init__ builds the stages through make_res_layer, which
        # reads both attributes -- confirm against resnet.py.
        self.groups = groups
        self.width_per_group = width_per_group
        super().__init__(depth, **kwargs)

    def make_res_layer(self, **kwargs):
        """Build one ``ResLayer`` with the ResNeXt-specific settings added.

        ``groups``, ``width_per_group`` and ``base_channels`` are taken from
        this instance and passed alongside the caller's keyword arguments.
        ``self.base_channels`` is not assigned in this class; it is expected
        to be provided by ``ResNet``.

        Args:
            **kwargs: Keyword arguments forwarded unchanged to ``ResLayer``.

        Returns:
            ResLayer: The constructed layer.
        """
        stage = ResLayer(
            groups=self.groups,
            width_per_group=self.width_per_group,
            base_channels=self.base_channels,
            **kwargs)
        return stage