# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import numpy as np
import math

import paddle
import paddle.nn as nn
from paddle.nn import (Conv2D, BatchNorm2D, Linear, Dropout, MaxPool2D,
                       AvgPool2D)
from paddle import ParamAttr
import paddle.nn.functional as F

from ..registry import BACKBONES
from ..weight_init import weight_init_
from ...utils import load_ckpt


class ConvBNLayer(nn.Layer):
    """Conv2D followed by BatchNorm2D and an optional activation.

    Args:
        in_channels (int): Number of channels of the input.
        out_channels (int): Number of channels of the output.
        kernel_size (int): Kernel size of the convolution.
        stride (int): Stride of the Conv2D layer. Default: 1.
        groups (int): Groups of the Conv2D layer. Default: 1.
        act (str|None): Name of an activation in ``paddle.nn.functional``
            (e.g. "relu") applied after BatchNorm; ``None`` disables it.
        name (str): Name prefix of this instance, used to name parameters.

    Note:
        Weight and bias initialization values are explicitly declared in the
        backbone's ``init_weights`` method; the names given here only
        identify the restored parameters when loading a checkpoint.
    """
    def __init__(self,
                 in_channels,
                 out_channels,
                 kernel_size,
                 stride=1,
                 groups=1,
                 act=None,
                 name=None):
        super(ConvBNLayer, self).__init__()
        self._conv = Conv2D(
            in_channels=in_channels,
            out_channels=out_channels,
            kernel_size=kernel_size,
            stride=stride,
            # "same"-style padding for odd kernel sizes.
            padding=(kernel_size - 1) // 2,
            groups=groups,
            weight_attr=ParamAttr(name=name + "_weights"),
            bias_attr=False)

        # Parameter-name convention of the pretrained checkpoints: the stem
        # conv is "bn_conv1"; residual convs drop the leading "res" prefix.
        if name == "conv1":
            bn_name = "bn_" + name
        else:
            bn_name = "bn" + name[3:]

        self._act = act
        self._batch_norm = BatchNorm2D(
            out_channels,
            weight_attr=ParamAttr(name=bn_name + "_scale"),
            bias_attr=ParamAttr(name=bn_name + "_offset"))

    def forward(self, inputs):
        y = self._conv(inputs)
        y = self._batch_norm(y)
        if self._act:
            y = getattr(paddle.nn.functional, self._act)(y)
        return y


class BottleneckBlock(nn.Layer):
    """Residual bottleneck block (1x1 -> 3x3 -> 1x1, 4x channel expansion).

    Args:
        in_channels (int): Number of channels of the block input.
        out_channels (int): Width of the bottleneck; the block outputs
            ``out_channels * 4`` channels.
        stride (int): Stride of the middle 3x3 convolution.
        shortcut (bool): If True, the input is added directly to the
            residual; if False, a 1x1 conv projects it first. Default: True.
        name (str): Name prefix of this instance.
    """
    def __init__(self,
                 in_channels,
                 out_channels,
                 stride,
                 shortcut=True,
                 name=None):
        super(BottleneckBlock, self).__init__()
        self.conv0 = ConvBNLayer(in_channels=in_channels,
                                 out_channels=out_channels,
                                 kernel_size=1,
                                 act="relu",
                                 name=name + "_branch2a")
        self.conv1 = ConvBNLayer(in_channels=out_channels,
                                 out_channels=out_channels,
                                 kernel_size=3,
                                 stride=stride,
                                 act="relu",
                                 name=name + "_branch2b")
        self.conv2 = ConvBNLayer(in_channels=out_channels,
                                 out_channels=out_channels * 4,
                                 kernel_size=1,
                                 act=None,
                                 name=name + "_branch2c")

        if not shortcut:
            # Projection shortcut to match channels/resolution.
            self.short = ConvBNLayer(in_channels=in_channels,
                                     out_channels=out_channels * 4,
                                     kernel_size=1,
                                     stride=stride,
                                     name=name + "_branch1")

        self.shortcut = shortcut

    def forward(self, inputs):
        y = self.conv0(inputs)
        conv1 = self.conv1(y)
        conv2 = self.conv2(conv1)

        if self.shortcut:
            short = inputs
        else:
            short = self.short(inputs)
        y = paddle.add(x=short, y=conv2)
        return F.relu(y)


class BasicBlock(nn.Layer):
    """Residual basic block (two 3x3 convolutions, no channel expansion).

    Args:
        in_channels (int): Number of channels of the block input.
        out_channels (int): Number of channels of the block output.
        stride (int): Stride of the first 3x3 convolution.
        shortcut (bool): If True, the input is added directly to the
            residual; if False, a 1x1 conv projects it first. Default: True.
        name (str): Name prefix of this instance.
    """
    def __init__(self,
                 in_channels,
                 out_channels,
                 stride,
                 shortcut=True,
                 name=None):
        super(BasicBlock, self).__init__()
        self.stride = stride
        # BUGFIX: ConvBNLayer's parameter is ``kernel_size``; the original
        # code passed ``filter_size`` here, raising TypeError for ResNet-18/34.
        self.conv0 = ConvBNLayer(in_channels=in_channels,
                                 out_channels=out_channels,
                                 kernel_size=3,
                                 stride=stride,
                                 act="relu",
                                 name=name + "_branch2a")
        self.conv1 = ConvBNLayer(in_channels=out_channels,
                                 out_channels=out_channels,
                                 kernel_size=3,
                                 act=None,
                                 name=name + "_branch2b")

        if not shortcut:
            self.short = ConvBNLayer(in_channels=in_channels,
                                     out_channels=out_channels,
                                     kernel_size=1,
                                     stride=stride,
                                     name=name + "_branch1")

        self.shortcut = shortcut

    def forward(self, inputs):
        y = self.conv0(inputs)
        conv1 = self.conv1(y)

        if self.shortcut:
            short = inputs
        else:
            short = self.short(inputs)
        y = paddle.add(short, conv1)
        y = F.relu(y)
        return y


@BACKBONES.register()
class ResNet(nn.Layer):
    """ResNet backbone.

    Args:
        depth (int): Depth of resnet model. One of 18, 34, 50, 101, 152.
        pretrained (str): Path of a pretrained checkpoint. Default: None.
    """
    def __init__(self, depth, pretrained=None):
        super(ResNet, self).__init__()
        self.pretrained = pretrained
        self.layers = depth

        supported_layers = [18, 34, 50, 101, 152]
        assert self.layers in supported_layers, \
            "supported layers are {} but input layer is {}".format(
                supported_layers, self.layers)

        # Number of residual units per stage.
        if self.layers == 18:
            depth = [2, 2, 2, 2]
        elif self.layers == 34 or self.layers == 50:
            depth = [3, 4, 6, 3]
        elif self.layers == 101:
            depth = [3, 4, 23, 3]
        elif self.layers == 152:
            depth = [3, 8, 36, 3]

        # Input widths of each stage for the bottleneck variant
        # (previous stage's output is expanded 4x).
        in_channels = [64, 256, 512, 1024]
        out_channels = [64, 128, 256, 512]

        self.conv = ConvBNLayer(in_channels=3,
                                out_channels=64,
                                kernel_size=7,
                                stride=2,
                                act="relu",
                                name="conv1")
        self.pool2D_max = MaxPool2D(kernel_size=3, stride=2, padding=1)

        self.block_list = []
        if self.layers >= 50:
            for block in range(len(depth)):
                shortcut = False
                for i in range(depth[block]):
                    # Checkpoint naming: deep stages of ResNet-101/152 use
                    # "res{stage}b{idx}", otherwise "res{stage}{a,b,c,...}".
                    if self.layers in [101, 152] and block == 2:
                        if i == 0:
                            conv_name = "res" + str(block + 2) + "a"
                        else:
                            conv_name = "res" + str(block + 2) + "b" + str(i)
                    else:
                        conv_name = "res" + str(block + 2) + chr(97 + i)
                    bottleneck_block = self.add_sublayer(
                        conv_name,
                        BottleneckBlock(
                            # NOTE: Be careful! Here is different from
                            # TSM model.
                            in_channels=in_channels[block]
                            if i == 0 else out_channels[block] * 4,
                            out_channels=out_channels[block],
                            stride=2 if i == 0 and block != 0 else 1,
                            shortcut=shortcut,
                            name=conv_name))
                    self.block_list.append(bottleneck_block)
                    shortcut = True
        else:
            # BUGFIX: BasicBlock stages have no 4x expansion, so each
            # stage's input width is the previous stage's output width:
            # [64, 64, 128, 256], not the bottleneck widths
            # [64, 256, 512, 1024] used above.
            basic_in_channels = [64, 64, 128, 256]
            for block in range(len(depth)):
                shortcut = False
                for i in range(depth[block]):
                    conv_name = "res" + str(block + 2) + chr(97 + i)
                    basic_block = self.add_sublayer(
                        conv_name,
                        BasicBlock(in_channels=basic_in_channels[block]
                                   if i == 0 else out_channels[block],
                                   out_channels=out_channels[block],
                                   stride=2 if i == 0 and block != 0 else 1,
                                   shortcut=shortcut,
                                   name=conv_name))
                    self.block_list.append(basic_block)
                    shortcut = True

    def init_weights(self):
        """Initiate the parameters.

        Note:
            1. when indicate pretrained loading path, will load it to
               initiate backbone.
            2. when not indicating pretrained loading path, will follow
               specific initialization initiate backbone. Always, Conv2D
               layer will be initiated by KaimingNormal function, and
               BatchNorm2D will be initiated by Constant function.
               Please refer to
               https://www.paddlepaddle.org.cn/documentation/docs/en/develop/api/paddle/nn/initializer/kaiming/KaimingNormal_en.html
        """
        # XXX: check bias!!! check pretrained!!!
        if isinstance(self.pretrained, str) and self.pretrained.strip() != "":
            load_ckpt(self, self.pretrained)
        elif self.pretrained is None or self.pretrained.strip() == "":
            for layer in self.sublayers():
                if isinstance(layer, nn.Conv2D):
                    # XXX: no bias
                    weight_init_(layer, 'KaimingNormal')
                elif isinstance(layer, nn.BatchNorm2D):
                    weight_init_(layer, 'Constant', value=1)

    def forward(self, inputs):
        """Define how the backbone is going to run.

        Args:
            inputs (Tensor): 4-D input of shape (N*T, C, H, W) — batches and
                segment frames are already merged upstream.

        Returns:
            Tensor: Feature map produced by the last residual stage.
        """
        # NOTE: Already merge axis 0(batches) and axis 1(channels) before
        # extracting feature phase, please refer to
        # paddlevideo/modeling/framework/recognizers/recognizer2d.py#L27
        y = self.conv(inputs)
        y = self.pool2D_max(y)
        for block in self.block_list:
            y = block(y)
        return y