You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
284 lines
11 KiB
Python
284 lines
11 KiB
Python
2 years ago
|
# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
|
||
|
#
|
||
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
||
|
# you may not use this file except in compliance with the License.
|
||
|
# You may obtain a copy of the License at
|
||
|
#
|
||
|
# http://www.apache.org/licenses/LICENSE-2.0
|
||
|
#
|
||
|
# Unless required by applicable law or agreed to in writing, software
|
||
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||
|
# See the License for the specific language governing permissions and
|
||
|
# limitations under the License.
|
||
|
|
||
|
import numpy as np
|
||
|
import math
|
||
|
|
||
|
import paddle
|
||
|
import paddle.nn as nn
|
||
|
from paddle.nn import (Conv2D, BatchNorm2D, Linear, Dropout, MaxPool2D,
|
||
|
AvgPool2D)
|
||
|
from paddle import ParamAttr
|
||
|
import paddle.nn.functional as F
|
||
|
|
||
|
from ..registry import BACKBONES
|
||
|
from ..weight_init import weight_init_
|
||
|
from ...utils import load_ckpt
|
||
|
|
||
|
|
||
|
class ConvBNLayer(nn.Layer):
    """Conv2D followed by BatchNorm2D and an optional activation.

    Args:
        in_channels (int): Number of channels for the input.
        out_channels (int): Number of channels for the output.
        kernel_size (int): Kernel size. The convolution is padded with
            ``(kernel_size - 1) // 2`` on each side.
        stride (int): Stride in the Conv2D layer. Default: 1.
        groups (int): Groups in the Conv2D. Default: 1.
        act (str): Name of a function in ``paddle.nn.functional`` applied
            after the BatchNorm2D layer. ``None`` (or an empty string)
            disables the activation. Default: None.
        name (str): Name of this ConvBNLayer instance; the parameter names
            are derived from it. When ``None``, no explicit parameter names
            are passed to ``ParamAttr``. Default: None.

    Note: weight and bias initialization consists of initial values and the
    names of the restored parameters. Only the names are set here; the
    values are initialized explicitly in the backbone's ``init_weights``
    method.
    """

    def __init__(self,
                 in_channels,
                 out_channels,
                 kernel_size,
                 stride=1,
                 groups=1,
                 act=None,
                 name=None):
        super(ConvBNLayer, self).__init__()

        if name is None:
            # The default ``name=None`` used to crash on ``name + "_weights"``;
            # fall back to unnamed parameters instead.
            conv_weight_name = None
            bn_scale_name = None
            bn_offset_name = None
        else:
            # The stem convolution ("conv1") maps to "bn_conv1"; every other
            # layer drops its first three characters (e.g. "res2a_branch2a"
            # becomes "bn2a_branch2a").
            if name == "conv1":
                bn_name = "bn_" + name
            else:
                bn_name = "bn" + name[3:]
            conv_weight_name = name + "_weights"
            bn_scale_name = bn_name + "_scale"
            bn_offset_name = bn_name + "_offset"

        # The convolution has no bias; BatchNorm2D below carries an offset.
        self._conv = Conv2D(in_channels=in_channels,
                            out_channels=out_channels,
                            kernel_size=kernel_size,
                            stride=stride,
                            padding=(kernel_size - 1) // 2,
                            groups=groups,
                            weight_attr=ParamAttr(name=conv_weight_name),
                            bias_attr=False)

        self._act = act

        self._batch_norm = BatchNorm2D(
            out_channels,
            weight_attr=ParamAttr(name=bn_scale_name),
            bias_attr=ParamAttr(name=bn_offset_name))

    def forward(self, inputs):
        """Apply convolution, batch norm and the optional activation."""
        y = self._conv(inputs)
        y = self._batch_norm(y)
        if self._act:
            # Resolve the activation by name, e.g. "relu" -> F.relu.
            y = getattr(paddle.nn.functional, self._act)(y)
        return y
|
||
|
|
||
|
|
||
|
class BottleneckBlock(nn.Layer):
    """Residual bottleneck block: 1x1 -> 3x3 -> 1x1 ConvBNLayers plus a shortcut.

    Args:
        in_channels (int): Number of channels for the input.
        out_channels (int): Width of the two inner convolutions; the block
            outputs ``out_channels * 4`` channels.
        stride (int): Stride of the 3x3 convolution and of the projection
            shortcut (when one is built).
        shortcut (bool): If True the input is added back unchanged; if False
            a 1x1 ConvBNLayer projects the input first. Default: True.
        name (str): Prefix for the sub-layer names ("_branch2a",
            "_branch2b", "_branch2c" and "_branch1" are appended).
    """

    def __init__(self,
                 in_channels,
                 out_channels,
                 stride,
                 shortcut=True,
                 name=None):
        super(BottleneckBlock, self).__init__()
        expanded_channels = out_channels * 4

        # 1x1 convolution feeding the 3x3 convolution.
        self.conv0 = ConvBNLayer(in_channels=in_channels,
                                 out_channels=out_channels,
                                 kernel_size=1,
                                 act="relu",
                                 name=name + "_branch2a")
        # 3x3 convolution; the only main-branch layer that uses ``stride``.
        self.conv1 = ConvBNLayer(in_channels=out_channels,
                                 out_channels=out_channels,
                                 kernel_size=3,
                                 stride=stride,
                                 act="relu",
                                 name=name + "_branch2b")
        # 1x1 expansion; no activation because ReLU follows the residual add.
        self.conv2 = ConvBNLayer(in_channels=out_channels,
                                 out_channels=expanded_channels,
                                 kernel_size=1,
                                 act=None,
                                 name=name + "_branch2c")

        if not shortcut:
            # Projection shortcut applied to the block input.
            self.short = ConvBNLayer(in_channels=in_channels,
                                     out_channels=expanded_channels,
                                     kernel_size=1,
                                     stride=stride,
                                     name=name + "_branch1")

        self.shortcut = shortcut

    def forward(self, inputs):
        """Run the main branch, add the shortcut and apply ReLU."""
        main_branch = self.conv2(self.conv1(self.conv0(inputs)))
        residual = inputs if self.shortcut else self.short(inputs)
        summed = paddle.add(x=residual, y=main_branch)
        return F.relu(summed)
|
||
|
|
||
|
|
||
|
class BasicBlock(nn.Layer):
    """Residual basic block: two 3x3 ConvBNLayers plus a shortcut.

    Args:
        in_channels (int): Number of channels for the input.
        out_channels (int): Number of channels for the output.
        stride (int): Stride of the first 3x3 convolution and of the
            projection shortcut (when one is built).
        shortcut (bool): If True the input is added back unchanged; if False
            a 1x1 ConvBNLayer projects the input first. Default: True.
        name (str): Prefix for the sub-layer names ("_branch2a",
            "_branch2b" and "_branch1" are appended).
    """

    def __init__(self,
                 in_channels,
                 out_channels,
                 stride,
                 shortcut=True,
                 name=None):
        super(BasicBlock, self).__init__()
        self.stride = stride

        # ConvBNLayer takes ``kernel_size``; the previous ``filter_size``
        # keyword raised a TypeError as soon as a BasicBlock was built.
        self.conv0 = ConvBNLayer(in_channels=in_channels,
                                 out_channels=out_channels,
                                 kernel_size=3,
                                 stride=stride,
                                 act="relu",
                                 name=name + "_branch2a")
        # No activation here because ReLU follows the residual add.
        self.conv1 = ConvBNLayer(in_channels=out_channels,
                                 out_channels=out_channels,
                                 kernel_size=3,
                                 act=None,
                                 name=name + "_branch2b")

        if not shortcut:
            # Projection shortcut applied to the block input.
            self.short = ConvBNLayer(in_channels=in_channels,
                                     out_channels=out_channels,
                                     kernel_size=1,
                                     stride=stride,
                                     name=name + "_branch1")

        self.shortcut = shortcut

    def forward(self, inputs):
        """Run the two convolutions, add the shortcut and apply ReLU."""
        y = self.conv0(inputs)
        conv1 = self.conv1(y)

        if self.shortcut:
            short = inputs
        else:
            short = self.short(inputs)
        y = paddle.add(short, conv1)
        y = F.relu(y)
        return y
|
||
|
|
||
|
|
||
|
@BACKBONES.register()
class ResNet(nn.Layer):
    """ResNet backbone.

    Depths 18 and 34 are built from ``BasicBlock``; depths 50, 101 and 152
    are built from ``BottleneckBlock``.

    Args:
        depth (int): Depth of resnet model. One of 18, 34, 50, 101, 152.
        pretrained (str): pretrained model. Default: None.
    """

    def __init__(self, depth, pretrained=None):
        super(ResNet, self).__init__()
        self.pretrained = pretrained
        self.layers = depth

        supported_layers = [18, 34, 50, 101, 152]
        assert self.layers in supported_layers, \
            "supported layers are {} but input layer is {}".format(
                supported_layers, self.layers)

        # Number of residual blocks in each of the four stages.
        stage_depths = {
            18: [2, 2, 2, 2],
            34: [3, 4, 6, 3],
            50: [3, 4, 6, 3],
            101: [3, 4, 23, 3],
            152: [3, 8, 36, 3],
        }[self.layers]

        use_bottleneck = self.layers >= 50
        block_cls = BottleneckBlock if use_bottleneck else BasicBlock
        # A bottleneck block outputs 4x its nominal width; a basic block
        # outputs exactly its nominal width.
        expansion = 4 if use_bottleneck else 1

        out_channels = [64, 128, 256, 512]
        # Channels entering the first block of each stage: the stem's 64,
        # then the previous stage's output width. Previously the bottleneck
        # table was also used for depths 18/34, which made stage 2 expect
        # 256 input channels while stage 1 only produces 64.
        if use_bottleneck:
            in_channels = [64, 256, 512, 1024]
        else:
            in_channels = [64, 64, 128, 256]

        # Stem: 7x7 stride-2 convolution followed by 3x3 stride-2 max pooling.
        self.conv = ConvBNLayer(in_channels=3,
                                out_channels=64,
                                kernel_size=7,
                                stride=2,
                                act="relu",
                                name="conv1")
        self.pool2D_max = MaxPool2D(kernel_size=3, stride=2, padding=1)

        self.block_list = []
        for block, num_blocks in enumerate(stage_depths):
            # Only the first block of a stage uses a projection shortcut.
            shortcut = False
            for i in range(num_blocks):
                if self.layers in [101, 152] and block == 2:
                    # The long third stage of ResNet-101/152 is named
                    # "res4a", "res4b1", "res4b2", ... instead of using
                    # consecutive letters.
                    if i == 0:
                        conv_name = "res" + str(block + 2) + "a"
                    else:
                        conv_name = "res" + str(block + 2) + "b" + str(i)
                else:
                    conv_name = "res" + str(block + 2) + chr(97 + i)

                # NOTE: Be careful! Here is different from TSM model.
                if i == 0:
                    block_in_channels = in_channels[block]
                else:
                    block_in_channels = out_channels[block] * expansion

                # add_sublayer registers the block under ``conv_name`` and
                # returns it; block_list keeps the forward order.
                residual_block = self.add_sublayer(
                    conv_name,
                    block_cls(in_channels=block_in_channels,
                              out_channels=out_channels[block],
                              # Downsample at the start of every stage
                              # except the first one.
                              stride=2 if i == 0 and block != 0 else 1,
                              shortcut=shortcut,
                              name=conv_name))
                self.block_list.append(residual_block)
                shortcut = True

    def init_weights(self):
        """Initiate the parameters.

        Note:
            1. When a pretrained path is given, it is loaded with
               ``load_ckpt`` to initiate the backbone.
            2. When no pretrained path is given (``None`` or a blank
               string), every Conv2D sublayer is initiated with
               'KaimingNormal' and every BatchNorm2D sublayer with
               'Constant' (value=1) through ``weight_init_``.
               Please refer to https://www.paddlepaddle.org.cn/documentation/docs/en/develop/api/paddle/nn/initializer/kaiming/KaimingNormal_en.html
        """
        #XXX: check bias!!! check pretrained!!!

        if isinstance(self.pretrained, str) and self.pretrained.strip() != "":
            load_ckpt(self, self.pretrained)
        elif self.pretrained is None or self.pretrained.strip() == "":
            for layer in self.sublayers():
                if isinstance(layer, nn.Conv2D):
                    #XXX: no bias
                    weight_init_(layer, 'KaimingNormal')
                elif isinstance(layer, nn.BatchNorm2D):
                    weight_init_(layer, 'Constant', value=1)

    def forward(self, inputs):
        """Define how the backbone is going to run.

        Applies the stem convolution, the max pooling and then every
        residual block in construction order, returning the last block's
        output.
        """
        #NOTE: Already merge axis 0(batches) and axis 1(channels) before extracting feature phase,
        # please refer to paddlevideo/modeling/framework/recognizers/recognizer2d.py#L27
        #y = paddle.reshape(
        #    inputs, [-1, inputs.shape[2], inputs.shape[3], inputs.shape[4]])

        y = self.conv(inputs)
        y = self.pool2D_max(y)
        for block in self.block_list:
            y = block(y)
        return y
|