# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import math

import paddle
import paddle.nn.initializer as init
import numpy as np
from scipy import special

def weight_init_(layer,
                 func,
                 weight_name=None,
                 bias_name=None,
                 bias_value=0.0,
                 **kwargs):
    """
    In-place params init function.

    Usage:
    .. code-block:: python

        import paddle
        import numpy as np

        data = np.ones([3, 4], dtype='float32')
        linear = paddle.nn.Linear(4, 4)
        input = paddle.to_tensor(data)
        print(linear.weight)
        linear(input)

        weight_init_(linear, 'Normal', 'fc_w0', 'fc_b0', std=0.01, mean=0.1)
        print(linear.weight)
    """
    if hasattr(layer, 'weight') and layer.weight is not None:
        # Resolve the initializer class by name (e.g. init.Normal) and apply
        # it in place to the layer's weight.
        getattr(init, func)(**kwargs)(layer.weight)
        if weight_name is not None:
            # override weight name
            layer.weight.name = weight_name

    if hasattr(layer, 'bias') and layer.bias is not None:
        init.Constant(bias_value)(layer.bias)
        if bias_name is not None:
            # override bias name
            layer.bias.name = bias_name

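# Additional usage sketch: because `func` is resolved via getattr on
# paddle.nn.initializer, any initializer class exposed there can be named,
# e.g. 'KaimingNormal' or 'XavierUniform'. The layer and names below are
# hypothetical, shown only to illustrate the call shape:
#
#     conv = paddle.nn.Conv2D(3, 16, kernel_size=3)
#     weight_init_(conv, 'KaimingNormal', weight_name='conv1_w',
#                  bias_name='conv1_b', bias_value=0.0)
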
def _no_grad_trunc_normal_(tensor, mean, std, a, b):
    def norm_cdf(x):
        # Computes standard normal cumulative distribution function
        return (1. + math.erf(x / math.sqrt(2.))) / 2.

    if (mean < a - 2 * std) or (mean > b + 2 * std):
        print("mean is more than 2 std from [a, b] in nn.init.trunc_normal_. "
              "The distribution of values may be incorrect.")

    with paddle.no_grad():
        # Values are generated by using a truncated uniform distribution and
        # then using the inverse CDF for the normal distribution.
        # Get upper and lower cdf values
        l = norm_cdf((a - mean) / std)
        u = norm_cdf((b - mean) / std)

        # Uniformly fill tensor with values from [l, u], then translate to
        # [2l-1, 2u-1].
        tmp = np.random.uniform(2 * l - 1, 2 * u - 1,
                                size=list(tensor.shape)).astype(np.float32)

        # Use inverse cdf transform for normal distribution to get truncated
        # standard normal
        tmp = special.erfinv(tmp)

        # Transform to proper mean, std
        tmp *= (std * math.sqrt(2.0))
        tmp += mean

        # Clamp to ensure it's in the proper range
        tmp = np.clip(tmp, a, b)
        tensor.set_value(paddle.to_tensor(tmp))

    return tensor

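# Worked example of the inverse-CDF steps above, using the default arguments
# (mean=0, std=1, a=-2, b=2): l = Phi(-2) ~= 0.0228 and u = Phi(2) ~= 0.9772,
# so the uniform draw lies in [2l-1, 2u-1] ~= [-0.9545, 0.9545]. erfinv maps
# that interval back to [-2/sqrt(2), 2/sqrt(2)], and multiplying by
# std * sqrt(2) recovers the target range [-2, 2] before the final clip.
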
def _calculate_fan_in_and_fan_out(tensor):
    dimensions = tensor.dim()
    if dimensions < 2:
        raise ValueError(
            "Fan in and fan out can not be computed for tensor with fewer than 2 dimensions"
        )

    num_input_fmaps = tensor.shape[1]
    num_output_fmaps = tensor.shape[0]
    receptive_field_size = 1
    if dimensions > 2:
        # For conv kernels, every spatial position of the kernel contributes
        # one connection per feature map.
        receptive_field_size = tensor[0][0].numel()
    fan_in = num_input_fmaps * receptive_field_size
    fan_out = num_output_fmaps * receptive_field_size

    return fan_in, fan_out

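# Example (illustrative): for a Conv2D weight of shape [16, 3, 3, 3] the
# receptive field covers 3 * 3 = 9 positions, so fan_in = 3 * 9 = 27 and
# fan_out = 16 * 9 = 144. The computation assumes the [out, in, ...] layout
# used by conv kernels.
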
def trunc_normal_(tensor, mean=0., std=1., a=-2., b=2.):
    return _no_grad_trunc_normal_(tensor, mean, std, a, b)

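# Usage sketch (the layer below is hypothetical, for illustration only):
#
#     linear = paddle.nn.Linear(4, 4)
#     trunc_normal_(linear.weight, std=0.02)
#
# Samples are drawn from N(mean, std^2) and truncated to the absolute bounds
# [a, b], matching the common transformer weight-init recipe.
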
def kaiming_normal_(tensor, a=0., mode='fan_in', nonlinearity='leaky_relu'):
    def _calculate_correct_fan(tensor, mode):
        mode = mode.lower()
        valid_modes = ['fan_in', 'fan_out']
        if mode not in valid_modes:
            raise ValueError("Mode {} not supported, please use one of {}".
                             format(mode, valid_modes))

        fan_in, fan_out = _calculate_fan_in_and_fan_out(tensor)
        return fan_in if mode == 'fan_in' else fan_out

    def calculate_gain(nonlinearity, param=None):
        linear_fns = [
            'linear', 'conv1d', 'conv2d', 'conv3d', 'conv_transpose1d',
            'conv_transpose2d', 'conv_transpose3d'
        ]
        if nonlinearity in linear_fns or nonlinearity == 'sigmoid':
            return 1
        elif nonlinearity == 'tanh':
            return 5.0 / 3
        elif nonlinearity == 'relu':
            return math.sqrt(2.0)
        elif nonlinearity == 'leaky_relu':
            if param is None:
                negative_slope = 0.01
            elif (not isinstance(param, bool)
                  and isinstance(param, int)) or isinstance(param, float):
                negative_slope = param
            else:
                raise ValueError(
                    "negative_slope {} not a valid number".format(param))
            return math.sqrt(2.0 / (1 + negative_slope**2))
        else:
            raise ValueError(
                "Unsupported nonlinearity {}".format(nonlinearity))

    fan = _calculate_correct_fan(tensor, mode)
    gain = calculate_gain(nonlinearity, a)
    std = gain / math.sqrt(fan)
    with paddle.no_grad():
        paddle.nn.initializer.Normal(0, std)(tensor)
    return tensor
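
# Minimal smoke test (a sketch, not part of the original module): checks that
# trunc_normal_ keeps values inside [a, b] and that kaiming_normal_ produces
# roughly the expected std = gain / sqrt(fan_in).
if __name__ == '__main__':
    w = paddle.zeros([16, 3, 3, 3])

    trunc_normal_(w, mean=0., std=1., a=-2., b=2.)
    assert float(w.min()) >= -2. and float(w.max()) <= 2.

    kaiming_normal_(w, mode='fan_in', nonlinearity='relu')
    # fan_in = 3 * 3 * 3 = 27 and gain = sqrt(2), so the empirical std should
    # be near sqrt(2 / 27) ~= 0.272 (up to sampling noise).
    print('empirical std:', float(w.std()))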