import math
import sys
from copy import deepcopy

import paddle
import paddle.regularizer as regularizer
from paddle.optimizer.lr import (
    CosineAnnealingDecay,
    ExponentialDecay,
    LinearWarmup,
    PiecewiseDecay,
)

class Cosine(CosineAnnealingDecay):
    """
    Cosine learning rate decay
    lr = 0.05 * (math.cos(epoch * (math.pi / epochs)) + 1)

    Args:
        lr(float): initial learning rate
        step_each_epoch(int): steps each epoch
        epochs(int): total training epochs
    """

    def __init__(self, lr, step_each_epoch, epochs, **kwargs):
        super(Cosine, self).__init__(
            learning_rate=lr,
            T_max=step_each_epoch * epochs,
        )
        self.update_specified = False

class Piecewise(PiecewiseDecay):
    """
    Piecewise learning rate decay

    Args:
        lr(float): initial learning rate
        step_each_epoch(int): steps each epoch
        decay_epochs(list): piecewise decay epochs
        gamma(float): decay factor
    """

    def __init__(self, lr, step_each_epoch, decay_epochs, gamma=0.1, **kwargs):
        boundaries = [step_each_epoch * e for e in decay_epochs]
        lr_values = [lr * (gamma**i) for i in range(len(boundaries) + 1)]
        super(Piecewise, self).__init__(boundaries=boundaries, values=lr_values)
        self.update_specified = False
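
# Illustrative sketch (not part of the original module): with lr=0.1,
# step_each_epoch=100, decay_epochs=[30, 60, 90] and the default gamma=0.1,
# Piecewise builds boundaries=[3000, 6000, 9000] (in steps) and
# values=[0.1, 0.01, 0.001, 0.0001], i.e. the learning rate is divided by 10
# at epochs 30, 60 and 90:
#
#     piecewise_lr = Piecewise(lr=0.1, step_each_epoch=100, decay_epochs=[30, 60, 90])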

class CosineWarmup(LinearWarmup):
    """
    Cosine learning rate decay with warmup
    [0, warmup_epoch): linear warmup
    [warmup_epoch, epochs): cosine decay

    Args:
        lr(float): initial learning rate
        step_each_epoch(int): steps each epoch
        epochs(int): total training epochs
        warmup_epoch(int): epoch num of warmup
    """

    def __init__(self, lr, step_each_epoch, epochs, warmup_epoch=5, **kwargs):
        assert (
            epochs > warmup_epoch
        ), "total epoch({}) should be larger than warmup_epoch({}) in CosineWarmup.".format(
            epochs, warmup_epoch
        )
        warmup_step = warmup_epoch * step_each_epoch
        start_lr = 0.0
        end_lr = lr
        lr_sch = Cosine(lr, step_each_epoch, epochs - warmup_epoch)
        super(CosineWarmup, self).__init__(
            learning_rate=lr_sch,
            warmup_steps=warmup_step,
            start_lr=start_lr,
            end_lr=end_lr,
        )
        self.update_specified = False
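
# Illustrative sketch (not part of the original module): for lr=0.1,
# step_each_epoch=100, epochs=120 and warmup_epoch=5, the learning rate ramps
# linearly from 0.0 to 0.1 over the first 500 steps, then follows a cosine
# decay over the remaining 115 epochs:
#
#     warmup_cosine_lr = CosineWarmup(lr=0.1, step_each_epoch=100, epochs=120, warmup_epoch=5)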

class ExponentialWarmup(LinearWarmup):
    """
    Exponential learning rate decay with warmup
    [0, warmup_epoch): linear warmup
    [warmup_epoch, epochs): exponential decay

    Args:
        lr(float): initial learning rate
        step_each_epoch(int): steps each epoch
        decay_epochs(float): decay epochs
        decay_rate(float): decay rate
        warmup_epoch(int): epoch num of warmup
    """

    def __init__(
        self,
        lr,
        step_each_epoch,
        decay_epochs=2.4,
        decay_rate=0.97,
        warmup_epoch=5,
        **kwargs,
    ):
        warmup_step = warmup_epoch * step_each_epoch
        start_lr = 0.0
        end_lr = lr
        lr_sch = ExponentialDecay(lr, decay_rate)
        super(ExponentialWarmup, self).__init__(
            learning_rate=lr_sch,
            warmup_steps=warmup_step,
            start_lr=start_lr,
            end_lr=end_lr,
        )
        # NOTE: hack method to update the exponential lr scheduler
        self.update_specified = True
        self.update_start_step = warmup_step
        self.update_step_interval = int(decay_epochs * step_each_epoch)
        self.step_each_epoch = step_each_epoch

class LearningRateBuilder:
    """
    Build learning rate variable
    https://www.paddlepaddle.org.cn/documentation/docs/zh/api_cn/layers_cn.html

    Args:
        function(str): class name of learning rate
        params(dict): parameters used for init the class
    """

    def __init__(
        self, function="Linear", params={"lr": 0.1, "steps": 100, "end_lr": 0.0}
    ):
        self.function = function
        self.params = params

    def __call__(self):
        mod = sys.modules[__name__]
        lr = getattr(mod, self.function)(**self.params)
        return lr
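
# Illustrative sketch (not part of the original module): building the Cosine
# schedule defined above by name through LearningRateBuilder:
#
#     lr_builder = LearningRateBuilder(
#         function="Cosine",
#         params={"lr": 0.1, "step_each_epoch": 100, "epochs": 120},
#     )
#     lr_scheduler = lr_builder()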

class L1Decay(object):
    """
    L1 Weight Decay Regularization, which encourages the weights to be sparse.

    Args:
        factor(float): regularization coeff. Default: 0.0.
    """

    def __init__(self, factor=0.0):
        super(L1Decay, self).__init__()
        self.factor = factor

    def __call__(self):
        reg = regularizer.L1Decay(self.factor)
        return reg

class L2Decay(object):
    """
    L2 Weight Decay Regularization, which penalizes large weights and keeps them small.

    Args:
        factor(float): regularization coeff. Default: 0.0.
    """

    def __init__(self, factor=0.0):
        super(L2Decay, self).__init__()
        self.factor = factor

    def __call__(self):
        reg = regularizer.L2Decay(self.factor)
        return reg

class Momentum(object):
    """
    Simple Momentum optimizer with velocity state.

    Args:
        learning_rate (float|Variable) - The learning rate used to update parameters.
            Can be a float value or a Variable with one float value as data element.
        momentum (float) - Momentum factor.
        regularization (WeightDecayRegularizer, optional) - The strategy of regularization.
    """

    def __init__(
        self, learning_rate, momentum, parameter_list=None, regularization=None, **args
    ):
        super(Momentum, self).__init__()
        self.learning_rate = learning_rate
        self.momentum = momentum
        self.parameter_list = parameter_list
        self.regularization = regularization

    def __call__(self):
        opt = paddle.optimizer.Momentum(
            learning_rate=self.learning_rate,
            momentum=self.momentum,
            parameters=self.parameter_list,
            weight_decay=self.regularization,
        )
        return opt
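
# Illustrative sketch (not part of the original module): wiring a schedule into
# the Momentum wrapper; `model` is a hypothetical paddle.nn.Layer instance.
#
#     momentum_opt = Momentum(
#         learning_rate=CosineWarmup(lr=0.1, step_each_epoch=100, epochs=120),
#         momentum=0.9,
#         parameter_list=model.parameters(),
#     )()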

class RMSProp(object):
    """
    Root Mean Squared Propagation (RMSProp) is an unpublished, adaptive learning rate method.

    Args:
        learning_rate (float|Variable) - The learning rate used to update parameters.
            Can be a float value or a Variable with one float value as data element.
        momentum (float) - Momentum factor.
        rho (float) - rho value in equation.
        epsilon (float) - avoid division by zero, default is 1e-6.
        regularization (WeightDecayRegularizer, optional) - The strategy of regularization.
    """

    def __init__(
        self,
        learning_rate,
        momentum,
        rho=0.95,
        epsilon=1e-6,
        parameter_list=None,
        regularization=None,
        **args,
    ):
        super(RMSProp, self).__init__()
        self.learning_rate = learning_rate
        self.momentum = momentum
        self.rho = rho
        self.epsilon = epsilon
        self.parameter_list = parameter_list
        self.regularization = regularization

    def __call__(self):
        opt = paddle.optimizer.RMSProp(
            learning_rate=self.learning_rate,
            momentum=self.momentum,
            rho=self.rho,
            epsilon=self.epsilon,
            parameters=self.parameter_list,
            weight_decay=self.regularization,
        )
        return opt

class OptimizerBuilder(object):
    """
    Build optimizer

    Args:
        function(str): class name of the optimizer
        params(dict): parameters used for init the class
        regularizer(dict): parameters used for create regularization
    """

    def __init__(self, function="Momentum", params={"momentum": 0.9}, regularizer=None):
        self.function = function
        self.params = params
        # create regularizer
        if regularizer is not None:
            mod = sys.modules[__name__]
            reg_func = regularizer["function"] + "Decay"
            del regularizer["function"]
            reg = getattr(mod, reg_func)(**regularizer)()
            self.params["regularization"] = reg

    def __call__(self, learning_rate, parameter_list=None):
        mod = sys.modules[__name__]
        opt = getattr(mod, self.function)
        return opt(
            learning_rate=learning_rate, parameter_list=parameter_list, **self.params
        )()
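
# Illustrative sketch (not part of the original module): building a Momentum
# optimizer with L2 weight decay; `lr_scheduler` and `model` are hypothetical
# objects supplied by the caller.
#
#     opt_builder = OptimizerBuilder(
#         function="Momentum",
#         params={"momentum": 0.9},
#         regularizer={"function": "L2", "factor": 1e-4},
#     )
#     optimizer = opt_builder(lr_scheduler, model.parameters())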

def create_optimizer(config, parameter_list=None):
    """
    Create an optimizer using config, usually including
    learning rate and regularization.

    Args:
        config(dict): such as
            {
                'LEARNING_RATE':
                    {'function': 'Cosine',
                     'params': {'lr': 0.1}
                    },
                'OPTIMIZER':
                    {'function': 'Momentum',
                     'params': {'momentum': 0.9},
                     'regularizer':
                        {'function': 'L2', 'factor': 0.0001}
                    }
            }

    Returns:
        an (optimizer instance, learning rate scheduler) tuple
    """
    # create learning_rate instance
    lr_config = config["LEARNING_RATE"]
    lr_config["params"].update(
        {
            "epochs": config["epoch"],
            "step_each_epoch": config["total_images"] // config["TRAIN"]["batch_size"],
        }
    )
    lr = LearningRateBuilder(**lr_config)()
    # create optimizer instance
    opt_config = deepcopy(config["OPTIMIZER"])
    opt = OptimizerBuilder(**opt_config)
    return opt(lr, parameter_list), lr
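
# Illustrative sketch (not part of the original module): an example training
# config in the shape create_optimizer expects; the numeric values (ImageNet-style
# image count, batch size, epochs) are placeholders and `model` is hypothetical.
#
#     config = {
#         "epoch": 120,
#         "total_images": 1281167,
#         "TRAIN": {"batch_size": 256},
#         "LEARNING_RATE": {"function": "Cosine", "params": {"lr": 0.1}},
#         "OPTIMIZER": {
#             "function": "Momentum",
#             "params": {"momentum": 0.9},
#             "regularizer": {"function": "L2", "factor": 0.0001},
#         },
#     }
#     optimizer, lr_scheduler = create_optimizer(config, model.parameters())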

def create_multi_optimizer(config, parameter_list=None):
    """
    Create an optimizer using config; same construction as create_optimizer.
    """
    # create learning_rate instance
    lr_config = config["LEARNING_RATE"]
    lr_config["params"].update(
        {
            "epochs": config["epoch"],
            "step_each_epoch": config["total_images"] // config["TRAIN"]["batch_size"],
        }
    )
    lr = LearningRateBuilder(**lr_config)()
    # create optimizer instance
    opt_config = deepcopy(config["OPTIMIZER"])
    opt = OptimizerBuilder(**opt_config)
    return opt(lr, parameter_list), lr