You cannot select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

268 lines
9.3 KiB
Python

# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import argparse
import os
import os.path as osp
import sys
import paddle
from paddle.jit import to_static
from paddle.static import InputSpec
__dir__ = os.path.dirname(os.path.abspath(__file__))
sys.path.append(os.path.abspath(os.path.join(__dir__, '../')))
from paddlevideo.modeling.builder import build_model
from paddlevideo.utils import get_config
def parse_args():
    """Parse command-line arguments for the model export script.

    Returns:
        argparse.Namespace: parsed arguments with fields ``config``,
        ``override``, ``pretrained_params``, ``output_path`` and
        ``save_name``.
    """
    parser = argparse.ArgumentParser("PaddleVideo export model script")
    parser.add_argument('-c',
                        '--config',
                        type=str,
                        default='configs/example.yaml',
                        help='config file path')
    parser.add_argument('--override',
                        action='append',
                        default=[],
                        help='config options to be overridden')
    parser.add_argument("-p",
                        "--pretrained_params",
                        default='./best.pdparams',
                        type=str,
                        help='params path')
    parser.add_argument("-o",
                        "--output_path",
                        type=str,
                        default="./inference",
                        help='output path')
    # NOTE: the original help text was built with backslash line
    # continuations, which embedded stray indentation and dropped a space
    # ("name,only used") in --help output; use implicit string
    # concatenation instead.
    parser.add_argument('--save_name',
                        type=str,
                        default=None,
                        help='specify the exported inference files '
                        '(pdiparams and pdmodel) name, only used in TIPC')
    return parser.parse_args()
def trim_config(cfg):
    """Strip training-only attributes from a config before export.

    Reusing the training config drags in attributes that are useless (or
    harmful) at inference time, most notably the pretrained weight paths
    on backbones; blank them out here.

    Args:
        cfg: parsed config object (attribute-style dict) with ``model_name``
            and ``MODEL`` entries.

    Returns:
        tuple: ``(cfg, model_name)`` — the trimmed config and the model name.
    """
    model_name = cfg.model_name
    backbone_cfg = cfg.MODEL.get('backbone')
    if backbone_cfg and backbone_cfg.get('pretrained'):
        # Pretrained weight paths are not used when exporting for inference.
        cfg.MODEL.backbone.pretrained = ""

    # Distillation configs carry Teacher/Student sub-models; blank their
    # pretrained paths as well.
    sub_models = cfg.MODEL.get('models')
    if sub_models:
        teacher_backbone = sub_models[0]['Teacher']['backbone']
        if teacher_backbone.get('pretrained'):
            teacher_backbone['pretrained'] = ""
        student_backbone = sub_models[1]['Student']['backbone']
        if student_backbone.get('pretrained'):
            student_backbone['pretrained'] = ""
    return cfg, model_name
def get_input_spec(cfg, model_name):
    """Build the static-graph input specification for a given model.

    Args:
        cfg: the ``INFERENCE`` section of the config; which fields are read
            (``num_seg``, ``target_size``, ...) depends on ``model_name``.
        model_name (str): name of the model architecture to export.

    Returns:
        list: a nested list of ``InputSpec`` objects suitable for
        ``paddle.jit.to_static``.

    Raises:
        ValueError: if ``model_name`` is not one of the supported models.
            (Previously an unmatched name left ``input_spec`` unbound and
            the final ``return`` raised a confusing ``UnboundLocalError``.)
    """
    if model_name in ['ppTSM', 'TSM', 'MoViNet', 'ppTSMv2']:
        input_spec = [[
            InputSpec(
                shape=[None, cfg.num_seg, 3, cfg.target_size, cfg.target_size],
                dtype='float32'),
        ]]
    elif model_name in ['TokenShiftVisionTransformer']:
        input_spec = [[
            InputSpec(shape=[
                None, 3, cfg.num_seg * 3, cfg.target_size, cfg.target_size
            ],
                      dtype='float32'),
        ]]
    elif model_name in ['TSN', 'ppTSN']:
        # TSN-style models sample 10 crops per segment at test time.
        input_spec = [[
            InputSpec(shape=[
                None, cfg.num_seg * 10, 3, cfg.target_size, cfg.target_size
            ],
                      dtype='float32'),
        ]]
    elif model_name in ['BMN']:
        input_spec = [[
            InputSpec(shape=[None, cfg.feat_dim, cfg.tscale],
                      dtype='float32',
                      name='feat_input'),
        ]]
    elif model_name in ['TimeSformer', 'ppTimeSformer']:
        input_spec = [[
            InputSpec(shape=[
                None, 3, cfg.num_seg * 3, cfg.target_size, cfg.target_size
            ],
                      dtype='float32'),
        ]]
    elif model_name in ['VideoSwin']:
        input_spec = [[
            InputSpec(shape=[
                None, 3, cfg.num_seg * cfg.seg_len * 1, cfg.target_size,
                cfg.target_size
            ],
                      dtype='float32'),
        ]]
    elif model_name in ['VideoSwin_TableTennis']:
        input_spec = [[
            InputSpec(shape=[
                None, 3, cfg.num_seg * cfg.seg_len * 3, cfg.target_size,
                cfg.target_size
            ],
                      dtype='float32'),
        ]]
    elif model_name in ['AttentionLSTM']:
        input_spec = [[
            InputSpec(shape=[None, cfg.embedding_size, cfg.feature_dims[0]],
                      dtype='float32'),  # for rgb_data
            InputSpec(shape=[
                None,
            ], dtype='int64'),  # for rgb_len
            InputSpec(shape=[None, cfg.embedding_size, cfg.feature_dims[0]],
                      dtype='float32'),  # for rgb_mask
            InputSpec(shape=[None, cfg.embedding_size, cfg.feature_dims[1]],
                      dtype='float32'),  # for audio_data
            InputSpec(shape=[
                None,
            ], dtype='int64'),  # for audio_len
            InputSpec(shape=[None, cfg.embedding_size, cfg.feature_dims[1]],
                      dtype='float32'),  # for audio_mask
        ]]
    elif model_name in ['SlowFast']:
        # The slow pathway subsamples frames by a factor of alpha.
        input_spec = [[
            InputSpec(shape=[
                None, 3, cfg.num_frames // cfg.alpha, cfg.target_size,
                cfg.target_size
            ],
                      dtype='float32',
                      name='slow_input'),
            InputSpec(shape=[
                None, 3, cfg.num_frames, cfg.target_size, cfg.target_size
            ],
                      dtype='float32',
                      name='fast_input'),
        ]]
    elif model_name in ['STGCN', 'AGCN', 'CTRGCN']:
        input_spec = [[
            InputSpec(shape=[
                None, cfg.num_channels, cfg.window_size, cfg.vertex_nums,
                cfg.person_nums
            ],
                      dtype='float32'),
        ]]
    # The first dimension is multiplied by the number of persons (N*M)
    # inside the model, so a fixed batch size of 1 is used here.
    elif model_name in ['AGCN2s']:
        input_spec = [[
            InputSpec(shape=[
                1, cfg.num_channels, cfg.window_size, cfg.vertex_nums,
                cfg.person_nums
            ],
                      dtype='float32'),
        ]]
    elif model_name in ['TransNetV2']:
        input_spec = [[
            InputSpec(shape=[
                None,
                cfg.num_frames,
                cfg.height,
                cfg.width,
                cfg.num_channels,
            ],
                      dtype='float32'),
        ]]
    elif model_name in ['MSTCN', 'ASRF']:
        # Temporal length is dynamic for segmentation models.
        input_spec = [[
            InputSpec(shape=[None, cfg.num_channels, None], dtype='float32'),
        ]]
    elif model_name in ['ADDS']:
        input_spec = [[
            InputSpec(shape=[None, cfg.num_channels, cfg.height, cfg.width],
                      dtype='float32'),
        ]]
    elif model_name in ['AVA_SlowFast_FastRcnn']:
        input_spec = [[
            InputSpec(shape=[
                None, 3, cfg.num_frames // cfg.alpha, cfg.target_size,
                cfg.target_size
            ],
                      dtype='float32',
                      name='slow_input'),
            InputSpec(shape=[
                None, 3, cfg.num_frames, cfg.target_size, cfg.target_size
            ],
                      dtype='float32',
                      name='fast_input'),
            InputSpec(shape=[None, None, 4], dtype='float32', name='proposals'),
            InputSpec(shape=[None, 2], dtype='float32', name='img_shape')
        ]]
    elif model_name in ['PoseC3D']:
        input_spec = [[
            InputSpec(shape=[None, 1, 17, 48, 56, 56], dtype='float32'),
        ]]
    elif model_name in ['YOWO']:
        input_spec = [[
            InputSpec(shape=[
                1, 3, cfg.num_seg, cfg.target_size, cfg.target_size
            ],
                      dtype='float32'),
        ]]
    else:
        raise ValueError(
            f"model_name ({model_name}) is not supported for export, "
            f"please add its input spec in get_input_spec.")
    return input_spec
def main():
    """Entry point: load the config and weights, convert the model to a
    static graph and save it for inference deployment."""
    args = parse_args()
    cfg, model_name = trim_config(
        get_config(args.config, overrides=args.override, show=False))

    print(f"Building model({model_name})...")
    model = build_model(cfg.MODEL)
    assert osp.isfile(
        args.pretrained_params
    ), f"pretrained params ({args.pretrained_params} is not a file path.)"

    if not os.path.isdir(args.output_path):
        os.makedirs(args.output_path)

    print(f"Loading params from ({args.pretrained_params})...")
    state_dict = paddle.load(args.pretrained_params)
    model.set_dict(state_dict)
    model.eval()

    # Fuse re-parameterizable layers (rep nets) before export.
    for sublayer in model.sublayers():
        if hasattr(sublayer, "rep") and not getattr(sublayer, "is_repped"):
            sublayer.rep()

    spec = get_input_spec(cfg.INFERENCE, model_name)
    static_model = to_static(model, input_spec=spec)
    save_path = osp.join(
        args.output_path,
        args.save_name if args.save_name is not None else model_name)
    paddle.jit.save(static_model, save_path)
    print(
        f"model ({model_name}) has been already saved in ({args.output_path}).")


if __name__ == "__main__":
    main()