# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import argparse
import copy as cp
import math
import os
import pickle
import shutil
import sys
import time
from os import path as osp

import cv2
import numpy as np
import paddle

try:
    import ppdet
except ImportError as e:
    print(f"Warning! {e}. The [paddledet] package and its dependencies are "
          "required for AVA.")

__dir__ = os.path.dirname(os.path.abspath(__file__))
sys.path.append(os.path.abspath(os.path.join(__dir__, '../')))

from paddlevideo.loader.builder import build_dataloader, build_dataset, build_pipeline
from paddlevideo.metrics.ava_utils import read_labelmap
from paddlevideo.modeling.builder import build_model
from paddlevideo.utils import get_config, get_logger, load, mkdir, save

FONTFACE = cv2.FONT_HERSHEY_DUPLEX
FONTSCALE = 0.5
FONTCOLOR = (255, 255, 255)  # BGR, white
MSGCOLOR = (128, 128, 128)  # BGR, gray
THICKNESS = 1
LINETYPE = 1


def hex2color(h):
    """Convert a 6-digit hex string to a tuple of 3 int values (RGB)."""
    return (int(h[:2], 16), int(h[2:4], 16), int(h[4:], 16))
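
# Illustrative check (not part of the original script):
#   >>> hex2color('03045e')
#   (3, 4, 94)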


plate_blue = '03045e-023e8a-0077b6-0096c7-00b4d8-48cae4'
plate_blue = plate_blue.split('-')
plate_blue = [hex2color(h) for h in plate_blue]
plate_green = '004b23-006400-007200-008000-38b000-70e000'
plate_green = plate_green.split('-')
plate_green = [hex2color(h) for h in plate_green]


def abbrev(name):
    """Get the abbreviation of a label name:

    'take (an object) from (a person)' -> 'take ... from ...'
    """
    while name.find('(') != -1:
        st, ed = name.find('('), name.find(')')
        name = name[:st] + '...' + name[ed + 1:]
    return name


# "annotations" holds the predicted results
def visualize(frames, annotations, plate=plate_blue, max_num=5):
    """Visualize frames with predicted annotations.

    Args:
        frames (list[np.ndarray]): Frames for visualization; note that
            len(frames) % len(annotations) should be 0.
        annotations (list[list[tuple]]): The predicted results.
        plate (list[tuple]): The color plate used for visualization.
            Default: plate_blue.
        max_num (int): Max number of labels to visualize for a person box.
            Default: 5; currently it must not exceed 5 (i.e. len(plate) - 1).

    Returns:
        list[np.ndarray]: Visualized frames.
    """
    assert max_num + 1 <= len(plate)
    plate = [x[::-1] for x in plate]
    frames_ = cp.deepcopy(frames)
    nf, na = len(frames), len(annotations)
    assert nf % na == 0
    nfpa = len(frames) // len(annotations)
    anno = None
    h, w, _ = frames[0].shape
    # proposals are normalized, so restore the real coordinate values
    scale_ratio = np.array([w, h, w, h])

    for i in range(na):
        anno = annotations[i]
        if anno is None:
            continue
        for j in range(nfpa):
            ind = i * nfpa + j
            frame = frames_[ind]
            for ann in anno:
                box = ann[0]
                label = ann[1]
                if not len(label):
                    continue
                score = ann[2]
                box = (box * scale_ratio).astype(np.int64)
                st, ed = tuple(box[:2]), tuple(box[2:])
                cv2.rectangle(frame, st, ed, plate[0], 2)
                for k, lb in enumerate(label):
                    if k >= max_num:
                        break
                    text = abbrev(lb)
                    text = ': '.join([text, str(score[k])])
                    location = (0 + st[0], 18 + k * 18 + st[1])
                    textsize = cv2.getTextSize(text, FONTFACE, FONTSCALE,
                                               THICKNESS)[0]
                    textwidth = textsize[0]
                    diag0 = (location[0] + textwidth, location[1] - 14)
                    diag1 = (location[0], location[1] + 2)
                    cv2.rectangle(frame, diag0, diag1, plate[k + 1], -1)
                    cv2.putText(frame, text, location, FONTFACE, FONTSCALE,
                                FONTCOLOR, THICKNESS, LINETYPE)

    return frames_
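
# The per-timestamp annotation format consumed above matches what pack_result()
# below returns: a list of (box, [label, ...], [score, ...]) tuples, where each
# box is normalized to [0, 1] before being scaled back to pixel coordinates.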


def frame_extraction(video_path, target_dir):
    """Extract frames from the video at video_path into target_dir.

    Args:
        video_path (str): Path of the video file.
        target_dir (str): Directory in which the frame images are saved.

    Returns:
        tuple: (frame_paths, frames, FPS) -- the saved frame paths, the
            decoded frames, and the video's frame rate.
    """
    if not os.path.exists(target_dir):
        os.makedirs(target_dir, exist_ok=True)

    # Should be able to handle videos up to several hours
    frame_tmpl = osp.join(target_dir, '{:05d}.jpg')
    vid = cv2.VideoCapture(video_path)

    FPS = int(vid.get(cv2.CAP_PROP_FPS))

    frames = []
    frame_paths = []

    flag, frame = vid.read()
    index = 1
    while flag:
        frames.append(frame)
        frame_path = frame_tmpl.format(index)
        frame_paths.append(frame_path)
        cv2.imwrite(frame_path, frame)
        index += 1
        flag, frame = vid.read()
    return frame_paths, frames, FPS
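
# Usage sketch (hypothetical paths): extracting 'demo.mp4' into 'tmp_frames'
# writes tmp_frames/00001.jpg, tmp_frames/00002.jpg, ... and returns the
# decoded frames plus the source FPS.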


def parse_args():

    def str2bool(v):
        return v.lower() in ("true", "t", "1")

    # general params
    parser = argparse.ArgumentParser("PaddleVideo Inference model script")
    parser.add_argument('-c',
                        '--config',
                        type=str,
                        default='configs/example.yaml',
                        help='config file path')
    parser.add_argument('--video_path', help='video file/url')
    parser.add_argument('-o',
                        '--override',
                        action='append',
                        default=[],
                        help='config options to be overridden')
    parser.add_argument('-w',
                        '--weights',
                        type=str,
                        help='weights for finetuning or testing')
    parser.add_argument('--detection_model_name',
                        help='the name of the detection model')
    parser.add_argument('--detection_model_weights',
                        help='the weights path of the detection model')

    # params for predict
    parser.add_argument('--out-filename',
                        default='ava_det_demo.mp4',
                        help='output filename')
    parser.add_argument('--predict-stepsize',
                        default=8,
                        type=int,
                        help='give out a prediction per n frames')
    parser.add_argument(
        '--output-stepsize',
        default=4,
        type=int,
        help=('show one frame per n frames in the demo; we should have '
              'predict_stepsize % output_stepsize == 0'))
    parser.add_argument('--output-fps',
                        default=6,
                        type=int,
                        help='the fps of the demo video output')

    return parser.parse_args()
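
# Example invocation (illustrative; the script, config, weight, and model
# names are placeholders, not values shipped with this file):
#   python ava_predict.py -c configs/detection/ava/ava.yaml \
#       -w AVA_SlowFast.pdparams --video_path demo.mp4 \
#       --detection_model_name faster_rcnn/faster_rcnn_r50_fpn_1x_coco \
#       --detection_model_weights faster_rcnn_r50_fpn_1x_coco.pdparams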


# Pack the results for one frame, sorted by score in descending order
def pack_result(human_detection, result):
    """Pack the human detections and predicted labels for one frame.

    Args:
        human_detection (np.ndarray): Human detection result.
        result (list): The predicted labels of each human proposal.

    Returns:
        list: List of tuples, each of (human proposal, label names,
            label scores).
    """
    results = []
    if result is None:
        return None

    for prop, res in zip(human_detection, result):
        res.sort(key=lambda x: -x[1])

        results.append((prop, [x[0] for x in res], [x[1] for x in res]))

    return results
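
# Illustrative example: for one proposal whose predictions are
# [('walk', 0.2), ('stand', 0.9)], pack_result yields
# (proposal, ['stand', 'walk'], [0.9, 0.2]).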


# Construct the "results" dict needed by the data-processing pipeline
def get_timestep_result(frame_dir, timestamp, clip_len, frame_interval, FPS):
    result = {}

    result["frame_dir"] = frame_dir

    frame_num = len(os.listdir(frame_dir))

    dir_name = frame_dir.split("/")[-1]
    result["video_id"] = dir_name

    result['timestamp'] = timestamp

    timestamp_str = '{:04d}'.format(timestamp)
    img_key = dir_name + "," + timestamp_str
    result['img_key'] = img_key

    result['shot_info'] = (1, frame_num)
    result['fps'] = FPS

    result['suffix'] = '{:05}.jpg'

    result['timestamp_start'] = 1
    result['timestamp_end'] = int(frame_num / result['fps'])

    return result
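
# Illustrative: with frame_dir 'tmp_frames' and timestamp 16 the resulting
# img_key is 'tmp_frames,0016'.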


def detection_inference(frame_paths, output_dir, model_name, weights_path):
    """Detect human boxes given frame paths.

    Args:
        frame_paths (list[str]): The paths of frames to run detection on.
        output_dir (str): Directory in which detection results are saved.
        model_name (str): Name of the PaddleDetection model config.
        weights_path (str): Path of the detection model weights.

    Returns:
        list[str]: Paths of the per-frame detection result txt files.
    """
    detection_cfg = ppdet.model_zoo.get_config_file(model_name)
    detection_cfg = ppdet.core.workspace.load_config(detection_cfg)
    detection_trainer = ppdet.engine.Trainer(detection_cfg, mode='test')
    detection_trainer.load_weights(weights_path)

    print('Performing Human Detection for each frame')

    detection_trainer.predict(frame_paths, output_dir=output_dir, save_txt=True)

    print("finished object detection")

    results = []

    for frame_path in frame_paths:
        (file_dir, file_name) = os.path.split(frame_path)
        (file_path, ext) = os.path.splitext(frame_path)

        txt_file_name = file_name.replace(ext, ".txt")
        txt_path = os.path.join(output_dir, txt_file_name)
        results.append(txt_path)

    return results
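
# e.g. a frame saved as tmp_frames/00016.jpg is expected to yield a detection
# file tmp_detection/00016.txt (illustrative names, based on the defaults used
# in main() below).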


def get_detection_result(txt_file_path, img_h, img_w, person_det_score_thr):
    """Get the human detection boxes (proposals) and confidences (scores)
    from a detection result file.

    Args:
        txt_file_path (str): Path of the detection result file.
        img_h (int): Image height.
        img_w (int): Image width.
        person_det_score_thr (float): Minimum score to keep a detection.
    """
    proposals = []
    scores = []

    with open(txt_file_path, 'r') as detection_file:
        lines = detection_file.readlines()
        for line in lines:  # e.g. person 0.9842637181282043 0.0 469.1407470703125 944.7770385742188 831.806396484375
            items = line.split(" ")
            if items[0] != 'person':  # keep only person detections
                continue

            score = float(items[1])

            if score < person_det_score_thr:
                continue

            # the box is stored as (x, y, w, h); normalize to [0, 1] corners
            x1 = float(items[2]) / img_w
            y1 = float(items[3]) / img_h
            box_w = float(items[4])
            box_h = float(items[5])

            x2 = (float(items[2]) + box_w) / img_w
            y2 = (float(items[3]) + box_h) / img_h

            scores.append(score)

            proposals.append([x1, y1, x2, y2])

    return np.array(proposals), np.array(scores)
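
# Worked example (illustrative): for a 1920x1080 frame, the line
#   person 0.98 100.0 200.0 50.0 80.0
# yields the proposal [100/1920, 200/1080, 150/1920, 280/1080] with score 0.98.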


@paddle.no_grad()
def main(args):
    config = get_config(args.config, show=False)  # parse config file

    # extract frames from video
    video_path = args.video_path
    frame_dir = 'tmp_frames'
    frame_paths, frames, FPS = frame_extraction(video_path, frame_dir)

    num_frame = len(frame_paths)  # video duration in seconds * FPS
    assert num_frame != 0
    print("Frame Number:", num_frame)

    # frame height and width
    h, w, _ = frames[0].shape

    # Get clip_len, frame_interval and calculate center index of each clip
    data_process_pipeline = build_pipeline(
        config.PIPELINE.test)  # test-time data-processing pipeline

    clip_len = config.PIPELINE.test.sample['clip_len']
    assert clip_len % 2 == 0, 'We would like to have an even clip_len'
    frame_interval = config.PIPELINE.test.sample['frame_interval']

    # key frames are sampled here, one every predict_stepsize frames
    window_size = clip_len * frame_interval
    timestamps = np.arange(window_size // 2, (num_frame + 1 - window_size // 2),
                           args.predict_stepsize)
    print("timestamps number:", len(timestamps))

    # get selected frame list according to timestamps
    selected_frame_list = []
    for timestamp in timestamps:
        selected_frame_list.append(frame_paths[timestamp - 1])

    # Load label_map
    label_map_path = config.DATASET.test['label_file']
    categories, class_whitelist = read_labelmap(open(label_map_path))
    label_map = {}
    for item in categories:
        id = item['id']
        name = item['name']
        label_map[id] = name

    # Construct model.
    if config.MODEL.backbone.get('pretrained'):
        config.MODEL.backbone.pretrained = ''  # disable pretrained model init
    model = build_model(config.MODEL)

    model.eval()
    state_dicts = load(args.weights)
    model.set_state_dict(state_dicts)

    detection_result_dir = 'tmp_detection'
    detection_model_name = args.detection_model_name
    detection_model_weights = args.detection_model_weights
    detection_txt_list = detection_inference(selected_frame_list,
                                             detection_result_dir,
                                             detection_model_name,
                                             detection_model_weights)
    assert len(detection_txt_list) == len(timestamps)

    print('Performing SpatioTemporal Action Detection for each clip')
    human_detections = []
    predictions = []

    index = 0
    for timestamp, detection_txt_path in zip(timestamps, detection_txt_list):
        proposals, scores = get_detection_result(
            detection_txt_path, h, w,
            float(config.DATASET.test['person_det_score_thr']))
        if proposals.shape[0] == 0:
            predictions.append(None)
            human_detections.append(None)
            continue

        human_detections.append(proposals)

        result = get_timestep_result(frame_dir,
                                     timestamp,
                                     clip_len,
                                     frame_interval,
                                     FPS=FPS)
        result["proposals"] = proposals
        result["scores"] = scores

        new_result = data_process_pipeline(result)
        proposals = new_result['proposals']

        img_slow = new_result['imgs'][0]
        img_slow = img_slow[np.newaxis, :]
        img_fast = new_result['imgs'][1]
        img_fast = img_fast[np.newaxis, :]

        proposals = proposals[np.newaxis, :]

        scores = scores[np.newaxis, :]

        img_shape = np.asarray(new_result['img_shape'])
        img_shape = img_shape[np.newaxis, :]

        data = [
            paddle.to_tensor(img_slow, dtype='float32'),
            paddle.to_tensor(img_fast, dtype='float32'),
            paddle.to_tensor(proposals, dtype='float32'), scores,
            paddle.to_tensor(img_shape, dtype='int32')
        ]

        with paddle.no_grad():
            result = model(data, mode='infer')

        result = result[0]
        prediction = []

        # one (initially empty) label list per person proposal
        person_num = proposals.shape[1]
        for i in range(person_num):
            prediction.append([])

        # keep only whitelisted classes whose score clears action_thr
        for i in range(len(result)):
            if i + 1 not in class_whitelist:
                continue
            for j in range(person_num):
                if result[i][j, 4] > config.MODEL.head['action_thr']:
                    prediction[j].append((label_map[i + 1], result[i][j, 4]))
        predictions.append(prediction)

        index = index + 1
        if index % 10 == 0:
            print(index, "/", len(timestamps))

    results = []
    for human_detection, prediction in zip(human_detections, predictions):
        results.append(pack_result(human_detection, prediction))

    def dense_timestamps(timestamps, n):
        """Make it nx frames."""
        old_frame_interval = (timestamps[1] - timestamps[0])
        start = timestamps[0] - old_frame_interval / n * (n - 1) / 2
        new_frame_inds = np.arange(
            len(timestamps) * n) * old_frame_interval / n + start
        return new_frame_inds.astype(np.int64)
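
    # Illustrative: timestamps [16, 24] with n=2 become [14, 18, 22, 26],
    # i.e. twice as many indices, evenly spaced around the originals.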

    dense_n = int(args.predict_stepsize /
                  args.output_stepsize)  # 2 with the default arguments
    frames = [
        cv2.imread(frame_paths[i - 1])
        for i in dense_timestamps(timestamps, dense_n)
    ]

    vis_frames = visualize(frames, results)

    try:
        import moviepy.editor as mpy
    except ImportError:
        raise ImportError('Please install moviepy to enable output file')

    vid = mpy.ImageSequenceClip([x[:, :, ::-1] for x in vis_frames],
                                fps=args.output_fps)
    vid.write_videofile(args.out_filename)
    print("finished writing!")

    # delete tmp files and dirs
    shutil.rmtree(frame_dir)
    shutil.rmtree(detection_result_dir)


if __name__ == '__main__':
    args = parse_args()  # parse command-line arguments
    main(args)