# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import json
import numpy as np
import pandas as pd
import multiprocessing as mp

from .registry import METRIC
from .base import BaseMetric
from .ActivityNet import ANETproposal
from paddlevideo.utils import get_logger

logger = get_logger("paddlevideo")

def iou_with_anchors(anchors_min, anchors_max, box_min, box_max):
    """Compute jaccard score between a box and the anchors.
    """
    len_anchors = anchors_max - anchors_min
    int_xmin = np.maximum(anchors_min, box_min)
    int_xmax = np.minimum(anchors_max, box_max)
    inter_len = np.maximum(int_xmax - int_xmin, 0.)
    union_len = len_anchors - inter_len + box_max - box_min
    jaccard = np.divide(inter_len, union_len)
    return jaccard
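
# Illustrative worked example (not executed at import time), in the
# normalized temporal coordinates used throughout this module: for an
# anchor [0.1, 0.4) and a box [0.2, 0.5), inter_len = 0.4 - 0.2 = 0.2 and
# union_len = 0.3 - 0.2 + (0.5 - 0.2) = 0.4, so the returned IoU is 0.5:
#     iou_with_anchors(np.array([0.1]), np.array([0.4]), 0.2, 0.5)
#     # -> array([0.5])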

def boundary_choose(score_list):
    """Choose start and end boundary from score.
    """
    max_score = max(score_list)
    mask_high = (score_list > max_score * 0.5)
    score_list = list(score_list)
    score_middle = np.array([0.0] + score_list + [0.0])
    score_front = np.array([0.0, 0.0] + score_list)
    score_back = np.array(score_list + [0.0, 0.0])
    mask_peak = ((score_middle > score_front) & (score_middle > score_back))
    mask_peak = mask_peak[1:-1]
    mask = (mask_high | mask_peak).astype('float32')
    return mask
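
# Illustrative example: for scores np.array([0.1, 0.9, 0.3, 0.4, 0.2]),
# mask_high keeps positions above 0.5 * max (= 0.45) -> [0, 1, 0, 0, 0],
# while mask_peak keeps local maxima (sequence zero-padded at both ends)
# -> [0, 1, 0, 1, 0], so boundary_choose returns [0., 1., 0., 1., 0.].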

def soft_nms(df, alpha, t1, t2):
    """Soft-NMS over candidate proposals.

    df: proposals generated by network;
    alpha: alpha value of Gaussian decaying function;
    t1, t2: threshold for soft nms.
    """
    df = df.sort_values(by="score", ascending=False)
    tstart = list(df.xmin.values[:])
    tend = list(df.xmax.values[:])
    tscore = list(df.score.values[:])
    rstart = []
    rend = []
    rscore = []

    while len(tscore) > 1 and len(rscore) < 101:
        max_index = tscore.index(max(tscore))
        tmp_iou_list = iou_with_anchors(np.array(tstart), np.array(tend),
                                        tstart[max_index], tend[max_index])
        for idx in range(0, len(tscore)):
            if idx != max_index:
                tmp_iou = tmp_iou_list[idx]
                tmp_width = tend[max_index] - tstart[max_index]
                if tmp_iou > t1 + (t2 - t1) * tmp_width:
                    tscore[idx] = tscore[idx] * np.exp(
                        -np.square(tmp_iou) / alpha)
        rstart.append(tstart[max_index])
        rend.append(tend[max_index])
        rscore.append(tscore[max_index])
        tstart.pop(max_index)
        tend.pop(max_index)
        tscore.pop(max_index)

    newDf = pd.DataFrame()
    newDf['score'] = rscore
    newDf['xmin'] = rstart
    newDf['xmax'] = rend
    return newDf
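
# Usage sketch (illustrative values; not executed here):
#     df = pd.DataFrame({"xmin": [0.10, 0.12, 0.60],
#                        "xmax": [0.50, 0.52, 0.90],
#                        "score": [0.90, 0.85, 0.70]})
#     kept = soft_nms(df, alpha=0.4, t1=0.55, t2=0.9)
# Heavily overlapping proposals are not discarded outright: whenever a
# proposal's IoU with the currently selected one exceeds the
# width-dependent threshold t1 + (t2 - t1) * width, its score is decayed
# by exp(-iou^2 / alpha) and it may still be selected in a later round.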

@METRIC.register
class BMNMetric(BaseMetric):
    """
    Metrics for BMN. Two stages in this metric:
    (1) Get test results using the trained model; results will be saved in BMNMetric.result_path.
    (2) Calculate metrics using the results file from stage (1).
    """
    def __init__(self,
                 data_size,
                 batch_size,
                 tscale,
                 dscale,
                 file_path,
                 ground_truth_filename,
                 subset,
                 output_path,
                 result_path,
                 get_metrics=True,
                 log_interval=1):
        """
        Init for BMN metrics.
        Params:
            get_metrics: whether to calculate AR@N and AUC metrics or not, default True.
        """
        super().__init__(data_size, batch_size, log_interval)
        assert self.batch_size == 1, "Now we just support batch_size==1 test"
        assert self.world_size == 1, "Now we just support single-card test"
        self.tscale = tscale
        self.dscale = dscale
        self.file_path = file_path
        self.ground_truth_filename = ground_truth_filename
        self.subset = subset
        self.output_path = output_path
        self.result_path = result_path
        self.get_metrics = get_metrics

        if not os.path.isdir(self.output_path):
            os.makedirs(self.output_path)
        if not os.path.isdir(self.result_path):
            os.makedirs(self.result_path)

        self.video_dict, self.video_list = self.get_dataset_dict(
            self.file_path, self.subset)
    def get_dataset_dict(self, file_path, subset):
        with open(file_path) as f:
            annos = json.load(f)
        video_dict = {}
        for video_name in annos.keys():
            video_subset = annos[video_name]["subset"]
            if subset in video_subset:
                video_dict[video_name] = annos[video_name]
        video_list = list(video_dict.keys())
        video_list.sort()
        return video_dict, video_list
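
    # The annotation file at `file_path` is expected to follow the
    # ActivityNet-style layout consumed above, e.g. (illustrative):
    #     {
    #         "v_xxxxxx": {
    #             "subset": "validation",
    #             "duration_second": 82.73,
    #             "annotations": [...]
    #         },
    #         ...
    #     }
    # Only "subset" (filtering here) and "duration_second" (rescaling the
    # normalized proposals in video_process) are used by this metric.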

    def update(self, batch_id, data, outputs):
        """update metrics during each iter
        """
        fid = data[4].numpy()
        pred_bm, pred_start, pred_end = outputs
        pred_bm = pred_bm.numpy()
        pred_start = pred_start[0].numpy()
        pred_end = pred_end[0].numpy()

        snippet_xmins = [1.0 / self.tscale * i for i in range(self.tscale)]
        snippet_xmaxs = [
            1.0 / self.tscale * i for i in range(1, self.tscale + 1)
        ]
        cols = ["xmin", "xmax", "score"]

        video_name = self.video_list[fid[0]]
        pred_bm = pred_bm[0, 0, :, :] * pred_bm[0, 1, :, :]
        start_mask = boundary_choose(pred_start)
        start_mask[0] = 1.
        end_mask = boundary_choose(pred_end)
        end_mask[-1] = 1.
        score_vector_list = []
        for idx in range(self.dscale):
            for jdx in range(self.tscale):
                start_index = jdx
                end_index = start_index + idx
                if end_index < self.tscale and start_mask[
                        start_index] == 1 and end_mask[end_index] == 1:
                    xmin = snippet_xmins[start_index]
                    xmax = snippet_xmaxs[end_index]
                    xmin_score = pred_start[start_index]
                    xmax_score = pred_end[end_index]
                    bm_score = pred_bm[idx, jdx]
                    conf_score = xmin_score * xmax_score * bm_score
                    score_vector_list.append([xmin, xmax, conf_score])

        score_vector_list = np.stack(score_vector_list)
        video_df = pd.DataFrame(score_vector_list, columns=cols)
        video_df.to_csv(os.path.join(self.output_path, "%s.csv" % video_name),
                        index=False)

        if batch_id % self.log_interval == 0:
            logger.info("Processing................ batch {}".format(batch_id))

    def accumulate(self):
        """accumulate metrics when finished all iters.
        """
        # check clip index of each video
        # Stage 1: post-process the per-video csv results into a single json.
        self.bmn_post_processing(self.video_dict, self.subset,
                                 self.output_path, self.result_path)
        if self.get_metrics:
            logger.info("[TEST] calculate metrics...")
            # Stage 2: evaluate AR@N against the ground truth.
            uniform_average_nr_proposals_valid, uniform_average_recall_valid, uniform_recall_valid = self.cal_metrics(
                self.ground_truth_filename,
                os.path.join(self.result_path, "bmn_results_validation.json"),
                max_avg_nr_proposals=100,
                tiou_thresholds=np.linspace(0.5, 0.95, 10),
                subset='validation')
            logger.info("AR@1; AR@5; AR@10; AR@100")
            logger.info("%.02f %.02f %.02f %.02f" %
                        (100 * np.mean(uniform_recall_valid[:, 0]),
                         100 * np.mean(uniform_recall_valid[:, 4]),
                         100 * np.mean(uniform_recall_valid[:, 9]),
                         100 * np.mean(uniform_recall_valid[:, -1])))
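
    # Assumed shape (per the ActivityNet proposal evaluation): each row of
    # uniform_recall_valid corresponds to one tIoU threshold and each column
    # to an average-proposal budget, so columns 0/4/9/-1 give AR@1, AR@5,
    # AR@10 and AR@100 after averaging over the ten tIoU thresholds.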

    def bmn_post_processing(self, video_dict, subset, output_path,
                            result_path):
        video_list = list(video_dict.keys())
        global result_dict
        result_dict = mp.Manager().dict()
        pp_num = 12  # number of worker processes

        num_videos = len(video_list)
        num_videos_per_thread = num_videos // pp_num
        processes = []
        for tid in range(pp_num - 1):
            tmp_video_list = video_list[tid * num_videos_per_thread:(tid + 1) *
                                        num_videos_per_thread]
            p = mp.Process(target=self.video_process,
                           args=(tmp_video_list, video_dict, output_path,
                                 result_dict))
            p.start()
            processes.append(p)
        # the last worker takes the remainder of the video list
        tmp_video_list = video_list[(pp_num - 1) * num_videos_per_thread:]
        p = mp.Process(target=self.video_process,
                       args=(tmp_video_list, video_dict, output_path,
                             result_dict))
        p.start()
        processes.append(p)
        for p in processes:
            p.join()

        result_dict = dict(result_dict)
        output_dict = {
            "version": "VERSION 1.3",
            "results": result_dict,
            "external_data": {}
        }
        with open(os.path.join(result_path, "bmn_results_%s.json" % subset),
                  "w") as outfile:
            # ensure_ascii=False in case the file name contains Chinese characters
            json.dump(output_dict, outfile, ensure_ascii=False)
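
    # The written bmn_results_<subset>.json follows the ActivityNet
    # submission layout, e.g. (illustrative):
    #     {
    #         "version": "VERSION 1.3",
    #         "results": {
    #             "xxxxxx": [{"score": 0.87, "segment": [1.2, 13.6]}, ...]
    #         },
    #         "external_data": {}
    #     }
    # where each "segment" is [start, end] in seconds.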

    def video_process(self,
                      video_list,
                      video_dict,
                      output_path,
                      result_dict,
                      snms_alpha=0.4,
                      snms_t1=0.55,
                      snms_t2=0.9):
        for video_name in video_list:
            logger.info("Processing video........" + video_name)
            df = pd.read_csv(os.path.join(output_path, video_name + ".csv"))
            if len(df) > 1:
                df = soft_nms(df, snms_alpha, snms_t1, snms_t2)

            video_duration = video_dict[video_name]["duration_second"]
            proposal_list = []
            for idx in range(min(100, len(df))):
                tmp_prop = {
                    "score": df.score.values[idx],
                    "segment": [
                        max(0, df.xmin.values[idx]) * video_duration,
                        min(1, df.xmax.values[idx]) * video_duration
                    ]
                }
                proposal_list.append(tmp_prop)
            # strip the "v_" prefix so keys match the ActivityNet ground-truth naming
            video_name = video_name[2:] if video_name[:2] == 'v_' else video_name
            result_dict[video_name] = proposal_list

    def cal_metrics(self,
                    ground_truth_filename,
                    proposal_filename,
                    max_avg_nr_proposals=100,
                    tiou_thresholds=np.linspace(0.5, 0.95, 10),
                    subset='validation'):
        anet_proposal = ANETproposal(ground_truth_filename,
                                     proposal_filename,
                                     tiou_thresholds=tiou_thresholds,
                                     max_avg_nr_proposals=max_avg_nr_proposals,
                                     subset=subset,
                                     verbose=True,
                                     check_status=False)
        anet_proposal.evaluate()

        recall = anet_proposal.recall
        average_recall = anet_proposal.avg_recall
        average_nr_proposals = anet_proposal.proposals_per_video

        return (average_nr_proposals, average_recall, recall)
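
# Minimal usage sketch (illustrative; paths are hypothetical, and the test
# loop is normally driven by a PaddleVideo config rather than written by hand):
#     metric = BMNMetric(data_size=1, batch_size=1, tscale=100, dscale=100,
#                        file_path="data/annotations.json",
#                        ground_truth_filename="data/activity_net_gt.json",
#                        subset="validation",
#                        output_path="output/BMN_csv",
#                        result_path="output/BMN_results")
#     for batch_id, data in enumerate(test_loader):
#         outputs = model(data)
#         metric.update(batch_id, data, outputs)
#     metric.accumulate()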