# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import numpy as np
import pandas as pd

from .registry import METRIC
from .base import BaseMetric
from paddlevideo.utils import get_logger

logger = get_logger("paddlevideo")


def get_labels_scores_start_end_time(input_np,
                                     frame_wise_labels,
                                     actions_dict,
                                     bg_class=["background", "None"]):
    """Convert frame-wise labels into segments.

    Returns the label, start frame, end frame (exclusive) and mean class
    score of every non-background segment.
    """
    labels = []
    starts = []
    ends = []
    scores = []

    boundary_score_ptr = 0

    last_label = frame_wise_labels[0]
    if frame_wise_labels[0] not in bg_class:
        labels.append(frame_wise_labels[0])
        starts.append(0)
    for i in range(len(frame_wise_labels)):
        if frame_wise_labels[i] != last_label:
            if frame_wise_labels[i] not in bg_class:
                labels.append(frame_wise_labels[i])
                starts.append(i)
            if last_label not in bg_class:
                ends.append(i)
                score = np.mean(
                    input_np[actions_dict[labels[boundary_score_ptr]],
                             starts[boundary_score_ptr]:(
                                 ends[boundary_score_ptr] + 1)])
                scores.append(score)
                boundary_score_ptr = boundary_score_ptr + 1
            last_label = frame_wise_labels[i]
    if last_label not in bg_class:
        ends.append(i + 1)
        score = np.mean(
            input_np[actions_dict[labels[boundary_score_ptr]],
                     starts[boundary_score_ptr]:(
                         ends[boundary_score_ptr] + 1)])
        scores.append(score)
        boundary_score_ptr = boundary_score_ptr + 1
    return labels, starts, ends, scores
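

# Note (an assumption inferred from how update() calls this): input_np is
# expected to be a (num_classes, num_frames) score array, so each segment
# score above is the mean predicted probability of the segment's class over
# the frames it spans.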


def get_labels_start_end_time(frame_wise_labels,
                              bg_class=["background", "None"]):
    """Convert frame-wise labels into (label, start, end) segments,
    skipping background classes. End indices are exclusive.
    """
    labels = []
    starts = []
    ends = []
    last_label = frame_wise_labels[0]
    if frame_wise_labels[0] not in bg_class:
        labels.append(frame_wise_labels[0])
        starts.append(0)
    for i in range(len(frame_wise_labels)):
        if frame_wise_labels[i] != last_label:
            if frame_wise_labels[i] not in bg_class:
                labels.append(frame_wise_labels[i])
                starts.append(i)
            if last_label not in bg_class:
                ends.append(i)
            last_label = frame_wise_labels[i]
    if last_label not in bg_class:
        ends.append(i + 1)
    return labels, starts, ends
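

# A quick illustration with hypothetical input: for frame-wise labels
# ["background", "walk", "walk", "run", "run", "run"], the call returns
# labels=["walk", "run"], starts=[1, 3], ends=[3, 6] (ends are exclusive).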


def levenstein(p, y, norm=False):
    """Levenshtein (edit) distance between two segment-label sequences.

    With ``norm=True`` the distance is turned into a similarity score in
    [0, 100], as used by the Edit metric.
    """
    m_row = len(p)
    n_col = len(y)
    # np.float was removed in NumPy 1.24; the builtin float is equivalent.
    D = np.zeros([m_row + 1, n_col + 1], float)
    for i in range(m_row + 1):
        D[i, 0] = i
    for i in range(n_col + 1):
        D[0, i] = i

    for j in range(1, n_col + 1):
        for i in range(1, m_row + 1):
            if y[j - 1] == p[i - 1]:
                D[i, j] = D[i - 1, j - 1]
            else:
                D[i, j] = min(D[i - 1, j] + 1, D[i, j - 1] + 1,
                              D[i - 1, j - 1] + 1)

    if norm:
        score = (1 - D[-1, -1] / max(m_row, n_col)) * 100
    else:
        score = D[-1, -1]

    return score
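

# Sanity check with hypothetical sequences: ["a", "b", "c"] vs. ["a", "c"]
# differ by one deletion, so levenstein(p, y) == 1.0 and
# levenstein(p, y, norm=True) == (1 - 1/3) * 100 ~= 66.67.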


def edit_score(recognized,
               ground_truth,
               norm=True,
               bg_class=["background", "None"]):
    P, _, _ = get_labels_start_end_time(recognized, bg_class)
    Y, _, _ = get_labels_start_end_time(ground_truth, bg_class)
    return levenstein(P, Y, norm)


def f_score(recognized, ground_truth, overlap, bg_class=["background", "None"]):
    """Count TP/FP/FN for the segmental F1@overlap metric: a predicted
    segment is a true positive if it matches an unclaimed ground-truth
    segment of the same label with IoU >= overlap.
    """
    p_label, p_start, p_end = get_labels_start_end_time(recognized, bg_class)
    y_label, y_start, y_end = get_labels_start_end_time(ground_truth, bg_class)

    tp = 0
    fp = 0
    hits = np.zeros(len(y_label))

    for j in range(len(p_label)):
        intersection = np.minimum(p_end[j], y_end) - np.maximum(
            p_start[j], y_start)
        union = np.maximum(p_end[j], y_end) - np.minimum(p_start[j], y_start)
        # Zero out IoU against ground-truth segments with a different label.
        IoU = (1.0 * intersection / union) * (
            [p_label[j] == y_label[x] for x in range(len(y_label))])
        # Get the best scoring segment
        idx = np.array(IoU).argmax()

        if IoU[idx] >= overlap and not hits[idx]:
            tp += 1
            hits[idx] = 1
        else:
            fp += 1
    fn = len(y_label) - sum(hits)
    return float(tp), float(fp), float(fn)
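

# Example with hypothetical counts: tp=8, fp=2, fn=4 accumulated over a split
# gives precision=0.8, recall=2/3 and F1 = 2*0.8*(2/3)/(0.8+2/3) ~= 72.73%.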


def boundary_AR(pred_boundary, gt_boundary, overlap_list, max_proposal):
    """Average recall of the top ``max_proposal`` predicted segments over
    the IoU thresholds in ``overlap_list``.
    """
    p_label, p_start, p_end, p_scores = pred_boundary
    y_label, y_start, y_end, _ = gt_boundary

    # sort proposals by confidence, highest first
    pred_dict = {
        "label": p_label,
        "start": p_start,
        "end": p_end,
        "scores": p_scores
    }
    pdf = pd.DataFrame(pred_dict)
    pdf = pdf.sort_values(by="scores", ascending=False)
    p_label = list(pdf["label"])
    p_start = list(pdf["start"])
    p_end = list(pdf["end"])
    p_scores = list(pdf["scores"])

    # pad or truncate the proposal lists to exactly max_proposal entries
    if len(p_label) < max_proposal and len(p_label) > 0:
        p_label = p_label + [p_label[-1]] * (max_proposal - len(p_label))
        p_start = p_start + [p_start[-1]] * (max_proposal - len(p_start))
        p_end = p_end + [p_end[-1]] * (max_proposal - len(p_end))
        p_scores = p_scores + [p_scores[-1]] * (max_proposal - len(p_scores))
    elif len(p_label) > max_proposal:
        p_label[max_proposal:] = []
        p_start[max_proposal:] = []
        p_end[max_proposal:] = []
        p_scores[max_proposal:] = []

    t_AR = np.zeros(len(overlap_list))

    for i in range(len(overlap_list)):
        overlap = overlap_list[i]

        tp = 0
        fp = 0
        hits = np.zeros(len(y_label))

        for j in range(len(p_label)):
            intersection = np.minimum(p_end[j], y_end) - np.maximum(
                p_start[j], y_start)
            union = np.maximum(p_end[j], y_end) - np.minimum(
                p_start[j], y_start)
            IoU = (1.0 * intersection / union)
            # Get the best scoring segment
            idx = np.array(IoU).argmax()

            if IoU[idx] >= overlap and not hits[idx]:
                tp += 1
                hits[idx] = 1
            else:
                fp += 1
        fn = len(y_label) - sum(hits)

        recall = float(tp) / (float(tp) + float(fn))
        t_AR[i] = recall

    AR = np.mean(t_AR)
    return AR
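

# Usage sketch (hypothetical tuples): with pred and gt both produced by
# get_labels_scores_start_end_time, boundary_AR(pred, gt, [0.5], 1) is the
# recall of the single highest-scoring proposal at IoU 0.5; AR@AN averages
# this over all thresholds in overlap_list.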


@METRIC.register
class SegmentationMetric(BaseMetric):
    """
    Test for Video Segmentation based model.
    """
    def __init__(self,
                 data_size,
                 batch_size,
                 overlap,
                 actions_map_file_path,
                 log_interval=1,
                 tolerance=5,
                 boundary_threshold=0.7,
                 max_proposal=100):
        """prepare for metrics
        """
        super().__init__(data_size, batch_size, log_interval)
        # Build the action-name -> class-id mapping; each line of the map
        # file is expected to hold "<class_id> <action_name>".
        with open(actions_map_file_path, 'r') as file_ptr:
            actions = file_ptr.read().split('\n')[:-1]
        self.actions_dict = dict()
        for a in actions:
            self.actions_dict[a.split()[1]] = int(a.split()[0])

        # cls score
        self.overlap = overlap
        self.overlap_len = len(overlap)
        self.cls_tp = np.zeros(self.overlap_len)
        self.cls_fp = np.zeros(self.overlap_len)
        self.cls_fn = np.zeros(self.overlap_len)
        self.total_correct = 0
        self.total_edit = 0
        self.total_frame = 0
        self.total_video = 0

        # boundary score
        self.max_proposal = max_proposal
        self.AR_at_AN = [[] for _ in range(max_proposal)]
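
    # Construction sketch (hypothetical values): outside the config-driven
    # pipeline, the metric could be built roughly as
    #   metric = SegmentationMetric(data_size=1, batch_size=1,
    #                               overlap=[0.1, 0.25, 0.5],
    #                               actions_map_file_path="data/mapping.txt")
    # where mapping.txt lists "<class_id> <action_name>" pairs.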

    def update(self, batch_id, data, outputs):
        """update metrics during each iter
        """
        groundTruth = data[1]

        predicted = outputs['predict']
        output_np = outputs['output_np']

        outputs_np = predicted.numpy()
        outputs_arr = output_np.numpy()[0, :]
        gt_np = groundTruth.numpy()[0, :]

        # Map class ids back to action names for both prediction and GT.
        id2action = {idx: name for name, idx in self.actions_dict.items()}
        recog_content = [
            id2action[outputs_np[i]] for i in range(outputs_np.shape[0])
        ]
        gt_content = [id2action[gt_np[i]] for i in range(gt_np.shape[0])]

        pred_boundary = get_labels_scores_start_end_time(
            outputs_arr, recog_content, self.actions_dict)
        gt_boundary = get_labels_scores_start_end_time(
            np.ones(outputs_arr.shape), gt_content, self.actions_dict)

        # cls score
        correct = 0
        total = 0
        edit = 0

        for i in range(len(gt_content)):
            total += 1
            # accumulate
            self.total_frame += 1

            if gt_content[i] == recog_content[i]:
                correct += 1
                # accumulate
                self.total_correct += 1

        edit_num = edit_score(recog_content, gt_content)
        edit += edit_num
        self.total_edit += edit_num

        for s in range(self.overlap_len):
            tp1, fp1, fn1 = f_score(recog_content, gt_content, self.overlap[s])

            # accumulate
            self.cls_tp[s] += tp1
            self.cls_fp[s] += fp1
            self.cls_fn[s] += fn1

        # accumulate
        self.total_video += 1

        # proposal score
        for AN in range(self.max_proposal):
            AR = boundary_AR(pred_boundary,
                             gt_boundary,
                             self.overlap,
                             max_proposal=(AN + 1))
            self.AR_at_AN[AN].append(AR)

    def accumulate(self):
        """accumulate metrics when finished all iters.
        """
        # cls metric
        Acc = 100 * float(self.total_correct) / self.total_frame
        Edit = (1.0 * self.total_edit) / self.total_video
        Fscore = dict()
        for s in range(self.overlap_len):
            precision = self.cls_tp[s] / float(self.cls_tp[s] + self.cls_fp[s])
            recall = self.cls_tp[s] / float(self.cls_tp[s] + self.cls_fn[s])

            f1 = 2.0 * (precision * recall) / (precision + recall)

            f1 = np.nan_to_num(f1) * 100
            Fscore[self.overlap[s]] = f1

        # proposal metric; AR@AN5/AN15 assume max_proposal >= 15
        proposal_AUC = np.array(self.AR_at_AN) * 100
        AUC = np.mean(proposal_AUC)
        AR_at_AN1 = np.mean(proposal_AUC[0, :])
        AR_at_AN5 = np.mean(proposal_AUC[4, :])
        AR_at_AN15 = np.mean(proposal_AUC[14, :])

        # log metric
        log_metric_info = "dataset model performance: "
        # preds ensemble
        log_metric_info += "Acc: {:.4f}, ".format(Acc)
        log_metric_info += 'Edit: {:.4f}, '.format(Edit)
        for s in range(len(self.overlap)):
            log_metric_info += 'F1@{:0.2f}: {:.4f}, '.format(
                self.overlap[s], Fscore[self.overlap[s]])

        # boundary metric
        log_metric_info += "Auc: {:.4f}, ".format(AUC)
        log_metric_info += "AR@AN1: {:.4f}, ".format(AR_at_AN1)
        log_metric_info += "AR@AN5: {:.4f}, ".format(AR_at_AN5)
        log_metric_info += "AR@AN15: {:.4f}, ".format(AR_at_AN15)
        logger.info(log_metric_info)

        # metric dict returned to the caller
        metric_dict = dict()
        metric_dict['Acc'] = Acc
        metric_dict['Edit'] = Edit
        for s in range(len(self.overlap)):
            metric_dict['F1@{:0.2f}'.format(
                self.overlap[s])] = Fscore[self.overlap[s]]
        metric_dict['Auc'] = AUC
        metric_dict['AR@AN1'] = AR_at_AN1
        metric_dict['AR@AN5'] = AR_at_AN5
        metric_dict['AR@AN15'] = AR_at_AN15

        # clear for next epoch
        # cls
        self.cls_tp = np.zeros(self.overlap_len)
        self.cls_fp = np.zeros(self.overlap_len)
        self.cls_fn = np.zeros(self.overlap_len)
        self.total_correct = 0
        self.total_edit = 0
        self.total_frame = 0
        self.total_video = 0
        # proposal
        self.AR_at_AN = [[] for _ in range(self.max_proposal)]

        return metric_dict
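
    # accumulate() hands back a dict shaped like
    #   {"Acc": ..., "Edit": ..., "F1@0.10": ..., "Auc": ...,
    #    "AR@AN1": ..., "AR@AN5": ..., "AR@AN15": ...}
    # with one "F1@<t>" entry per overlap threshold passed at construction.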