# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.

import argparse

import numpy as np
import pandas as pd

from .registry import METRIC
from .base import BaseMetric
from paddlevideo.utils import get_logger

logger = get_logger("paddlevideo")


def get_labels_scores_start_end_time(input_np,
                                     frame_wise_labels,
                                     actions_dict,
                                     bg_class=["background", "None"]):
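    """Split a frame-wise label sequence into contiguous action segments.

    For every non-background segment this returns its label, start frame,
    and end frame, together with the segment's mean confidence taken from
    ``input_np`` (a class-by-frame score matrix).
    """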
    labels = []
    starts = []
    ends = []
    scores = []

    boundary_score_ptr = 0

    last_label = frame_wise_labels[0]
    if frame_wise_labels[0] not in bg_class:
        labels.append(frame_wise_labels[0])
        starts.append(0)
    for i in range(len(frame_wise_labels)):
        if frame_wise_labels[i] != last_label:
            if frame_wise_labels[i] not in bg_class:
                labels.append(frame_wise_labels[i])
                starts.append(i)
            if last_label not in bg_class:
                ends.append(i)
                score = np.mean(
                    input_np[actions_dict[labels[boundary_score_ptr]],
                             starts[boundary_score_ptr]:(
                                 ends[boundary_score_ptr] + 1)])
                scores.append(score)
                boundary_score_ptr = boundary_score_ptr + 1
            last_label = frame_wise_labels[i]
    if last_label not in bg_class:
        ends.append(i + 1)
        score = np.mean(
            input_np[actions_dict[labels[boundary_score_ptr]],
                     starts[boundary_score_ptr]:(
                         ends[boundary_score_ptr] + 1)])
        scores.append(score)
        boundary_score_ptr = boundary_score_ptr + 1

    return labels, starts, ends, scores


def get_labels_start_end_time(frame_wise_labels,
                              bg_class=["background", "None"]):
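    """Split a frame-wise label sequence into contiguous action segments,
    returning the label, start frame, and end frame of each non-background
    segment (no confidence scores).
    """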
    labels = []
    starts = []
    ends = []
    last_label = frame_wise_labels[0]
    if frame_wise_labels[0] not in bg_class:
        labels.append(frame_wise_labels[0])
        starts.append(0)
    for i in range(len(frame_wise_labels)):
        if frame_wise_labels[i] != last_label:
            if frame_wise_labels[i] not in bg_class:
                labels.append(frame_wise_labels[i])
                starts.append(i)
            if last_label not in bg_class:
                ends.append(i)
            last_label = frame_wise_labels[i]
    if last_label not in bg_class:
        ends.append(i + 1)
    return labels, starts, ends


def levenstein(p, y, norm=False):
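    """Levenshtein (edit) distance between two segment-label sequences,
    computed by dynamic programming. With ``norm=True`` the distance is
    converted to a score in [0, 100], where 100 means identical sequences.
    """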
    m_row = len(p)
    n_col = len(y)
    D = np.zeros([m_row + 1, n_col + 1], np.float64)
    for i in range(m_row + 1):
        D[i, 0] = i
    for i in range(n_col + 1):
        D[0, i] = i

    for j in range(1, n_col + 1):
        for i in range(1, m_row + 1):
            if y[j - 1] == p[i - 1]:
                D[i, j] = D[i - 1, j - 1]
            else:
                D[i, j] = min(D[i - 1, j] + 1, D[i, j - 1] + 1,
                              D[i - 1, j - 1] + 1)

    if norm:
        score = (1 - D[-1, -1] / max(m_row, n_col)) * 100
    else:
        score = D[-1, -1]

    return score


def edit_score(recognized,
               ground_truth,
               norm=True,
               bg_class=["background", "None"]):
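    """Segmental edit score: the normalized Levenshtein distance between
    the predicted and ground-truth segment-label sequences.
    """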
    P, _, _ = get_labels_start_end_time(recognized, bg_class)
    Y, _, _ = get_labels_start_end_time(ground_truth, bg_class)
    return levenstein(P, Y, norm)


def f_score(recognized, ground_truth, overlap, bg_class=["background", "None"]):
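    """Segmental true positives, false positives, and false negatives at an
    IoU threshold, as used by the segmental F1@k metric. A predicted segment
    counts as a true positive when its best-matching, same-label ground-truth
    segment has IoU >= ``overlap`` and has not been matched before.
    """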
    p_label, p_start, p_end = get_labels_start_end_time(recognized, bg_class)
    y_label, y_start, y_end = get_labels_start_end_time(ground_truth, bg_class)

    tp = 0
    fp = 0

    hits = np.zeros(len(y_label))

    for j in range(len(p_label)):
        intersection = np.minimum(p_end[j], y_end) - np.maximum(
            p_start[j], y_start)
        union = np.maximum(p_end[j], y_end) - np.minimum(p_start[j], y_start)
        IoU = (1.0 * intersection / union) * (
            [p_label[j] == y_label[x] for x in range(len(y_label))])
        # Get the best scoring segment
        idx = np.array(IoU).argmax()

        if IoU[idx] >= overlap and not hits[idx]:
            tp += 1
            hits[idx] = 1
        else:
            fp += 1
    fn = len(y_label) - sum(hits)
    return float(tp), float(fp), float(fn)


def boundary_AR(pred_boundary, gt_boundary, overlap_list, max_proposal):
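    """Average recall of the top-``max_proposal`` predicted segments over all
    IoU thresholds in ``overlap_list``. Proposals are ranked by confidence,
    then padded or truncated to exactly ``max_proposal`` entries.
    """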
    p_label, p_start, p_end, p_scores = pred_boundary
    y_label, y_start, y_end, _ = gt_boundary

    # sort proposals by confidence, descending
    pred_dict = {
        "label": p_label,
        "start": p_start,
        "end": p_end,
        "scores": p_scores
    }
    pdf = pd.DataFrame(pred_dict)
    pdf = pdf.sort_values(by="scores", ascending=False)
    p_label = list(pdf["label"])
    p_start = list(pdf["start"])
    p_end = list(pdf["end"])
    p_scores = list(pdf["scores"])

    # refine AN: pad short proposal lists with their last entry, or truncate
    # long ones, so that exactly max_proposal proposals are evaluated
    if len(p_label) < max_proposal and len(p_label) > 0:
        p_label = p_label + [p_label[-1]] * (max_proposal - len(p_label))
        p_start = p_start + [p_start[-1]] * (max_proposal - len(p_start))
        p_end = p_end + [p_end[-1]] * (max_proposal - len(p_end))
        p_scores = p_scores + [p_scores[-1]] * (max_proposal - len(p_scores))
    elif len(p_label) > max_proposal:
        p_label[max_proposal:] = []
        p_start[max_proposal:] = []
        p_end[max_proposal:] = []
        p_scores[max_proposal:] = []

    t_AR = np.zeros(len(overlap_list))

    for i in range(len(overlap_list)):
        overlap = overlap_list[i]

        tp = 0
        fp = 0
        hits = np.zeros(len(y_label))

        for j in range(len(p_label)):
            intersection = np.minimum(p_end[j], y_end) - np.maximum(
                p_start[j], y_start)
            union = np.maximum(p_end[j], y_end) - np.minimum(
                p_start[j], y_start)
            IoU = (1.0 * intersection / union)
            # Get the best scoring segment
            idx = np.array(IoU).argmax()

            if IoU[idx] >= overlap and not hits[idx]:
                tp += 1
                hits[idx] = 1
            else:
                fp += 1
        fn = len(y_label) - sum(hits)

        recall = float(tp) / (float(tp) + float(fn))
        t_AR[i] = recall

    AR = np.mean(t_AR)
    return AR


@METRIC.register
class SegmentationMetric(BaseMetric):
    """Metrics for video action segmentation models.

    Reports frame-wise accuracy, segmental edit score, segmental F1@k at
    each overlap threshold, and boundary average recall (AR@AN and its AUC).
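
    A minimal usage sketch (``loader`` and ``model`` are illustrative names,
    not part of this module; ``accumulate`` assumes ``max_proposal`` >= 15
    so that AR@AN15 can be reported):

        metric = SegmentationMetric(data_size=len(dataset), batch_size=1,
                                    overlap=[0.1, 0.25, 0.5],
                                    actions_map_file_path="mapping.txt")
        for batch_id, data in enumerate(loader):
            outputs = model(data)  # dict with 'predict' and 'output_np'
            metric.update(batch_id, data, outputs)
        metric_dict = metric.accumulate()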
    """

    def __init__(self,
                 data_size,
                 batch_size,
                 overlap,
                 actions_map_file_path,
                 log_interval=1,
                 tolerance=5,
                 boundary_threshold=0.7,
                 max_proposal=100):
        """Prepare the metric accumulators and load the action-id mapping."""
        super().__init__(data_size, batch_size, log_interval)
        # build the actions dict from the mapping file ("<id> <name>" per line)
        with open(actions_map_file_path, 'r') as file_ptr:
            actions = file_ptr.read().split('\n')[:-1]
        self.actions_dict = dict()
        for a in actions:
            self.actions_dict[a.split()[1]] = int(a.split()[0])

        # cls score
        self.overlap = overlap
        self.overlap_len = len(overlap)

        self.cls_tp = np.zeros(self.overlap_len)
        self.cls_fp = np.zeros(self.overlap_len)
        self.cls_fn = np.zeros(self.overlap_len)
        self.total_correct = 0
        self.total_edit = 0
        self.total_frame = 0
        self.total_video = 0

        # boundary score
        self.max_proposal = max_proposal
        self.AR_at_AN = [[] for _ in range(max_proposal)]

    def update(self, batch_id, data, outputs):
        """Update the metrics with the results of one batch."""
        groundTruth = data[1]

        predicted = outputs['predict']
        output_np = outputs['output_np']

        outputs_np = predicted.numpy()
        outputs_arr = output_np.numpy()[0, :]
        gt_np = groundTruth.numpy()[0, :]

        recognition = []
        for i in range(outputs_np.shape[0]):
            recognition = np.concatenate((recognition, [
                list(self.actions_dict.keys())[list(
                    self.actions_dict.values()).index(outputs_np[i])]
            ]))
        recog_content = list(recognition)

        gt_content = []
        for i in range(gt_np.shape[0]):
            gt_content = np.concatenate((gt_content, [
                list(self.actions_dict.keys())[list(
                    self.actions_dict.values()).index(gt_np[i])]
            ]))
        gt_content = list(gt_content)

        pred_boundary = get_labels_scores_start_end_time(
            outputs_arr, recog_content, self.actions_dict)
        gt_boundary = get_labels_scores_start_end_time(
            np.ones(outputs_arr.shape), gt_content, self.actions_dict)

        # cls score
        correct = 0
        total = 0
        edit = 0

        for i in range(len(gt_content)):
            total += 1
            # accumulate
            self.total_frame += 1

            if gt_content[i] == recog_content[i]:
                correct += 1
                # accumulate
                self.total_correct += 1

        edit_num = edit_score(recog_content, gt_content)
        edit += edit_num
        self.total_edit += edit_num

        for s in range(self.overlap_len):
            tp1, fp1, fn1 = f_score(recog_content, gt_content, self.overlap[s])

            # accumulate
            self.cls_tp[s] += tp1
            self.cls_fp[s] += fp1
            self.cls_fn[s] += fn1

        # accumulate
        self.total_video += 1

        # proposal score
        for AN in range(self.max_proposal):
            AR = boundary_AR(pred_boundary,
                             gt_boundary,
                             self.overlap,
                             max_proposal=(AN + 1))
            self.AR_at_AN[AN].append(AR)

    def accumulate(self):
        """Aggregate the metrics once all iterations have finished."""
        # cls metric
        Acc = 100 * float(self.total_correct) / self.total_frame
        Edit = (1.0 * self.total_edit) / self.total_video
        Fscore = dict()
        for s in range(self.overlap_len):
            precision = self.cls_tp[s] / float(self.cls_tp[s] + self.cls_fp[s])
            recall = self.cls_tp[s] / float(self.cls_tp[s] + self.cls_fn[s])

            f1 = 2.0 * (precision * recall) / (precision + recall)

            f1 = np.nan_to_num(f1) * 100
            Fscore[self.overlap[s]] = f1

        # proposal metric
        proposal_AUC = np.array(self.AR_at_AN) * 100
        AUC = np.mean(proposal_AUC)
        AR_at_AN1 = np.mean(proposal_AUC[0, :])
        AR_at_AN5 = np.mean(proposal_AUC[4, :])
        AR_at_AN15 = np.mean(proposal_AUC[14, :])

        # log metric
        log_metric_info = "dataset model performance: "
        # preds ensemble
        log_metric_info += "Acc: {:.4f}, ".format(Acc)
        log_metric_info += 'Edit: {:.4f}, '.format(Edit)
        for s in range(len(self.overlap)):
            log_metric_info += 'F1@{:0.2f}: {:.4f}, '.format(
                self.overlap[s], Fscore[self.overlap[s]])

        # boundary metric
        log_metric_info += "Auc: {:.4f}, ".format(AUC)
        log_metric_info += "AR@AN1: {:.4f}, ".format(AR_at_AN1)
        log_metric_info += "AR@AN5: {:.4f}, ".format(AR_at_AN5)
        log_metric_info += "AR@AN15: {:.4f}, ".format(AR_at_AN15)
        logger.info(log_metric_info)

        # collect metrics for return
        metric_dict = dict()
        metric_dict['Acc'] = Acc
        metric_dict['Edit'] = Edit
        for s in range(len(self.overlap)):
            metric_dict['F1@{:0.2f}'.format(
                self.overlap[s])] = Fscore[self.overlap[s]]
        metric_dict['Auc'] = AUC
        metric_dict['AR@AN1'] = AR_at_AN1
        metric_dict['AR@AN5'] = AR_at_AN5
        metric_dict['AR@AN15'] = AR_at_AN15

        # clear for next epoch
        # cls
        self.cls_tp = np.zeros(self.overlap_len)
        self.cls_fp = np.zeros(self.overlap_len)
        self.cls_fn = np.zeros(self.overlap_len)
        self.total_correct = 0
        self.total_edit = 0
        self.total_frame = 0
        self.total_video = 0
        # proposal
        self.AR_at_AN = [[] for _ in range(self.max_proposal)]

        return metric_dict