You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
383 lines
14 KiB
Python
383 lines
14 KiB
Python
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
|
|
|
|
import os
|
|
import random
|
|
|
|
import numpy as np
|
|
from PIL import Image
|
|
try:
|
|
import SimpleITK as sitk
|
|
except ImportError as e:
|
|
print(
|
|
f"Warning! {e}, [SimpleITK] package and it's dependencies is required for PP-Care."
|
|
)
|
|
import cv2
|
|
|
|
from ..registry import PIPELINES
|
|
|
|
try:
|
|
import cPickle as pickle
|
|
from cStringIO import StringIO
|
|
except ImportError:
|
|
import pickle
|
|
from io import BytesIO
|
|
|
|
|
|
@PIPELINES.register()
class Sampler(object):
    """
    Sample frame indices and load the corresponding frames.
    NOTE: Use PIL to read image here, has diff with CV2
    Args:
        num_seg(int): number of segments.
        seg_len(int): number of sampled frames in each segment.
        frame_interval(int|None): when not None, clips of ``seg_len`` frames
            spaced ``frame_interval`` apart are sampled and the other
            sampling options are ignored.
        valid_mode(bool): True or False.
        select_left: Whether to select the frame to the left in the middle when the sampling interval is even in the test mode.
        dense_sample(bool): use the dense sampling strategy (used for ppTSM)
            when ``select_left`` is False.
        linspace_sample(bool): sample ``num_seg`` evenly spaced indices.
        use_pil(bool): with the 'decord' backend, return PIL images when True
            and numpy arrays when False.
    Returns:
        results(dict): the input dict with the sampled frames stored under
            the 'imgs' key.
    """
    def __init__(self,
                 num_seg,
                 seg_len,
                 frame_interval=None,
                 valid_mode=False,
                 select_left=False,
                 dense_sample=False,
                 linspace_sample=False,
                 use_pil=True):
        self.num_seg = num_seg
        self.seg_len = seg_len
        self.frame_interval = frame_interval
        self.valid_mode = valid_mode
        self.select_left = select_left
        self.dense_sample = dense_sample
        self.linspace_sample = linspace_sample
        self.use_pil = use_pil

    def _get(self, frames_idx, results):
        """
        Load the frames selected by ``frames_idx`` into ``results['imgs']``.
        Args:
            frames_idx: iterable of integer frame indices.
            results(dict): must contain 'format' ('frame', 'MRI' or 'video')
                plus the keys read by the chosen format/backend
                ('frame_dir', 'suffix', 'frames', 'backend').
        Returns:
            results(dict): the same dict with 'imgs' filled in.
        Raises:
            NotImplementedError: for an unknown format or video backend.
        """
        data_format = results['format']

        if data_format == "frame":
            frame_dir = results['frame_dir']
            imgs = []
            for idx in frames_idx:
                frame_path = os.path.join(frame_dir,
                                          results['suffix'].format(idx))
                # convert() returns a fully loaded copy, so the underlying
                # file can be closed immediately instead of leaking it.
                with Image.open(frame_path) as raw_img:
                    imgs.append(raw_img.convert('RGB'))

        elif data_format == "MRI":
            frame_dir = results['frame_dir']
            imgs = []
            MRI = sitk.GetArrayFromImage(sitk.ReadImage(frame_dir))
            for idx in frames_idx:
                item = MRI[idx]
                item = cv2.resize(item, (224, 224))
                imgs.append(item)

        elif data_format == "video":
            if results['backend'] == 'cv2':
                frames = np.array(results['frames'])
                imgs = []
                for idx in frames_idx:
                    imgbuf = frames[idx]
                    img = Image.fromarray(imgbuf, mode='RGB')
                    imgs.append(img)
            elif results['backend'] == 'decord':
                container = results['frames']
                if self.use_pil:
                    frames_select = container.get_batch(frames_idx)
                    # decord batch -> numpy -> list of PIL images
                    np_frames = frames_select.asnumpy()
                    imgs = []
                    for i in range(np_frames.shape[0]):
                        imgbuf = np_frames[i]
                        imgs.append(Image.fromarray(imgbuf, mode='RGB'))
                else:
                    # Most sampling branches hand over a plain list, which
                    # has no ``ndim``; normalise to an array first.
                    frames_idx = np.asarray(frames_idx)
                    if frames_idx.ndim != 1:
                        frames_idx = np.squeeze(frames_idx)
                    # Decode every distinct frame only once.
                    frame_dict = {
                        idx: container[idx].asnumpy()
                        for idx in np.unique(frames_idx)
                    }
                    imgs = [frame_dict[idx] for idx in frames_idx]
            elif results['backend'] == 'pyav':
                imgs = []
                frames = np.array(results['frames'])
                for idx in frames_idx:
                    if self.dense_sample:
                        # dense sampling produces 1-based indices
                        idx = idx - 1
                    imgbuf = frames[idx]
                    imgs.append(imgbuf)
                imgs = np.stack(imgs)  # thwc
            else:
                raise NotImplementedError
        else:
            raise NotImplementedError
        results['imgs'] = imgs
        return results

    def _get_train_clips(self, num_frames):
        """
        Randomly pick the start offset of each of the ``num_seg`` clips.
        Args:
            num_frames(int): total number of frames available.
        Returns:
            np.ndarray: integer array of shape (num_seg, ) with clip starts.
        """
        ori_seg_len = self.seg_len * self.frame_interval
        avg_interval = (num_frames - ori_seg_len + 1) // self.num_seg

        if avg_interval > 0:
            base_offsets = np.arange(self.num_seg) * avg_interval
            clip_offsets = base_offsets + np.random.randint(avg_interval,
                                                            size=self.num_seg)
        elif num_frames > max(self.num_seg, ori_seg_len):
            clip_offsets = np.sort(
                np.random.randint(num_frames - ori_seg_len + 1,
                                  size=self.num_seg))
        elif avg_interval == 0:
            ratio = (num_frames - ori_seg_len + 1.0) / self.num_seg
            # np.around yields floats; cast so the result can be used as
            # frame indices.
            clip_offsets = np.around(np.arange(self.num_seg) * ratio).astype(
                np.int64)
        else:
            # np.int was removed in NumPy 1.24; use an explicit width.
            clip_offsets = np.zeros((self.num_seg, ), dtype=np.int64)
        return clip_offsets

    def _get_test_clips(self, num_frames):
        """
        Deterministically pick the start offset of each clip (segment centre).
        Args:
            num_frames(int): total number of frames available.
        Returns:
            np.ndarray: integer array of shape (num_seg, ) with clip starts.
        """
        ori_seg_len = self.seg_len * self.frame_interval
        avg_interval = (num_frames - ori_seg_len + 1) / float(self.num_seg)
        if num_frames > ori_seg_len - 1:
            base_offsets = np.arange(self.num_seg) * avg_interval
            clip_offsets = (base_offsets + avg_interval / 2.0).astype(np.int64)
        else:
            clip_offsets = np.zeros((self.num_seg, ), dtype=np.int64)
        return clip_offsets

    def __call__(self, results):
        """
        Args:
            results(dict): must contain 'frames_len' and 'format', plus the
                keys required by ``_get``; 'start_idx'/'end_idx' are used by
                linspace sampling when both are present.
        return:
            results(dict): the input dict with sampled frames in 'imgs'.
        """
        frames_len = int(results['frames_len'])
        frames_idx = []
        data_format = results['format']

        if self.frame_interval is not None:
            assert isinstance(self.frame_interval, int)
            if not self.valid_mode:
                offsets = self._get_train_clips(frames_len)
            else:
                offsets = self._get_test_clips(frames_len)

            # (num_seg, seg_len) grid of indices, wrapped around the video
            # length and flattened clip by clip.
            offsets = offsets[:, None] + np.arange(
                self.seg_len)[None, :] * self.frame_interval
            offsets = np.mod(offsets, frames_len).reshape(-1)

            if data_format == 'video':
                frames_idx = offsets
            elif data_format == 'frame':
                # frame files are numbered from 1
                frames_idx = list(offsets + 1)
            else:
                raise NotImplementedError

            return self._get(frames_idx, results)

        if self.linspace_sample:
            if 'start_idx' in results and 'end_idx' in results:
                offsets = np.linspace(results['start_idx'], results['end_idx'],
                                      self.num_seg)
            else:
                offsets = np.linspace(0, frames_len - 1, self.num_seg)
            offsets = np.clip(offsets, 0, frames_len - 1).astype(np.int64)
            if data_format == 'video':
                frames_idx = [x % frames_len for x in list(offsets)]
            elif data_format == 'frame':
                frames_idx = list(offsets + 1)
            elif data_format == 'MRI':
                frames_idx = list(offsets)
            else:
                raise NotImplementedError
            return self._get(frames_idx, results)

        average_dur = int(frames_len / self.num_seg)
        if not self.select_left:
            if self.dense_sample:  # For ppTSM
                sample_pos = max(1, 1 + frames_len - 64)
                t_stride = 64 // self.num_seg
                if not self.valid_mode:  # train
                    start_idx = 0 if sample_pos == 1 else np.random.randint(
                        0, sample_pos - 1)
                    start_list = [start_idx]
                else:
                    # 10 evenly spaced start positions
                    start_list = np.linspace(0,
                                             sample_pos - 1,
                                             num=10,
                                             dtype=int).tolist()
                offsets = []
                for start_idx in start_list:
                    offsets += [(idx * t_stride + start_idx) % frames_len + 1
                                for idx in range(self.num_seg)]
                frames_idx = offsets
            else:
                for i in range(self.num_seg):
                    if average_dur >= self.seg_len:
                        if not self.valid_mode:
                            idx = random.randint(0, average_dur - self.seg_len)
                        else:
                            idx = (average_dur - 1) // 2
                        idx += i * average_dur
                    elif average_dur >= 1:
                        idx = i * average_dur
                    else:
                        idx = i
                    for jj in range(idx, idx + self.seg_len):
                        if data_format == 'video':
                            frames_idx.append(int(jj % frames_len))
                        elif data_format == 'frame':
                            frames_idx.append(jj + 1)
                        elif data_format == 'MRI':
                            frames_idx.append(jj)
                        else:
                            raise NotImplementedError
            return self._get(frames_idx, results)

        else:  # for TSM
            if not self.valid_mode:
                if average_dur > 0:
                    offsets = np.multiply(list(range(self.num_seg)),
                                          average_dur) + np.random.randint(
                                              average_dur, size=self.num_seg)
                elif frames_len > self.num_seg:
                    offsets = np.sort(
                        np.random.randint(frames_len, size=self.num_seg))
                else:
                    # integer dtype: these values are used as frame indices
                    offsets = np.zeros(shape=(self.num_seg, ), dtype=np.int64)
            else:
                if frames_len > self.num_seg:
                    average_dur_float = frames_len / self.num_seg
                    offsets = np.array([
                        int(average_dur_float / 2.0 + average_dur_float * x)
                        for x in range(self.num_seg)
                    ])
                else:
                    offsets = np.zeros(shape=(self.num_seg, ), dtype=np.int64)

            if data_format == 'video':
                frames_idx = [x % frames_len for x in list(offsets)]
            elif data_format == 'frame':
                frames_idx = list(offsets + 1)
            elif data_format == 'MRI':
                frames_idx = list(offsets)
            else:
                raise NotImplementedError

            return self._get(frames_idx, results)
|
|
|
|
|
|
@PIPELINES.register()
class SamplerPkl(object):
    """
    Sample frames from a pickled ``(video_name, label, frames)`` record.
    NOTE: Use PIL to read image here, has diff with CV2
    Args:
        num_seg(int): number of segments.
        seg_len(int): number of sampled frames in each segment.
        backend(str): 'pillow' keeps PIL images; any other value converts
            each frame to a numpy array.
        valid_mode(bool): True or False.
    Returns:
        results(dict): the input dict with 'labels', 'frames_len', 'backend'
            and 'imgs' filled in.
    """
    def __init__(self, num_seg, seg_len, backend='pillow', valid_mode=False):
        self.num_seg = num_seg
        self.seg_len = seg_len
        self.valid_mode = valid_mode
        self.backend = backend

    def _get(self, buf):
        """
        Decode one encoded image buffer.
        Args:
            buf(bytes|str): encoded image data.
        Returns:
            PIL.Image.Image when backend is 'pillow', otherwise np.ndarray.
        """
        if isinstance(buf, str):
            # ``StringIO`` is only bound when the Python 2 ``cPickle`` import
            # succeeds, and PIL needs a binary stream anyway. latin-1 maps
            # code points 0-255 back to the same byte values.
            # NOTE(review): assumes a str buffer holds raw bytes decoded as
            # latin-1 -- confirm against the dataset writer.
            buf = buf.encode('latin-1')
        img = Image.open(BytesIO(buf))
        img = img.convert('RGB')
        if self.backend != 'pillow':
            img = np.array(img)
        return img

    def __call__(self, results):
        """
        Args:
            results(dict): must contain 'frame_dir', the path of the pickle
                file to load.
        return:
            results(dict): the input dict with the sampled images in 'imgs'.
        """
        filename = results['frame_dir']
        # SECURITY: pickle can execute arbitrary code while loading; only use
        # dataset files from a trusted source.
        with open(filename, 'rb') as pkl_file:
            data_loaded = pickle.load(pkl_file, encoding='bytes')
        _video_name, label, frames = data_loaded
        if isinstance(label, dict):
            label = label['动作类型']
            results['labels'] = label
        elif len(label) == 1:
            results['labels'] = int(label[0])
        else:
            # two candidate labels: pick one of the first two at random
            results['labels'] = int(label[0]) if random.random() < 0.5 else int(
                label[1])
        results['frames_len'] = len(frames)
        frames_len = results['frames_len']
        average_dur = int(int(frames_len) / self.num_seg)
        imgs = []
        for i in range(self.num_seg):
            if average_dur >= self.seg_len:
                if not self.valid_mode:
                    # random start inside the segment
                    idx = random.randint(0, average_dur - self.seg_len)
                else:
                    # fixed start near the segment centre
                    idx = (average_dur - 1) // 2
                idx += i * average_dur
            elif average_dur >= 1:
                idx = i * average_dur
            else:
                idx = i

            for jj in range(idx, idx + self.seg_len):
                # wrap around when the clip runs past the last frame
                imgbuf = frames[int(jj % frames_len)]
                imgs.append(self._get(imgbuf))
        results['backend'] = self.backend
        results['imgs'] = imgs

        return results
|