# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import math
import random
from collections.abc import Sequence

import cv2
import numpy as np
import paddle
import paddle.nn.functional as F
from PIL import Image

from ..registry import PIPELINES


@PIPELINES.register()
class Scale(object):
    """
    Scale images.
    Args:
        short_size(float | int): The short side of an image will be scaled to short_size.
        fixed_ratio(bool): Whether to zoom according to a fixed 4:3 ratio. default: True
        keep_ratio(bool | None): Whether to keep the original aspect ratio;
            cannot be combined with fixed_ratio. default: None
        do_round(bool): Whether to round when calculating the zoom ratio. default: False
        backend(str): Choose pillow or cv2 as the graphics processing backend. default: 'pillow'
    """
    def __init__(self,
                 short_size,
                 fixed_ratio=True,
                 keep_ratio=None,
                 do_round=False,
                 backend='pillow'):
        self.short_size = short_size
        assert (fixed_ratio and not keep_ratio) or (not fixed_ratio), \
            "fixed_ratio and keep_ratio cannot be true at the same time"
        self.fixed_ratio = fixed_ratio
        self.keep_ratio = keep_ratio
        self.do_round = do_round

        assert backend in [
            'pillow', 'cv2'
        ], f"Scale's backend must be pillow or cv2, but got {backend}"
        self.backend = backend

    def __call__(self, results):
        """
        Performs resize operations.
        Args:
            imgs (Sequence[PIL.Image]): List where each item is a PIL.Image.
            For example, [PIL.Image0, PIL.Image1, PIL.Image2, ...]
        return:
            resized_imgs: List where each item is a PIL.Image after scaling.
        """
        imgs = results['imgs']
        resized_imgs = []
        for i in range(len(imgs)):
            img = imgs[i]
            if isinstance(img, np.ndarray):
                h, w, _ = img.shape
            elif isinstance(img, Image.Image):
                w, h = img.size
            else:
                raise NotImplementedError
            # The short side already matches short_size, so no resize is needed.
            if (w <= h and w == self.short_size) or (h <= w
                                                     and h == self.short_size):
                if self.backend == 'pillow' and not isinstance(
                        img, Image.Image):
                    img = Image.fromarray(img)
                resized_imgs.append(img)
                continue

            if w <= h:
                ow = self.short_size
                if self.fixed_ratio:
                    oh = int(self.short_size * 4.0 / 3.0)
                elif self.keep_ratio is False:
                    oh = self.short_size
                else:
                    scale_factor = self.short_size / w
                    oh = int(h * float(scale_factor) +
                             0.5) if self.do_round else int(h *
                                                            self.short_size / w)
                    ow = int(w * float(scale_factor) +
                             0.5) if self.do_round else self.short_size
            else:
                oh = self.short_size
                if self.fixed_ratio:
                    ow = int(self.short_size * 4.0 / 3.0)
                elif self.keep_ratio is False:
                    ow = self.short_size
                else:
                    scale_factor = self.short_size / h
                    oh = int(h * float(scale_factor) +
                             0.5) if self.do_round else self.short_size
                    ow = int(w * float(scale_factor) +
                             0.5) if self.do_round else int(w *
                                                            self.short_size / h)
            if self.backend == 'pillow':
                resized_imgs.append(img.resize((ow, oh), Image.BILINEAR))
            elif self.backend == 'cv2' and (self.keep_ratio is not None):
                resized_imgs.append(
                    cv2.resize(img, (ow, oh), interpolation=cv2.INTER_LINEAR))
            else:
                resized_imgs.append(
                    Image.fromarray(
                        cv2.resize(np.asarray(img), (ow, oh),
                                   interpolation=cv2.INTER_LINEAR)))
        results['imgs'] = resized_imgs
        return results


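# A minimal usage sketch for Scale (hypothetical frames and values, not repo
# defaults):
#     op = Scale(short_size=256, fixed_ratio=False, keep_ratio=True, backend='cv2')
#     results = op({'imgs': [frame0, frame1]})  # frames: HxWxC uint8 np.ndarray
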
@PIPELINES.register()
class RandomCrop(object):
    """
    Random crop images.
    Args:
        target_size(int): Random crop a square with the target_size from an image.
    """
    def __init__(self, target_size):
        self.target_size = target_size

    def __call__(self, results):
        """
        Performs random crop operations.
        Args:
            imgs: List where each item is a PIL.Image.
            For example, [PIL.Image0, PIL.Image1, PIL.Image2, ...]
        return:
            crop_imgs: List where each item is a PIL.Image after random crop.
        """
        imgs = results['imgs']
        if 'backend' in results and results['backend'] == 'pyav':  # [c,t,h,w]
            h, w = imgs.shape[2:]
        else:
            w, h = imgs[0].size
        th, tw = self.target_size, self.target_size

        assert (w >= self.target_size) and (h >= self.target_size), \
            "image width({}) and height({}) should be larger than crop size({})".format(
                w, h, self.target_size)

        crop_images = []
        if 'backend' in results and results['backend'] == 'pyav':
            x1 = np.random.randint(0, w - tw)
            y1 = np.random.randint(0, h - th)
            crop_images = imgs[:, :, y1:y1 + th, x1:x1 + tw]  # [C, T, th, tw]
        else:
            x1 = random.randint(0, w - tw)
            y1 = random.randint(0, h - th)
            for img in imgs:
                if w == tw and h == th:
                    crop_images.append(img)
                else:
                    crop_images.append(img.crop((x1, y1, x1 + tw, y1 + th)))
        results['imgs'] = crop_images
        return results


@PIPELINES.register()
class RandomResizedCrop(RandomCrop):
    """
    Random crop with a random area and aspect ratio (Inception-style).
    Args:
        area_range(tuple[float]): Relative area range of the crop.
        aspect_ratio_range(tuple[float]): Aspect ratio range of the crop.
        target_size(int): Kept for interface compatibility with RandomCrop.
        backend(str): 'cv2', 'pillow' or 'pyav'.
    """
    def __init__(self,
                 area_range=(0.08, 1.0),
                 aspect_ratio_range=(3 / 4, 4 / 3),
                 target_size=224,
                 backend='cv2'):
        self.area_range = area_range
        self.aspect_ratio_range = aspect_ratio_range
        self.target_size = target_size
        self.backend = backend

    @staticmethod
    def get_crop_bbox(img_shape,
                      area_range,
                      aspect_ratio_range,
                      max_attempts=10):
        assert 0 < area_range[0] <= area_range[1] <= 1
        assert 0 < aspect_ratio_range[0] <= aspect_ratio_range[1]

        img_h, img_w = img_shape
        area = img_h * img_w

        # Sample aspect ratios log-uniformly so a ratio and its inverse are
        # equally likely.
        min_ar, max_ar = aspect_ratio_range
        aspect_ratios = np.exp(
            np.random.uniform(np.log(min_ar), np.log(max_ar),
                              size=max_attempts))
        target_areas = np.random.uniform(*area_range, size=max_attempts) * area
        candidate_crop_w = np.round(np.sqrt(target_areas *
                                            aspect_ratios)).astype(np.int32)
        candidate_crop_h = np.round(np.sqrt(target_areas /
                                            aspect_ratios)).astype(np.int32)

        for i in range(max_attempts):
            crop_w = candidate_crop_w[i]
            crop_h = candidate_crop_h[i]
            if crop_h <= img_h and crop_w <= img_w:
                x_offset = random.randint(0, img_w - crop_w)
                y_offset = random.randint(0, img_h - crop_h)
                return x_offset, y_offset, x_offset + crop_w, y_offset + crop_h

        # Fallback: a center crop of the largest fitting square.
        crop_size = min(img_h, img_w)
        x_offset = (img_w - crop_size) // 2
        y_offset = (img_h - crop_size) // 2
        return x_offset, y_offset, x_offset + crop_size, y_offset + crop_size

    def __call__(self, results):
        imgs = results['imgs']
        if self.backend == 'pillow':
            img_w, img_h = imgs[0].size
        elif self.backend == 'cv2':
            img_h, img_w, _ = imgs[0].shape
        elif self.backend == 'pyav':
            img_h, img_w = imgs.shape[2:]  # [c,t,h,w]
        else:
            raise NotImplementedError

        left, top, right, bottom = self.get_crop_bbox(
            (img_h, img_w), self.area_range, self.aspect_ratio_range)

        if self.backend == 'pillow':
            # PIL.Image.crop takes a single 4-tuple box.
            imgs = [img.crop((left, top, right, bottom)) for img in imgs]
        elif self.backend == 'cv2':
            imgs = [img[top:bottom, left:right] for img in imgs]
        elif self.backend == 'pyav':
            imgs = imgs[:, :, top:bottom, left:right]  # [c,t,h,w]
        else:
            raise NotImplementedError
        results['imgs'] = imgs
        return results


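# Why log-uniform aspect ratios: sampling uniformly in log space makes a ratio
# r and its inverse 1/r equally likely, since log(3/4) and log(4/3) are
# symmetric around 0. A quick sanity sketch (hypothetical, numpy only):
#     ars = np.exp(np.random.uniform(np.log(0.75), np.log(4 / 3), 10000))
#     assert abs(np.median(ars) - 1.0) < 0.05
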
@PIPELINES.register()
class CenterCrop(object):
    """
    Center crop images.
    Args:
        target_size(int): Center crop a square with the target_size from an image.
        do_round(bool): Whether to round the coordinates of the upper-left corner of the cropping area. default: True
        backend(str): Graphics backend, 'pillow' or 'cv2'. default: 'pillow'
    """
    def __init__(self, target_size, do_round=True, backend='pillow'):
        self.target_size = target_size
        self.do_round = do_round
        self.backend = backend

    def __call__(self, results):
        """
        Performs center crop operations.
        Args:
            imgs: List where each item is a PIL.Image.
            For example, [PIL.Image0, PIL.Image1, PIL.Image2, ...]
        return:
            ccrop_imgs: List where each item is a PIL.Image after center crop.
        """
        imgs = results['imgs']
        ccrop_imgs = []
        th, tw = self.target_size, self.target_size
        if isinstance(imgs, paddle.Tensor):
            h, w = imgs.shape[-2:]
            x1 = int(round((w - tw) / 2.0)) if self.do_round else (w - tw) // 2
            y1 = int(round((h - th) / 2.0)) if self.do_round else (h - th) // 2
            ccrop_imgs = imgs[:, :, y1:y1 + th, x1:x1 + tw]
        else:
            for img in imgs:
                if self.backend == 'pillow':
                    w, h = img.size
                elif self.backend == 'cv2':
                    h, w, _ = img.shape
                else:
                    raise NotImplementedError
                assert (w >= self.target_size) and (h >= self.target_size), \
                    "image width({}) and height({}) should be larger than crop size({})".format(
                        w, h, self.target_size)
                x1 = int(round(
                    (w - tw) / 2.0)) if self.do_round else (w - tw) // 2
                y1 = int(round(
                    (h - th) / 2.0)) if self.do_round else (h - th) // 2
                if self.backend == 'cv2':
                    ccrop_imgs.append(img[y1:y1 + th, x1:x1 + tw])
                elif self.backend == 'pillow':
                    ccrop_imgs.append(img.crop((x1, y1, x1 + tw, y1 + th)))
        results['imgs'] = ccrop_imgs
        return results


@PIPELINES.register()
class MultiScaleCrop(object):
    """
    Random crop images with multiscale sizes.
    Args:
        target_size(int): Random crop a square with the target_size from an image.
        scales(list): List of candidate cropping scales.
        max_distort(int): Maximum allowable deformation combination distance.
        fix_crop(bool): Whether to fix the cropping start point to a candidate grid.
        allow_duplication(bool): Whether to allow duplicate candidate crop starting points.
        more_fix_crop(bool): Whether to allow more cropping start points.
        backend(str): Graphics backend, 'pillow' or 'cv2'. default: 'pillow'
    """
    def __init__(
            self,
            target_size,  # NOTE: named target_size, but a short-side size is still passed in!
            scales=None,
            max_distort=1,
            fix_crop=True,
            allow_duplication=False,
            more_fix_crop=True,
            backend='pillow'):
        self.target_size = target_size
        self.scales = scales if scales else [1, .875, .75, .66]
        self.max_distort = max_distort
        self.fix_crop = fix_crop
        self.allow_duplication = allow_duplication
        self.more_fix_crop = more_fix_crop
        assert backend in [
            'pillow', 'cv2'
        ], f"MultiScaleCrop's backend must be pillow or cv2, but got {backend}"
        self.backend = backend

    def __call__(self, results):
        """
        Performs MultiScaleCrop operations.
        Args:
            imgs: List where each item is a PIL.Image.
        return:
            results with randomly cropped and resized images.
        """
        imgs = results['imgs']

        input_size = [self.target_size, self.target_size]

        im_size = imgs[0].size

        # get random crop offset
        def _sample_crop_size(im_size):
            image_w, image_h = im_size[0], im_size[1]

            base_size = min(image_w, image_h)
            crop_sizes = [int(base_size * x) for x in self.scales]
            # Snap candidate sizes within 3px of the input size onto it.
            crop_h = [
                input_size[1] if abs(x - input_size[1]) < 3 else x
                for x in crop_sizes
            ]
            crop_w = [
                input_size[0] if abs(x - input_size[0]) < 3 else x
                for x in crop_sizes
            ]

            pairs = []
            for i, h in enumerate(crop_h):
                for j, w in enumerate(crop_w):
                    if abs(i - j) <= self.max_distort:
                        pairs.append((w, h))
            crop_pair = random.choice(pairs)
            if not self.fix_crop:
                w_offset = random.randint(0, image_w - crop_pair[0])
                h_offset = random.randint(0, image_h - crop_pair[1])
            else:
                w_step = (image_w - crop_pair[0]) / 4
                h_step = (image_h - crop_pair[1]) / 4

                ret = list()
                ret.append((0, 0))  # upper left
                if self.allow_duplication or w_step != 0:
                    ret.append((4 * w_step, 0))  # upper right
                if self.allow_duplication or h_step != 0:
                    ret.append((0, 4 * h_step))  # lower left
                if self.allow_duplication or (h_step != 0 and w_step != 0):
                    ret.append((4 * w_step, 4 * h_step))  # lower right
                if self.allow_duplication or (h_step != 0 or w_step != 0):
                    ret.append((2 * w_step, 2 * h_step))  # center

                if self.more_fix_crop:
                    ret.append((0, 2 * h_step))  # center left
                    ret.append((4 * w_step, 2 * h_step))  # center right
                    ret.append((2 * w_step, 4 * h_step))  # lower center
                    ret.append((2 * w_step, 0 * h_step))  # upper center

                    ret.append((1 * w_step, 1 * h_step))  # upper left quarter
                    ret.append((3 * w_step, 1 * h_step))  # upper right quarter
                    ret.append((1 * w_step, 3 * h_step))  # lower left quarter
                    ret.append((3 * w_step, 3 * h_step))  # lower right quarter

                w_offset, h_offset = random.choice(ret)

            return crop_pair[0], crop_pair[1], w_offset, h_offset

        crop_w, crop_h, offset_w, offset_h = _sample_crop_size(im_size)
        crop_img_group = [
            img.crop((offset_w, offset_h, offset_w + crop_w, offset_h + crop_h))
            for img in imgs
        ]
        if self.backend == 'pillow':
            ret_img_group = [
                img.resize((input_size[0], input_size[1]), Image.BILINEAR)
                for img in crop_img_group
            ]
        else:
            ret_img_group = [
                Image.fromarray(
                    cv2.resize(np.asarray(img),
                               dsize=(input_size[0], input_size[1]),
                               interpolation=cv2.INTER_LINEAR))
                for img in crop_img_group
            ]
        results['imgs'] = ret_img_group
        return results


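# A small worked example of the fix_crop offset grid (hypothetical sizes):
# with a 340x256 image and a 224 crop, w_step = (340 - 224) / 4 = 29 and
# h_step = (256 - 224) / 4 = 8, so "upper right" is (116, 0) and "center"
# is (58, 16).
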
@PIPELINES.register()
class RandomFlip(object):
    """
    Random Flip images.
    Args:
        p(float): Random flip images with the probability p.
    """
    def __init__(self, p=0.5):
        self.p = p

    def __call__(self, results):
        """
        Performs random flip operations.
        Args:
            imgs: List where each item is a PIL.Image.
            For example, [PIL.Image0, PIL.Image1, PIL.Image2, ...]
        return:
            flip_imgs: List where each item is a PIL.Image after random flip.
        """
        imgs = results['imgs']
        v = random.random()
        if v < self.p:
            if isinstance(imgs, paddle.Tensor):
                results['imgs'] = paddle.flip(imgs, axis=[3])
            elif isinstance(imgs[0], np.ndarray):
                results['imgs'] = [cv2.flip(img, 1, img) for img in imgs
                                   ]  # [[h,w,c], [h,w,c], ..., [h,w,c]]
            else:
                results['imgs'] = [
                    img.transpose(Image.FLIP_LEFT_RIGHT) for img in imgs
                ]
        else:
            results['imgs'] = imgs
        return results


@PIPELINES.register()
class RandomBrightness(object):
    """
    Randomly adjust the brightness of images.
    Args:
        p(float): Adjust brightness with the probability p.
        brightness(float): Brightness jitter factor passed to ColorJitter.
    """
    def __init__(self, p=0.1, brightness=1):
        self.p = p
        self.brightness = brightness

    def __call__(self, results):
        """
        Performs random brightness operations.
        Args:
            imgs: List where each item is a PIL.Image.
            For example, [PIL.Image0, PIL.Image1, PIL.Image2, ...]
        return:
            brightness_imgs: List where each item is a PIL.Image after random brightness.
        """
        imgs = results['imgs']
        v = random.random()

        if v < self.p:
            # Use paddle.vision's ColorJitter directly: the pipeline
            # ColorJitter defined later in this file expects a results dict,
            # not a single image.
            transform = paddle.vision.transforms.ColorJitter(
                brightness=self.brightness)
            results['imgs'] = [transform(img) for img in imgs]
        else:
            results['imgs'] = imgs
        return results


@PIPELINES.register()
class RandomSaturation(object):
    """
    Randomly adjust the saturation of images.
    Args:
        p(float): Adjust saturation with the probability p.
        saturation(float): Saturation jitter factor passed to ColorJitter.
    """
    def __init__(self, p=0.1, saturation=2):
        self.p = p
        self.saturation = saturation

    def __call__(self, results):
        """
        Performs random saturation operations.
        Args:
            imgs: List where each item is a PIL.Image.
            For example, [PIL.Image0, PIL.Image1, PIL.Image2, ...]
        return:
            saturation_imgs: List where each item is a PIL.Image after random saturation.
        """
        imgs = results['imgs']
        v = random.random()

        if v < self.p:
            # See RandomBrightness: use paddle.vision's ColorJitter here.
            transform = paddle.vision.transforms.ColorJitter(
                saturation=self.saturation)
            results['imgs'] = [transform(img) for img in imgs]
        else:
            results['imgs'] = imgs
        return results


@PIPELINES.register()
class RandomHue(object):
    """
    Randomly adjust the hue of images.
    Args:
        p(float): Adjust hue with the probability p.
        hue(float): Hue jitter factor passed to ColorJitter.
    """
    def __init__(self, p=0.1, hue=0.5):
        self.p = p
        self.hue = hue

    def __call__(self, results):
        """
        Performs random hue operations.
        Args:
            imgs: List where each item is a PIL.Image.
            For example, [PIL.Image0, PIL.Image1, PIL.Image2, ...]
        return:
            hue_imgs: List where each item is a PIL.Image after random hue.
        """
        imgs = results['imgs']
        v = random.random()

        if v < self.p:
            # See RandomBrightness: use paddle.vision's ColorJitter here.
            transform = paddle.vision.transforms.ColorJitter(hue=self.hue)
            results['imgs'] = [transform(img) for img in imgs]
        else:
            results['imgs'] = imgs
        return results


@PIPELINES.register()
class RandomGamma(object):
    """
    Randomly apply gamma correction to images.
    Args:
        p(float): Apply gamma correction with the probability p.
        gamma(float): Non-negative real number, same as `\\gamma` in the equation.
            gamma larger than 1 makes the shadows darker,
            while gamma smaller than 1 makes dark regions lighter.
    """
    def __init__(self, p=0.1, gamma=0.2):
        self.p = p
        self.value = [1 - gamma, 1 + gamma]
        self.value[0] = max(self.value[0], 0)

    def _adjust_gamma(self, img, gamma, gain=1.0):
        flag = False
        if isinstance(img, np.ndarray):
            flag = True
            img = Image.fromarray(img)
        input_mode = img.mode
        img = img.convert("RGB")
        gamma_map = [
            int((255 + 1 - 1e-3) * gain * pow(ele / 255.0, gamma))
            for ele in range(256)
        ] * 3
        img = img.point(
            gamma_map)  # use PIL's point-function to accelerate this part
        img = img.convert(input_mode)
        if flag:
            img = np.array(img)
        return img

    def __call__(self, results):
        """
        Performs random gamma operations.
        Args:
            imgs: List where each item is a PIL.Image.
            For example, [PIL.Image0, PIL.Image1, PIL.Image2, ...]
        return:
            gamma_imgs: List where each item is a PIL.Image after random gamma.
        """
        imgs = results['imgs']
        v = random.random()

        if v < self.p:
            gamma = random.uniform(self.value[0], self.value[1])
            results['imgs'] = [self._adjust_gamma(img, gamma) for img in imgs]
        else:
            results['imgs'] = imgs
        return results


@PIPELINES.register()
class Image2Array(object):
    """
    Transfer PIL.Image to Numpy array and transpose dimensions from 'thwc' to 'tchw' or 'cthw'.
    Args:
        transpose(bool): Whether to transpose or not, default True. False for slowfast.
        data_format(str): Target layout, 'tchw' or 'cthw'. default: 'tchw'
    """
    def __init__(self, transpose=True, data_format='tchw'):
        assert data_format in [
            'tchw', 'cthw'
        ], f"Target format must be in ['tchw', 'cthw'], but got {data_format}"
        self.transpose = transpose
        self.data_format = data_format

    def __call__(self, results):
        """
        Performs Image to NumpyArray operations.
        Args:
            imgs: List where each item is a PIL.Image.
            For example, [PIL.Image0, PIL.Image1, PIL.Image2, ...]
        return:
            np_imgs: Numpy array.
        """
        imgs = results['imgs']
        if 'backend' in results and results[
                'backend'] == 'pyav':  # [T,H,W,C] in [0, 1]
            t_imgs = imgs  # avoid an unbound name when transpose is False
            if self.transpose:
                if self.data_format == 'tchw':
                    t_imgs = imgs.transpose((0, 3, 1, 2))  # tchw
                else:
                    t_imgs = imgs.transpose((3, 0, 1, 2))  # cthw
            results['imgs'] = t_imgs
        else:
            t_imgs = np.stack(imgs).astype('float32')
            if self.transpose:
                if self.data_format == 'tchw':
                    t_imgs = t_imgs.transpose(0, 3, 1, 2)  # tchw
                else:
                    t_imgs = t_imgs.transpose(3, 0, 1, 2)  # cthw
            results['imgs'] = t_imgs
        return results


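# Layout note (a small worked example): 8 RGB frames of 224x224 stack to
# [8, 224, 224, 3]; 'tchw' transposes this to [8, 3, 224, 224] and 'cthw'
# to [3, 8, 224, 224].
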
@PIPELINES.register()
class Normalization(object):
    """
    Normalization.
    Args:
        mean(Sequence[float]): mean values of different channels.
        std(Sequence[float]): std values of different channels.
        tensor_shape(list): size of mean, default [3,1,1]. For slowfast, [1,1,1,3]
        inplace(bool): whether to normalize in place with cv2. default: False
    """
    def __init__(self, mean, std, tensor_shape=[3, 1, 1], inplace=False):
        if not isinstance(mean, Sequence):
            raise TypeError(f'Mean must be list or tuple, but got {type(mean)}')
        if not isinstance(std, Sequence):
            raise TypeError(f'Std must be list or tuple, but got {type(std)}')

        self.inplace = inplace
        if not inplace:
            self.mean = np.array(mean).reshape(tensor_shape).astype(np.float32)
            self.std = np.array(std).reshape(tensor_shape).astype(np.float32)
        else:
            self.mean = np.array(mean, dtype=np.float32)
            self.std = np.array(std, dtype=np.float32)

    def __call__(self, results):
        """
        Performs normalization operations.
        Args:
            imgs: Numpy array.
        return:
            np_imgs: Numpy array after normalization.
        """
        if self.inplace:
            n = len(results['imgs'])
            h, w, c = results['imgs'][0].shape
            norm_imgs = np.empty((n, h, w, c), dtype=np.float32)
            for i, img in enumerate(results['imgs']):
                norm_imgs[i] = img

            # mean/stdinv are constant, so compute them once outside the loop.
            mean = np.float64(self.mean.reshape(1, -1))  # [1, 3]
            stdinv = 1 / np.float64(self.std.reshape(1, -1))  # [1, 3]
            for img in norm_imgs:  # [n,h,w,c]
                cv2.subtract(img, mean, img)
                cv2.multiply(img, stdinv, img)
        else:
            imgs = results['imgs']
            norm_imgs = imgs / 255.0
            norm_imgs -= self.mean
            norm_imgs /= self.std
            if 'backend' in results and results['backend'] == 'pyav':
                norm_imgs = paddle.to_tensor(norm_imgs, dtype=paddle.float32)
        results['imgs'] = norm_imgs
        return results


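# A minimal usage sketch (ImageNet statistics as an example, not necessarily
# this repo's configs). The non-inplace path divides by 255 first, so mean/std
# are given in [0, 1] scale, and it runs after Image2Array ([T, C, H, W]):
#     norm = Normalization(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
#     results = norm(Image2Array()({'imgs': pil_frames}))
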
@PIPELINES.register()
class JitterScale(object):
    """
    Scale images, with the target short size randomly selected between min_size and max_size.
    Args:
        min_size: Lower bound for the random sampler.
        max_size: Upper bound for the random sampler.
        short_cycle_factors(list): Scaling factors used by the multigrid short cycle. default: [0.5, 0.7071]
        default_min_size(int): Base min_size that the short-cycle factors scale. default: 256
    """
    def __init__(self,
                 min_size,
                 max_size,
                 short_cycle_factors=[0.5, 0.7071],
                 default_min_size=256):
        self.default_min_size = default_min_size
        self.orig_min_size = self.min_size = min_size
        self.max_size = max_size
        self.short_cycle_factors = short_cycle_factors

    def __call__(self, results):
        """
        Performs jitter resize operations.
        Args:
            imgs (Sequence[PIL.Image]): List where each item is a PIL.Image.
            For example, [PIL.Image0, PIL.Image1, PIL.Image2, ...]
        return:
            resized_imgs: List where each item is a PIL.Image after scaling.
        """
        short_cycle_idx = results.get('short_cycle_idx')
        if short_cycle_idx in [0, 1]:
            self.min_size = int(
                round(self.short_cycle_factors[short_cycle_idx] *
                      self.default_min_size))
        else:
            self.min_size = self.orig_min_size

        imgs = results['imgs']
        size = int(round(np.random.uniform(self.min_size, self.max_size)))
        assert (len(imgs) >= 1), \
            "len(imgs):{} should be at least 1".format(len(imgs))

        if 'backend' in results and results['backend'] == 'pyav':
            height, width = imgs.shape[2:]
        else:
            width, height = imgs[0].size
        if (width <= height and width == size) or (height <= width
                                                   and height == size):
            return results

        new_width = size
        new_height = size
        if width < height:
            new_height = int(math.floor((float(height) / width) * size))
        else:
            new_width = int(math.floor((float(width) / height) * size))

        if 'backend' in results and results['backend'] == 'pyav':
            frames_resize = F.interpolate(imgs,
                                          size=(new_height, new_width),
                                          mode="bilinear",
                                          align_corners=False)  # [c,t,h,w]
        else:
            frames_resize = []
            for j in range(len(imgs)):
                img = imgs[j]
                scale_img = img.resize((new_width, new_height), Image.BILINEAR)
                frames_resize.append(scale_img)

        results['imgs'] = frames_resize
        return results


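# Note on short_cycle_factors: 0.5 and 0.7071 (~1/sqrt(2)) match SlowFast's
# multigrid short-cycle schedule; shrinking the linear size by these factors
# cuts the spatial area to roughly 1/4 and 1/2 respectively:
#     assert abs(0.7071 ** 2 - 0.5) < 1e-4
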
@PIPELINES.register()
class MultiCenterCrop(object):
    """
    Three-view crop: center crop, left-center crop and right-center crop.
    Args:
        target_size(int): Crop a square with the target_size from an image.
    """
    def __init__(self, target_size):
        self.target_size = target_size

    def __call__(self, results):
        """
        Performs multi-center crop operations: for pyav tensors the three
        crops are concatenated along the temporal axis; for PIL images a
        single random crop is used as a fallback.
        Args:
            imgs: List where each item is a PIL.Image.
            For example, [PIL.Image0, PIL.Image1, PIL.Image2, ...]
        return:
            crop_imgs: List where each item is a PIL.Image after crop.
        """
        imgs = results['imgs']
        if 'backend' in results and results['backend'] == 'pyav':  # [c,t,h,w]
            h, w = imgs.shape[2:]
        else:
            w, h = imgs[0].size
        th, tw = self.target_size, self.target_size

        assert (w >= self.target_size) and (h >= self.target_size), \
            "image width({}) and height({}) should be larger than crop size({})".format(
                w, h, self.target_size)

        crop_images = []
        # just for tensor
        crop_imgs_center = []
        crop_imgs_left = []
        crop_imgs_right = []
        if 'backend' in results and results['backend'] == 'pyav':
            # center crop
            x1 = 0
            if w > self.target_size:
                x1 = int((w - self.target_size) / 2.0)
            y1 = 0
            if h > self.target_size:
                y1 = int((h - self.target_size) / 2.0)
            crop_imgs_center = imgs[:, :, y1:y1 + th,
                                    x1:x1 + tw].numpy()  # [C, T, th, tw]
            # left crop
            x1 = 0
            y1 = 0
            if h > self.target_size:
                y1 = int((h - self.target_size) / 2.0)
            crop_imgs_left = imgs[:, :, y1:y1 + th, x1:x1 + tw].numpy()
            # right crop
            x1 = 0
            y1 = 0
            if w > self.target_size:
                x1 = w - self.target_size
            if h > self.target_size:
                y1 = int((h - self.target_size) / 2.0)
            crop_imgs_right = imgs[:, :, y1:y1 + th, x1:x1 + tw].numpy()
            crop_imgs = np.concatenate(
                (crop_imgs_center, crop_imgs_left, crop_imgs_right), axis=1)
            crop_images = paddle.to_tensor(crop_imgs)

        else:
            x1 = 0
            if w > self.target_size:
                x1 = random.randint(0, w - tw)
            y1 = 0
            if h > self.target_size:
                y1 = random.randint(0, h - th)
            for img in imgs:
                if w == tw and h == th:
                    crop_images.append(img)
                else:
                    crop_images.append(img.crop((x1, y1, x1 + tw, y1 + th)))
        results['imgs'] = crop_images
        return results


@PIPELINES.register()
class MultiCrop(object):
    """
    Random crop image.
    This operation can perform multi-crop during multi-clip test, as in the slowfast model.
    Args:
        target_size(int): Random crop a square with the target_size from an image.
    """
    def __init__(self,
                 target_size,
                 default_crop_size=224,
                 short_cycle_factors=[0.5, 0.7071],
                 test_mode=False):
        self.orig_target_size = self.target_size = target_size
        self.short_cycle_factors = short_cycle_factors
        self.default_crop_size = default_crop_size
        self.test_mode = test_mode

    def __call__(self, results):
        """
        Performs random crop operations.
        Args:
            imgs: List where each item is a PIL.Image.
            For example, [PIL.Image0, PIL.Image1, PIL.Image2, ...]
        return:
            crop_imgs: List where each item is a PIL.Image after random crop.
        """
        imgs = results['imgs']
        spatial_sample_index = results['spatial_sample_index']
        spatial_num_clips = results['spatial_num_clips']

        short_cycle_idx = results.get('short_cycle_idx')
        if short_cycle_idx in [0, 1]:
            self.target_size = int(
                round(self.short_cycle_factors[short_cycle_idx] *
                      self.default_crop_size))
        else:
            self.target_size = self.orig_target_size  # use saved value before call

        w, h = imgs[0].size
        if w == self.target_size and h == self.target_size:
            return results

        assert (w >= self.target_size) and (h >= self.target_size), \
            "image width({}) and height({}) should be larger than crop size({},{})".format(
                w, h, self.target_size, self.target_size)
        frames_crop = []
        if not self.test_mode:
            x_offset = random.randint(0, w - self.target_size)
            y_offset = random.randint(0, h - self.target_size)
        else:  # multi-crop
            x_gap = int(
                math.ceil((w - self.target_size) / (spatial_num_clips - 1)))
            y_gap = int(
                math.ceil((h - self.target_size) / (spatial_num_clips - 1)))
            if h > w:
                x_offset = int(math.ceil((w - self.target_size) / 2))
                if spatial_sample_index == 0:
                    y_offset = 0
                elif spatial_sample_index == spatial_num_clips - 1:
                    y_offset = h - self.target_size
                else:
                    y_offset = y_gap * spatial_sample_index
            else:
                y_offset = int(math.ceil((h - self.target_size) / 2))
                if spatial_sample_index == 0:
                    x_offset = 0
                elif spatial_sample_index == spatial_num_clips - 1:
                    x_offset = w - self.target_size
                else:
                    x_offset = x_gap * spatial_sample_index

        for img in imgs:
            nimg = img.crop((x_offset, y_offset, x_offset + self.target_size,
                             y_offset + self.target_size))
            frames_crop.append(nimg)
        results['imgs'] = frames_crop
        return results


@PIPELINES.register()
class PackOutput(object):
    """
    In the slowfast model, we want to get the slow pathway from the fast
    pathway based on the alpha factor.
    Args:
        alpha(int): temporal length ratio of the fast pathway to the slow pathway.
    """
    def __init__(self, alpha):
        self.alpha = alpha

    def __call__(self, results):
        fast_pathway = results['imgs']

        # sample num points between start and end
        slow_idx_start = 0
        slow_idx_end = fast_pathway.shape[0] - 1
        slow_idx_num = fast_pathway.shape[0] // self.alpha
        slow_idxs_select = np.linspace(slow_idx_start, slow_idx_end,
                                       slow_idx_num).astype("int64")
        slow_pathway = fast_pathway[slow_idxs_select]

        # T H W C -> C T H W.
        slow_pathway = slow_pathway.transpose(3, 0, 1, 2)
        fast_pathway = fast_pathway.transpose(3, 0, 1, 2)

        # slow + fast
        frames_list = [slow_pathway, fast_pathway]
        results['imgs'] = frames_list
        return results


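# A worked example of the alpha slicing (hypothetical shapes): with 32 fast
# frames and alpha=4, slow_idx_num = 32 // 4 = 8, so np.linspace(0, 31, 8)
# truncates to indices [0, 4, 8, 13, 17, 22, 26, 31]; the slow pathway ends
# up [C, 8, H, W] while the fast pathway stays [C, 32, H, W].
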
@PIPELINES.register()
class GroupFullResSample(object):
    """
    Crop 3 regions (left, center, right along the width) from each image,
    optionally adding a horizontally flipped copy of every crop.
    Args:
        crop_size(int | tuple[int]): (w, h) of target size for crop.
        flip(bool): Whether to append flipped crops. default: False
    """
    def __init__(self, crop_size, flip=False):
        self.crop_size = crop_size if not isinstance(crop_size, int) else (
            crop_size, crop_size)
        self.flip = flip

    def __call__(self, results):
        img_group = results['imgs']

        image_w, image_h = img_group[0].size
        crop_w, crop_h = self.crop_size

        w_step = (image_w - crop_w) // 4
        h_step = (image_h - crop_h) // 4

        offsets = list()
        offsets.append((0 * w_step, 2 * h_step))  # left
        offsets.append((4 * w_step, 2 * h_step))  # right
        offsets.append((2 * w_step, 2 * h_step))  # center

        oversample_group = list()
        for o_w, o_h in offsets:
            normal_group = list()
            flip_group = list()
            for i, img in enumerate(img_group):
                crop = img.crop((o_w, o_h, o_w + crop_w, o_h + crop_h))
                normal_group.append(crop)
                if self.flip:
                    flip_crop = crop.copy().transpose(Image.FLIP_LEFT_RIGHT)
                    flip_group.append(flip_crop)

            oversample_group.extend(normal_group)
            if self.flip:
                oversample_group.extend(flip_group)

        results['imgs'] = oversample_group
        return results


@PIPELINES.register()
class TenCrop:
    """
    Crop out 5 regions (4 corner points + 1 center point) from the picture,
    and then flip each crop to get 10 cropped images, which can make the prediction result more robust.
    Args:
        target_size(int): Side length of the square crop.
    """
    def __init__(self, target_size):
        self.target_size = (target_size, target_size)

    def __call__(self, results):
        imgs = results['imgs']
        img_w, img_h = imgs[0].size
        crop_w, crop_h = self.target_size
        w_step = (img_w - crop_w) // 4
        h_step = (img_h - crop_h) // 4
        offsets = [
            (0, 0),
            (4 * w_step, 0),
            (0, 4 * h_step),
            (4 * w_step, 4 * h_step),
            (2 * w_step, 2 * h_step),
        ]
        img_crops = list()
        for x_offset, y_offset in offsets:
            crop = [
                img.crop(
                    (x_offset, y_offset, x_offset + crop_w, y_offset + crop_h))
                for img in imgs
            ]
            crop_flipped = [
                timg.transpose(Image.FLIP_LEFT_RIGHT) for timg in crop
            ]
            img_crops.extend(crop)
            img_crops.extend(crop_flipped)

        results['imgs'] = img_crops
        return results


@PIPELINES.register()
class UniformCrop:
    """
    Perform uniform spatial sampling on the images:
    select 3 regions at the two ends of the long side and the middle position
    (left/middle/right or top/middle/bottom).
    Args:
        target_size(int | tuple[int]): (w, h) of target size for crop.
    """
    def __init__(self, target_size, backend='cv2'):
        if isinstance(target_size, tuple):
            self.target_size = target_size
        elif isinstance(target_size, int):
            self.target_size = (target_size, target_size)
        else:
            raise TypeError(
                f'target_size must be int or tuple[int], but got {type(target_size)}'
            )
        self.backend = backend

    def __call__(self, results):

        imgs = results['imgs']
        if 'backend' in results and results['backend'] == 'pyav':  # [c,t,h,w]
            img_h, img_w = imgs.shape[2:]
        elif self.backend == 'pillow':
            img_w, img_h = imgs[0].size
        else:
            img_h, img_w = imgs[0].shape[:2]

        crop_w, crop_h = self.target_size
        if crop_h == img_h:
            w_step = (img_w - crop_w) // 2
            offsets = [
                (0, 0),
                (w_step * 2, 0),
                (w_step, 0),
            ]
        elif crop_w == img_w:
            h_step = (img_h - crop_h) // 2
            offsets = [
                (0, 0),
                (0, h_step * 2),
                (0, h_step),
            ]
        else:
            raise ValueError(
                f"expected crop_w({crop_w}) == img_w({img_w}) or crop_h({crop_h}) == img_h({img_h})"
            )
        img_crops = []
        if 'backend' in results and results['backend'] == 'pyav':  # [c,t,h,w]
            for x_offset, y_offset in offsets:
                crop = imgs[:, :, y_offset:y_offset + crop_h,
                            x_offset:x_offset + crop_w]
                img_crops.append(crop)
            img_crops = paddle.concat(img_crops, axis=1)
        else:
            if self.backend == 'pillow':
                for x_offset, y_offset in offsets:
                    crop = [
                        img.crop((x_offset, y_offset, x_offset + crop_w,
                                  y_offset + crop_h)) for img in imgs
                    ]
                    img_crops.extend(crop)
            else:
                for x_offset, y_offset in offsets:
                    crop = [
                        img[y_offset:y_offset + crop_h,
                            x_offset:x_offset + crop_w] for img in imgs
                    ]
                    img_crops.extend(crop)
        results['imgs'] = img_crops
        return results


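# A minimal usage sketch (hypothetical 256x320 cv2 frames): after scaling the
# short side to 256, UniformCrop(256) takes left/center/right 256x256 crops:
#     op = UniformCrop(target_size=256, backend='cv2')
#     results = op({'imgs': frames})  # yields 3 * len(frames) crops
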
@PIPELINES.register()
class GroupResize(object):
    """
    Build a multi-scale image pyramid for multi-frame inputs, and (in train
    mode) the matching camera intrinsics K / inv_K at every scale.
    Args:
        height(int): Target height at scale 0.
        width(int): Target width at scale 0.
        scale(int): Number of pyramid scales; scale i is downsampled by 2**i.
        K(list): Normalized camera intrinsics matrix.
        mode(str): 'train' or 'infer'. default: 'train'
    """
    def __init__(self, height, width, scale, K, mode='train'):
        self.height = height
        self.width = width
        self.scale = scale
        self.resize = {}
        self.K = np.array(K, dtype=np.float32)
        self.mode = mode
        for i in range(self.scale):
            s = 2**i
            self.resize[i] = paddle.vision.transforms.Resize(
                (self.height // s, self.width // s), interpolation='lanczos')

    def __call__(self, results):
        if self.mode == 'infer':
            imgs = results['imgs']
            for k in list(imgs):  # ("color", 0, -1)
                if "color" in k or "color_n" in k:
                    n, im, _ = k
                    for i in range(self.scale):
                        imgs[(n, im, i)] = self.resize[i](imgs[(n, im, i - 1)])
        else:
            imgs = results['imgs']
            for scale in range(self.scale):
                K = self.K.copy()

                K[0, :] *= self.width // (2**scale)
                K[1, :] *= self.height // (2**scale)

                inv_K = np.linalg.pinv(K)
                imgs[("K", scale)] = K
                imgs[("inv_K", scale)] = inv_K

            for k in list(imgs):
                if "color" in k or "color_n" in k:
                    n, im, i = k
                    for i in range(self.scale):
                        imgs[(n, im, i)] = self.resize[i](imgs[(n, im, i - 1)])

        results['imgs'] = imgs
        return results


@PIPELINES.register()
class ColorJitter(object):
    """Randomly change the brightness, contrast, saturation and hue of an image.
    """
    def __init__(self,
                 brightness=0,
                 contrast=0,
                 saturation=0,
                 hue=0,
                 mode='train',
                 p=0.5,
                 keys=None):
        self.mode = mode
        self.colorjitter = paddle.vision.transforms.ColorJitter(
            brightness, contrast, saturation, hue)
        self.p = p

    def __call__(self, results):
        """
        Args:
            results: Dict whose 'imgs' entry maps ("color"/"color_n", frame_idx, scale)
                keys to PIL Images.

        Returns:
            results with "<name>_aug" entries added; they are color jittered
            when a random draw exceeds p, otherwise plain copies.
        """
        do_color_aug = random.random() > self.p
        imgs = results['imgs']
        for k in list(imgs):
            f = imgs[k]
            if "color" in k or "color_n" in k:
                n, im, i = k
                imgs[(n, im, i)] = f
                if do_color_aug:
                    imgs[(n + "_aug", im, i)] = self.colorjitter(f)
                else:
                    imgs[(n + "_aug", im, i)] = f
        if self.mode == "train":
            for i in results['frame_idxs']:
                del imgs[("color", i, -1)]
                del imgs[("color_aug", i, -1)]
                del imgs[("color_n", i, -1)]
                del imgs[("color_n_aug", i, -1)]
        else:
            for i in results['frame_idxs']:
                del imgs[("color", i, -1)]
                del imgs[("color_aug", i, -1)]

        results['imgs'] = imgs
        return results


@PIPELINES.register()
class GroupRandomFlip(object):
    """
    Horizontally flip all "color"/"color_n" frames (and depth_gt, if present)
    when a random draw exceeds p.
    """
    def __init__(self, p=0.5):
        self.p = p

    def __call__(self, results):

        imgs = results['imgs']
        do_flip = random.random() > self.p
        if do_flip:
            for k in list(imgs):
                if "color" in k or "color_n" in k:
                    n, im, i = k
                    imgs[(n, im, i)] = imgs[(n, im, i)].transpose(
                        Image.FLIP_LEFT_RIGHT)
            if "depth_gt" in imgs:
                imgs['depth_gt'] = np.array(np.fliplr(imgs['depth_gt']))

        results['imgs'] = imgs
        return results


@PIPELINES.register()
class ToArray(object):
    """
    Convert "color*" frames to float32 CHW arrays in [0, 1], and depth_gt
    (if present) to float32.
    """
    def __init__(self):
        pass

    def __call__(self, results):
        imgs = results['imgs']
        for k in list(imgs):
            if "color" in k or "color_n" in k or "color_aug" in k or "color_n_aug" in k:
                n, im, i = k
                imgs[(n, im,
                      i)] = np.array(imgs[(n, im, i)]).astype('float32') / 255.0
                imgs[(n, im, i)] = imgs[(n, im, i)].transpose((2, 0, 1))
        if "depth_gt" in imgs:
            imgs['depth_gt'] = np.array(imgs['depth_gt']).astype('float32')

        results['imgs'] = imgs
        return results


@PIPELINES.register()
class YowoAug(object):
    """
    Data augmentation for the YOWO action detection model: random jitter crop,
    flip and HSV distortion at train time, plus bounding-box label loading and
    rescaling; only resizing and label loading in valid mode.
    Args:
        target_size(int): Output side length. default: 224
        jitter(float): Maximum relative crop jitter per side. default: 0.2
        hue(float): Maximum hue shift. default: 0.1
        saturation(float): Maximum saturation scale. default: 1.5
        exposure(float): Maximum exposure (value) scale. default: 1.5
        valid_mode(bool): Whether in validation mode. default: False
    """
    def __init__(self,
                 target_size=224,
                 jitter=0.2,
                 hue=0.1,
                 saturation=1.5,
                 exposure=1.5,
                 valid_mode=False):
        self.shape = (target_size, target_size)
        self.jitter = jitter
        self.hue = hue
        self.saturation = saturation
        self.exposure = exposure
        self.valid_mode = valid_mode

    def _rand_scale(self, s):
        scale = random.uniform(1, s)
        if random.randint(1, 10000) % 2:
            return scale
        return 1. / scale

    def _distort_image(self, im, hue, sat, val):
        im = im.convert('HSV')
        cs = list(im.split())
        cs[1] = cs[1].point(lambda i: i * sat)
        cs[2] = cs[2].point(lambda i: i * val)

        def _change_hue(x):
            x += hue * 255
            if x > 255:
                x -= 255
            if x < 0:
                x += 255
            return x

        cs[0] = cs[0].point(_change_hue)
        im = Image.merge(im.mode, tuple(cs))

        im = im.convert('RGB')
        # constrain_image(im)
        return im

    def _random_distort_image(self, im, dhue, dsat, dexp):
        res = self._distort_image(im, dhue, dsat, dexp)
        return res

    def _read_truths_args(self, lab_path, min_box_scale):
        truths = np.loadtxt(lab_path)
        truths = np.reshape(truths, (truths.size // 5, 5))
        new_truths = []
        for i in range(truths.shape[0]):
            # Convert corner boxes on a 320x240 frame to normalized
            # (cx, cy, w, h); class ids are shifted to start from 0.
            cx = (truths[i][1] + truths[i][3]) / (2 * 320)
            cy = (truths[i][2] + truths[i][4]) / (2 * 240)
            imgw = (truths[i][3] - truths[i][1]) / 320
            imgh = (truths[i][4] - truths[i][2]) / 240
            truths[i][0] = truths[i][0] - 1
            truths[i][1] = cx
            truths[i][2] = cy
            truths[i][3] = imgw
            truths[i][4] = imgh

            if truths[i][3] < min_box_scale:
                continue
            new_truths.append([
                truths[i][0], truths[i][1], truths[i][2], truths[i][3],
                truths[i][4]
            ])
        return np.array(new_truths)

    def _fill_truth_detection(self, labpath, flip, dx, dy, sx, sy):
        max_boxes = 50
        label = np.zeros((max_boxes, 5))
        bs = np.loadtxt(labpath)
        bs = np.reshape(bs, (-1, 5))

        for i in range(bs.shape[0]):
            cx = (bs[i][1] + bs[i][3]) / (2 * 320)
            cy = (bs[i][2] + bs[i][4]) / (2 * 240)
            imgw = (bs[i][3] - bs[i][1]) / 320
            imgh = (bs[i][4] - bs[i][2]) / 240
            bs[i][0] = bs[i][0] - 1
            bs[i][1] = cx
            bs[i][2] = cy
            bs[i][3] = imgw
            bs[i][4] = imgh

        cc = 0
        for i in range(bs.shape[0]):
            x1 = bs[i][1] - bs[i][3] / 2
            y1 = bs[i][2] - bs[i][4] / 2
            x2 = bs[i][1] + bs[i][3] / 2
            y2 = bs[i][2] + bs[i][4] / 2

            # Apply the crop transform and clamp to the image.
            x1 = min(0.999, max(0, x1 * sx - dx))
            y1 = min(0.999, max(0, y1 * sy - dy))
            x2 = min(0.999, max(0, x2 * sx - dx))
            y2 = min(0.999, max(0, y2 * sy - dy))

            bs[i][1] = (x1 + x2) / 2
            bs[i][2] = (y1 + y2) / 2
            bs[i][3] = (x2 - x1)
            bs[i][4] = (y2 - y1)

            if flip:
                bs[i][1] = 0.999 - bs[i][1]

            if bs[i][3] < 0.001 or bs[i][4] < 0.001:
                continue
            label[cc] = bs[i]
            cc += 1
            if cc >= max_boxes:
                break

        label = np.reshape(label, (-1))
        return label

    def __call__(self, results):
        clip = results['imgs']
        frame_num = len(clip)
        oh = clip[0].height
        ow = clip[0].width
        labpath = results['filename'].replace('jpg', 'txt').replace(
            'rgb-images', 'labels')
        if not self.valid_mode:
            dw = int(ow * self.jitter)
            dh = int(oh * self.jitter)

            pleft = random.randint(-dw, dw)
            pright = random.randint(-dw, dw)
            ptop = random.randint(-dh, dh)
            pbot = random.randint(-dh, dh)

            swidth = ow - pleft - pright
            sheight = oh - ptop - pbot

            sx = float(swidth) / ow
            sy = float(sheight) / oh

            dx = (float(pleft) / ow) / sx
            dy = (float(ptop) / oh) / sy

            flip = random.randint(1, 10000) % 2

            dhue = random.uniform(-self.hue, self.hue)
            dsat = self._rand_scale(self.saturation)
            dexp = self._rand_scale(self.exposure)

            # Augment
            cropped = [
                img.crop((pleft, ptop, pleft + swidth - 1, ptop + sheight - 1))
                for img in clip
            ]

            sized = [img.resize(self.shape) for img in cropped]

            if flip:
                sized = [img.transpose(Image.FLIP_LEFT_RIGHT) for img in sized]

            clip = [
                self._random_distort_image(img, dhue, dsat, dexp)
                for img in sized
            ]

            label = self._fill_truth_detection(labpath, flip, dx, dy, 1. / sx,
                                               1. / sy)
        else:
            label = np.zeros([50 * 5])
            tmp = self._read_truths_args(labpath,
                                         8.0 / clip[0].width).astype('float32')
            tmp = np.reshape(tmp, [-1])
            tsz = tmp.size
            if tsz > 50 * 5:
                label = tmp[0:50 * 5]
            elif tsz > 0:
                label[0:tsz] = tmp
            clip = [img.resize(self.shape) for img in clip]

        clip = [np.asarray(img).astype('float32') / 255.0 for img in clip]
        # Use self.shape rather than a hardcoded 224 so non-default
        # target_size values work.
        clip = np.concatenate(clip, 0).reshape(
            [frame_num, self.shape[0], self.shape[1], 3])
        clip = np.transpose(clip, [3, 0, 1, 2])
        results['imgs'] = clip
        results['labels'] = label
        return results
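
# A minimal end-to-end sketch of composing these ops (hypothetical frames and
# values; the repo's actual configs build pipelines from YAML):
#     ops = [Scale(256), RandomCrop(224), RandomFlip(), Image2Array(),
#            Normalization([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])]
#     results = {'imgs': pil_frames}
#     for op in ops:
#         results = op(results)
#     # results['imgs']: float32 ndarray of shape [T, 3, 224, 224]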