Source code for cvpods.data.transforms.transform_gen
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# Copyright (c) BaseDetection, Inc. and its affiliates. All Rights Reserved
import inspect
import pprint
import sys
from abc import ABCMeta, abstractmethod
import numpy as np
from PIL import Image
import torch
from cvpods.structures import Boxes, BoxMode, pairwise_iou
from ..registry import TRANSFORMS
from .auto_aug import AutoAugmentTransform
from .transform import ( # isort:skip
ScaleTransform,
AffineTransform,
BlendTransform,
IoUCropTransform,
CropTransform,
CropPadTransform,
HFlipTransform,
NoOpTransform,
VFlipTransform,
DistortTransform,
RandomSwapChannelsTransform,
ExpandTransform,
ExtentTransform,
ResizeTransform,
# Transforms used in ssl
GaussianBlurTransform,
GaussianBlurConvTransform,
SolarizationTransform,
ComposeTransform,
# LabSpaceTransform,
PadTransform,
)
__all__ = [
"Pad",
"RandomScale",
"Expand",
"MinIoURandomCrop",
"RandomSwapChannels",
"CenterAffine",
"RandomBrightness",
"RandomContrast",
"RandomCrop",
"RandomCropWithInstance",
"RandomCropWithMaxAreaLimit",
"RandomCropPad",
"RandomExtent",
"RandomFlip",
"RandomSaturation",
"RandomLighting",
"RandomDistortion",
"Resize",
"ResizeShortestEdge",
"ResizeLongestEdge",
"ShuffleList",
"RandomList",
"RepeatList",
"TransformGen",
"TorchTransformGen",
# transforms used in ssl
"GaussianBlur",
"GaussianBlurConv",
"Solarization",
"AutoAugment",
]
def check_dtype(img):
"""
Check the image data type and dimensions to ensure that transforms can be applied on it.
Args:
img (np.array): image to be checked.
"""
assert isinstance(
img, np.ndarray
), "[TransformGen] Needs an numpy array, but got a {}!".format(type(img))
assert not isinstance(img.dtype, np.integer) or (
img.dtype == np.uint8
), "[TransformGen] Got image of type {}, use uint8 or floating points instead!".format(
img.dtype
)
assert img.ndim in [2, 3], img.ndim
[docs]@TRANSFORMS.register()
class TransformGen(metaclass=ABCMeta):
"""
TransformGen takes an image of type uint8 in range [0, 255], or
floating point in range [0, 1] or [0, 255] as input.
It creates a :class:`Transform` based on the given image, sometimes with randomness.
The transform can then be used to transform images
or other data (boxes, points, annotations, etc.) associated with it.
The assumption made in this class
is that the image itself is sufficient to instantiate a transform.
When this assumption is not true, you need to create the transforms by your own.
A list of `TransformGen` can be applied with :func:`apply_transform_gens`.
"""
def _init(self, params=None):
if params:
for k, v in params.items():
if k != "self" and not k.startswith("_"):
setattr(self, k, v)
def __call__(self, img, annotations=None, **kwargs):
return self.get_transform(img, annotations)(img, annotations, **kwargs)
def _rand_range(self, low=1.0, high=None, size=None):
"""
Uniform float random number between low and high.
"""
if high is None:
low, high = 0, low
if size is None:
size = []
return np.random.uniform(low, high, size)
[docs] def __repr__(self):
"""
Produce something like:
"MyTransformGen(field1={self.field1}, field2={self.field2})"
"""
try:
sig = inspect.signature(self.__init__)
classname = type(self).__name__
argstr = []
for name, param in sig.parameters.items():
assert (
param.kind != param.VAR_POSITIONAL
and param.kind != param.VAR_KEYWORD
), "The default __repr__ doesn't support *args or **kwargs"
assert hasattr(self, name), (
"Attribute {} not found! "
"Default __repr__ only works if attributes match the constructor.".format(
name
)
)
attr = getattr(self, name)
default = param.default
if default is attr:
continue
argstr.append("{}={}".format(name, pprint.pformat(attr)))
return "{}({})".format(classname, ", ".join(argstr))
except AssertionError:
return super().__repr__()
__str__ = __repr__
[docs]@TRANSFORMS.register()
class RandomFlip(TransformGen):
"""
Flip the image horizontally or vertically with the given probability.
"""
[docs] def __init__(self, prob=0.5, *, horizontal=True, vertical=False):
"""
Args:
prob (float): probability of flip.
horizontal (boolean): whether to apply horizontal flipping
vertical (boolean): whether to apply vertical flipping
"""
super().__init__()
if horizontal and vertical:
raise ValueError(
"Cannot do both horiz and vert. Please use two Flip instead."
)
if not horizontal and not vertical:
raise ValueError("At least one of horiz or vert has to be True!")
self._init(locals())
[docs] def get_transform(self, img, annotations=None):
h, w = img.shape[:2]
do = self._rand_range() < self.prob
if do:
if self.horizontal:
return HFlipTransform(w)
elif self.vertical:
return VFlipTransform(h)
else:
return NoOpTransform()
[docs]@TRANSFORMS.register()
class TorchTransformGen:
"""
Wrapper transfrom of transforms in torchvision.
It convert img (np.ndarray) to PIL image, and convert back to np.ndarray after transform.
"""
def __init__(self, tfm):
self.tfm = tfm
def __call__(self, img: np.ndarray, annotations: None, **kwargs):
pil_image = Image.fromarray(img)
return np.array(self.tfm(pil_image)), annotations
[docs]@TRANSFORMS.register()
class RandomDistortion(TransformGen):
"""
Random distort image's hue, saturation and exposure.
"""
[docs] def __init__(self, hue, saturation, exposure, image_format="BGR"):
"""
RandomDistortion Initialization.
Args:
hue (float): value of hue
saturation (float): value of saturation
exposure (float): value of exposure
"""
assert image_format in ["RGB", "BGR"]
super().__init__()
self._init(locals())
[docs] def get_transform(self, img, annotations=None):
return DistortTransform(self.hue, self.saturation, self.exposure, self.image_format)
[docs]@TRANSFORMS.register()
class CenterAffine(TransformGen):
"""
Affine Transform for CenterNet
"""
[docs] def __init__(self, boarder, output_size, pad_value=[0, 0, 0], random_aug=True):
"""
output_size (w, h) shape
"""
super().__init__()
self._init(locals())
[docs] def get_transform(self, img, annotations=None):
img_shape = img.shape[:2]
center, scale = self.generate_center_and_scale(img_shape)
src, dst = self.generate_src_and_dst(center, scale, self.output_size)
return AffineTransform(src, dst, self.output_size, self.pad_value)
@staticmethod
def _get_boarder(boarder, size):
"""
This func may be rewirite someday
"""
i = 1
size //= 2
while size <= boarder // i:
i *= 2
return boarder // i
[docs] def generate_center_and_scale(self, img_shape):
"""
generate center
shpae : (h, w)
"""
height, width = img_shape
center = np.array([width / 2, height / 2], dtype=np.float32)
scale = float(max(img_shape))
if self.random_aug:
scale = scale * np.random.choice(np.arange(0.6, 1.4, 0.1))
h_boarder = self._get_boarder(self.boarder, height)
w_boarder = self._get_boarder(self.boarder, width)
center[0] = np.random.randint(low=w_boarder, high=width - w_boarder)
center[1] = np.random.randint(low=h_boarder, high=height - h_boarder)
else:
pass
return center, scale
[docs] @staticmethod
def generate_src_and_dst(center, scale, output_size):
if not isinstance(scale, np.ndarray) and not isinstance(scale, list):
scale = np.array([scale, scale], dtype=np.float32)
src = np.zeros((3, 2), dtype=np.float32)
src_w = scale[0]
src_dir = [0, src_w * -0.5]
src[0, :] = center
src[1, :] = src[0, :] + src_dir
src[2, :] = src[1, :] + (src_dir[1], -src_dir[0])
dst = np.zeros((3, 2), dtype=np.float32)
dst_w, dst_h = output_size
dst_dir = [0, dst_w * -0.5]
dst[0, :] = [dst_w * 0.5, dst_h * 0.5]
dst[1, :] = dst[0, :] + dst_dir
dst[2, :] = dst[1, :] + (dst_dir[1], -dst_dir[0])
return src, dst
[docs]@TRANSFORMS.register()
class GaussianBlur(TransformGen):
"""
Gaussian blur transform.
"""
[docs] def __init__(self, sigma, p=1.0):
"""
Args:
sigma (List(float)): sigma of gaussian
p (float): probability of perform this augmentation
"""
super().__init__()
self._init(locals())
[docs] def get_transform(self, img, annotations=None):
return GaussianBlurTransform(self.sigma, self.p)
[docs]@TRANSFORMS.register()
class Solarization(TransformGen):
def __init__(self, threshold=128, p=0.5):
super().__init__()
self._init(locals())
[docs] def get_transform(self, img, annotations=None):
return SolarizationTransform(self.threshold, self.p)
[docs]@TRANSFORMS.register()
class GaussianBlurConv(TransformGen):
def __init__(self, kernel_size, p):
super().__init__()
self._init(locals())
[docs] def get_transform(self, img, annotations=None):
return GaussianBlurConvTransform(self.kernel_size, self.p)
[docs]@TRANSFORMS.register()
class Resize(TransformGen):
"""
Resize image to a target size
"""
[docs] def __init__(self, shape, interp=Image.BILINEAR):
"""
Args:
shape: (h, w) tuple or a int.
interp: PIL interpolation method.
"""
if isinstance(shape, int):
shape = (shape, shape)
shape = tuple(shape)
self._init(locals())
[docs] def get_transform(self, img, annotations=None):
return ResizeTransform(
img.shape[0], img.shape[1], self.shape[0], self.shape[1], self.interp
)
[docs]@TRANSFORMS.register()
class ResizeLongestEdge(TransformGen):
"""
Scale the longer edge to the given size.
"""
[docs] def __init__(self, long_edge_length, sample_style="range", interp=Image.BILINEAR,
jitter=(0.0, 32)):
"""
Args:
long_edge_length (list[int]): If ``sample_style=="range"``,
a [min, max] interval from which to sample the shortest edge length.
If ``sample_style=="choice"``, a list of shortest edge lengths to sample from.
sample_style (str): either "range" or "choice".
interp: PIL interpolation method.
"""
super().__init__()
assert sample_style in ["range", "choice"], sample_style
self.is_range = sample_style == "range"
if isinstance(long_edge_length, int):
long_edge_length = (long_edge_length, long_edge_length)
self._init(locals())
[docs] def get_transform(self, img, annotations=None):
h, w = img.shape[:2]
if self.is_range:
size = np.random.randint(
self.long_edge_length[0], self.long_edge_length[1] + 1
)
else:
size = np.random.choice(self.long_edge_length)
if size == 0:
return NoOpTransform()
if self.jitter[0] > 0:
dw = self.jitter[0] * w
dh = self.jitter[0] * h
size = max(h, w) + np.random.uniform(low=-max(dw, dh), high=max(dw, dh))
size -= size % self.jitter[1]
scale = size * 1.0 / max(h, w)
if h < w:
newh, neww = scale * h, size
else:
newh, neww = size, scale * w
neww = int(neww + 0.5)
newh = int(newh + 0.5)
return ResizeTransform(h, w, newh, neww, self.interp)
[docs]@TRANSFORMS.register()
class ResizeShortestEdge(TransformGen):
"""
Scale the shorter edge to the given size, with a limit of `max_size` on the longer edge.
If `max_size` is reached, then downscale so that the longer edge does not exceed max_size.
"""
[docs] def __init__(
self,
short_edge_length,
max_size=sys.maxsize,
sample_style="range",
interp=Image.BILINEAR,
):
"""
Args:
short_edge_length (list[int]): If ``sample_style=="range"``,
a [min, max] interval from which to sample the shortest edge length.
If ``sample_style=="choice"``, a list of shortest edge lengths to sample from.
max_size (int): maximum allowed longest edge length.
sample_style (str): either "range" or "choice".
interp: PIL interpolation method.
"""
super().__init__()
assert sample_style in ["range", "choice"], sample_style
self.is_range = sample_style == "range"
if isinstance(short_edge_length, int):
short_edge_length = (short_edge_length, short_edge_length)
self._init(locals())
[docs] def get_transform(self, img, annotations=None):
h, w = img.shape[:2]
if self.is_range:
size = np.random.randint(
self.short_edge_length[0], self.short_edge_length[1] + 1
)
else:
size = np.random.choice(self.short_edge_length)
if size == 0:
return NoOpTransform()
scale = size * 1.0 / min(h, w)
if h < w:
newh, neww = size, scale * w
else:
newh, neww = scale * h, size
if max(newh, neww) > self.max_size:
scale = self.max_size * 1.0 / max(newh, neww)
newh = newh * scale
neww = neww * scale
neww = int(neww + 0.5)
newh = int(newh + 0.5)
return ResizeTransform(h, w, newh, neww, self.interp)
[docs]@TRANSFORMS.register()
class RandomCrop(TransformGen):
"""
Randomly crop a subimage out of an image.
"""
[docs] def __init__(self, crop_type: str, crop_size, strict_mode=True):
"""
Args:
crop_type (str): one of "relative_range", "relative", "absolute".
See `config/defaults.py` for explanation.
crop_size (tuple[float]): the relative ratio or absolute pixels of
height and width
strict_mode (bool): if `True`, the target `crop_size` must be smaller than
the original image size.
"""
super().__init__()
assert crop_type in ["relative_range", "relative", "absolute"]
self._init(locals())
[docs] def get_transform(self, img, annotations=None):
h, w = img.shape[:2]
croph, cropw = self.get_crop_size((h, w))
if self.strict_mode:
assert h >= croph and w >= cropw, "Shape computation in {} has bugs.".format(
self
)
offset_range_h = max(h - croph, 0)
offset_range_w = max(w - cropw, 0)
h0 = np.random.randint(offset_range_h + 1)
w0 = np.random.randint(offset_range_w + 1)
return CropTransform(w0, h0, cropw, croph)
[docs] def get_crop_size(self, image_size):
"""
Args:
image_size (tuple): height, width
Returns:
crop_size (tuple): height, width in absolute pixels
"""
h, w = image_size
if self.crop_type == "relative":
ch, cw = self.crop_size
return int(h * ch + 0.5), int(w * cw + 0.5)
elif self.crop_type == "relative_range":
crop_size = np.asarray(self.crop_size, dtype=np.float32)
ch, cw = crop_size + np.random.rand(2) * (1 - crop_size)
return int(h * ch + 0.5), int(w * cw + 0.5)
elif self.crop_type == "absolute":
return self.crop_size
else:
NotImplementedError("Unknown crop type {}".format(self.crop_type))
[docs]@TRANSFORMS.register()
class RandomCropWithInstance(RandomCrop):
"""
Make sure the cropping region contains the center of a random instance from annotations.
"""
[docs] def get_transform(self, img, annotations=None):
h, w = img.shape[:2]
croph, cropw = self.get_crop_size((h, w))
if self.strict_mode:
assert h >= croph and w >= cropw, "Shape computation in {} has bugs.".format(
self
)
offset_range_h = max(h - croph, 0)
offset_range_w = max(w - cropw, 0)
# Make sure there is always at least one instance in the image
assert annotations is not None, "Can not get annotations infos."
instance = np.random.choice(annotations)
bbox = BoxMode.convert(instance["bbox"], instance["bbox_mode"], BoxMode.XYXY_ABS)
bbox = torch.tensor(bbox)
center_xy = (bbox[:2] + bbox[2:]) / 2.0
offset_range_h_min = max(center_xy[1] - croph, 0)
offset_range_w_min = max(center_xy[0] - cropw, 0)
offset_range_h_max = min(offset_range_h, center_xy[1] - 1)
offset_range_w_max = min(offset_range_w, center_xy[0] - 1)
h0 = np.random.randint(offset_range_h_min, offset_range_h_max + 1)
w0 = np.random.randint(offset_range_w_min, offset_range_w_max + 1)
return CropTransform(w0, h0, cropw, croph)
[docs]@TRANSFORMS.register()
class RandomCropWithMaxAreaLimit(RandomCrop):
"""
Find a cropping window such that no single category occupies more than
`single_category_max_area` in `sem_seg`.
The function retries random cropping 10 times max.
"""
def __init__(self, crop_type: str, crop_size, strict_mode=True,
single_category_max_area=1.0, ignore_value=255):
super().__init__(crop_type, crop_size, strict_mode)
self._init(locals())
[docs] def get_transform(self, img, annotations=None):
if self.single_category_max_area >= 1.0:
crop_tfm = super().get_transform(img, annotations)
else:
h, w = img.shape[:2]
assert "sem_seg" in annotations[0]
sem_seg = annotations[0]["sem_seg"]
croph, cropw = self.get_crop_size((h, w))
for _ in range(10):
y0 = np.random.randint(h - croph + 1)
x0 = np.random.randint(w - cropw + 1)
sem_seg_temp = sem_seg[y0: y0 + croph, x0: x0 + cropw]
labels, cnt = np.unique(sem_seg_temp, return_counts=True)
cnt = cnt[labels != self.ignore_value]
if len(cnt) > 1 and np.max(cnt) / np.sum(cnt) < self.single_category_max_area:
break
crop_tfm = CropTransform(x0, y0, cropw, croph)
return crop_tfm
[docs]@TRANSFORMS.register()
class RandomCropPad(RandomCrop):
"""
Randomly crop and pad a subimage out of an image.
"""
def __init__(self,
crop_type: str,
crop_size,
img_value=None,
seg_value=None):
super().__init__(crop_type, crop_size, strict_mode=False)
self._init(locals())
[docs] def get_transform(self, img, annotations=None):
h, w = img.shape[:2]
croph, cropw = self.get_crop_size((h, w))
h0 = np.random.randint(h - croph + 1) if h >= croph else 0
w0 = np.random.randint(w - cropw + 1) if w >= cropw else 0
dh = min(h, croph)
dw = min(w, cropw)
# print(w0, h0, dw, dh)
return CropPadTransform(w0, h0, dw, dh, cropw, croph, self.img_value,
self.seg_value)
[docs]@TRANSFORMS.register()
class RandomExtent(TransformGen):
"""
Outputs an image by cropping a random "subrect" of the source image.
The subrect can be parameterized to include pixels outside the source image,
in which case they will be set to zeros (i.e. black). The size of the output
image will vary with the size of the random subrect.
"""
[docs] def __init__(self, scale_range, shift_range):
"""
Args:
scale_range (l, h): Range of input-to-output size scaling factor.
shift_range (x, y): Range of shifts of the cropped subrect. The rect
is shifted by [w / 2 * Uniform(-x, x), h / 2 * Uniform(-y, y)],
where (w, h) is the (width, height) of the input image. Set each
component to zero to crop at the image's center.
"""
super().__init__()
self._init(locals())
[docs] def get_transform(self, img, annotations=None):
img_h, img_w = img.shape[:2]
# Initialize src_rect to fit the input image.
src_rect = np.array([-0.5 * img_w, -0.5 * img_h, 0.5 * img_w, 0.5 * img_h])
# Apply a random scaling to the src_rect.
src_rect *= np.random.uniform(self.scale_range[0], self.scale_range[1])
# Apply a random shift to the coordinates origin.
src_rect[0::2] += self.shift_range[0] * img_w * (np.random.rand() - 0.5)
src_rect[1::2] += self.shift_range[1] * img_h * (np.random.rand() - 0.5)
# Map src_rect coordinates into image coordinates (center at corner).
src_rect[0::2] += 0.5 * img_w
src_rect[1::2] += 0.5 * img_h
return ExtentTransform(
src_rect=(src_rect[0], src_rect[1], src_rect[2], src_rect[3]),
output_size=(
int(src_rect[3] - src_rect[1]),
int(src_rect[2] - src_rect[0]),
),
)
[docs]@TRANSFORMS.register()
class RandomContrast(TransformGen):
"""
Randomly transforms image contrast.
Contrast intensity is uniformly sampled in (intensity_min, intensity_max).
- intensity < 1 will reduce contrast
- intensity = 1 will preserve the input image
- intensity > 1 will increase contrast
See: https://pillow.readthedocs.io/en/3.0.x/reference/ImageEnhance.html
"""
[docs] def __init__(self, intensity_min, intensity_max, prob=1.0):
"""
Args:
intensity_min (float): Minimum augmentation.
intensity_max (float): Maximum augmentation.
prob (float): probability of transforms image contrast.
"""
super().__init__()
self._init(locals())
[docs] def get_transform(self, img, annotations=None):
do = self._rand_range() < self.prob
if do:
w = np.random.uniform(self.intensity_min, self.intensity_max)
return BlendTransform(src_image=img.mean(), src_weight=1 - w, dst_weight=w)
else:
return NoOpTransform()
[docs]@TRANSFORMS.register()
class RandomBrightness(TransformGen):
"""
Randomly transforms image brightness.
Brightness intensity is uniformly sampled in (intensity_min, intensity_max).
- intensity < 1 will reduce brightness
- intensity = 1 will preserve the input image
- intensity > 1 will increase brightness
See: https://pillow.readthedocs.io/en/3.0.x/reference/ImageEnhance.html
"""
[docs] def __init__(self, intensity_min, intensity_max, prob=1.):
"""
Args:
intensity_min (float): Minimum augmentation.
intensity_max (float): Maximum augmentation.
prob (float): probability of transforms image brightness.
"""
super().__init__()
self._init(locals())
[docs] def get_transform(self, img, annotations=None):
do = self._rand_range() < self.prob
if do:
w = np.random.uniform(self.intensity_min, self.intensity_max)
return BlendTransform(src_image=0, src_weight=1 - w, dst_weight=w)
else:
return NoOpTransform()
[docs]@TRANSFORMS.register()
class RandomSaturation(TransformGen):
"""
Randomly transforms image saturation.
Saturation intensity is uniformly sampled in (intensity_min, intensity_max).
- intensity < 1 will reduce saturation (make the image more grayscale)
- intensity = 1 will preserve the input image
- intensity > 1 will increase saturation
See: https://pillow.readthedocs.io/en/3.0.x/reference/ImageEnhance.html
"""
[docs] def __init__(self, intensity_min, intensity_max, prob=1.0):
"""
Args:
intensity_min (float): Minimum augmentation (1 preserves input).
intensity_max (float): Maximum augmentation (1 preserves input).
prob (float): probability of transforms image saturation.
"""
super().__init__()
self._init(locals())
[docs] def get_transform(self, img, annotations=None):
do = self._rand_range() < self.prob
if do:
assert img.shape[-1] == 3, "Saturation only works on RGB images"
w = np.random.uniform(self.intensity_min, self.intensity_max)
grayscale = img.dot([0.299, 0.587, 0.114])[:, :, np.newaxis]
return BlendTransform(src_image=grayscale, src_weight=1 - w, dst_weight=w)
else:
return NoOpTransform()
[docs]@TRANSFORMS.register()
class RandomLighting(TransformGen):
"""
Randomly transforms image color using fixed PCA over ImageNet.
The degree of color jittering is randomly sampled via a normal distribution,
with standard deviation given by the scale parameter.
"""
[docs] def __init__(self, scale):
"""
Args:
scale (float): Standard deviation of principal component weighting.
"""
super().__init__()
self._init(locals())
self.eigen_vecs = np.array(
[
[-0.5675, 0.7192, 0.4009],
[-0.5808, -0.0045, -0.8140],
[-0.5836, -0.6948, 0.4203],
]
)
self.eigen_vals = np.array([0.2175, 0.0188, 0.0045])
[docs] def get_transform(self, img, annotations=None):
assert img.shape[-1] == 3, "Saturation only works on RGB images"
weights = np.random.normal(scale=self.scale, size=3)
return BlendTransform(
src_image=self.eigen_vecs.dot(weights * self.eigen_vals),
src_weight=1.0,
dst_weight=1.0,
)
[docs]@TRANSFORMS.register()
class RandomSwapChannels(TransformGen):
"""
Randomly swap image channels.
"""
[docs] def __init__(self, prob=0.5):
"""
Args:
prob (float): probability of swap channels.
"""
super().__init__()
self._init(locals())
[docs] def get_transform(self, img, annotations=None):
_, w = img.shape[:2]
do = self._rand_range() < self.prob
if do:
return RandomSwapChannelsTransform()
else:
return NoOpTransform()
[docs]@TRANSFORMS.register()
class MinIoURandomCrop(TransformGen):
"""
Random crop the image & bboxes, the cropped patches have minimum IoU
requirement with original image & bboxes, the IoU threshold is randomly
selected from min_ious.
"""
[docs] def __init__(self, min_ious=(0.1, 0.3, 0.5, 0.7, 0.9), min_crop_size=0.3):
"""
Args:
min_ious (tuple): minimum IoU threshold for all intersections with bounding boxes
min_crop_size (float): minimum crop's size
(i.e. h,w := a*h, a*w, where a >= min_crop_size).
"""
super().__init__()
self._init(locals())
[docs] def get_transform(self, img, annotations):
"""
Args:
img (ndarray): of shape HxWxC(RGB). The array can be of type uint8
in range [0, 255], or floating point in range [0, 255].
annotations (list[dict[str->str]]):
Each item in the list is a bbox label of an object. The object is
represented by a dict,
which contains:
- bbox (list): bbox coordinates, top left and bottom right.
- bbox_mode (str): bbox label mode, for example: `XYXY_ABS`,
`XYWH_ABS` and so on...
"""
sample_mode = (1, *self.min_ious, 0)
h, w = img.shape[:2]
boxes = list()
for obj in annotations:
boxes.append(BoxMode.convert(obj["bbox"], obj["bbox_mode"], BoxMode.XYXY_ABS))
boxes = torch.tensor(boxes)
while True:
mode = np.random.choice(sample_mode)
if mode == 1:
return NoOpTransform()
min_iou = mode
for i in range(50):
new_w = np.random.uniform(self.min_crop_size * w, w)
new_h = np.random.uniform(self.min_crop_size * h, h)
# h / w in [0.5, 2]
if new_h / new_w < 0.5 or new_h / new_w > 2:
continue
left = np.random.uniform(w - new_w)
top = np.random.uniform(h - new_h)
patch = np.array(
(int(left), int(top), int(left + new_w), int(top + new_h)))
overlaps = pairwise_iou(
Boxes(patch.reshape(-1, 4)),
Boxes(boxes.reshape(-1, 4))
)
if overlaps.min() < min_iou:
continue
# center of boxes should inside the crop img
center = (boxes[:, :2] + boxes[:, 2:]) / 2
mask = ((center[:, 0] > patch[0]) * (center[:, 1] > patch[1])
* (center[:, 0] < patch[2]) * (center[:, 1] < patch[3]))
if not mask.any():
continue
return IoUCropTransform(int(left), int(top), int(new_w), int(new_h))
[docs]@TRANSFORMS.register()
class Expand(TransformGen):
"""
Random Expand the image & bboxes.
"""
[docs] def __init__(self, ratio_range=(1, 4), mean=(0, 0, 0), prob=0.5):
"""
Args:
ratio_range (tuple): range of expand ratio.
mean (tuple): mean value of dataset.
prob (float): probability of applying this transformation.
"""
super().__init__()
self._init(locals())
self.min_ratio, self.max_ratio = ratio_range
[docs] def get_transform(self, img, annotations=None):
if np.random.uniform(0, 1) > self.prob:
return NoOpTransform()
h, w, c = img.shape
ratio = np.random.uniform(self.min_ratio, self.max_ratio)
left = int(np.random.uniform(0, w * ratio - w))
top = int(np.random.uniform(0, h * ratio - h))
return ExpandTransform(left, top, ratio, self.mean)
[docs]@TRANSFORMS.register()
class RandomScale(TransformGen):
"""
Randomly scale the image according to the specified output size and scale ratio range.
This transform has the following three steps:
1. select a random scale factor according to the specified scale ratio range.
2. recompute the accurate scale_factor using rounded scaled image size.
3. select non-zero random offset (x, y) if scaled image is larger than output_size.
"""
[docs] def __init__(self, output_size, ratio_range=(0.1, 2), interp="BILINEAR"):
"""
Args:
output_size (tuple): image output size.
ratio_range (tuple): range of scale ratio.
interp (str): the interpolation method. Options includes:
* "NEAREST"
* "BILINEAR"
* "BICUBIC"
* "LANCZOS"
* "HAMMING"
* "BOX"
"""
super().__init__()
self._init(locals())
self.min_ratio, self.max_ratio = ratio_range
if isinstance(self.output_size, int):
self.output_size = [self.output_size] * 2
[docs] def get_transform(self, img, annotations=None):
h, w = img.shape[:2]
output_h, output_w = self.output_size
# 1. Select a random scale factor.
random_scale_factor = np.random.uniform(self.min_ratio, self.max_ratio)
scaled_size_h = int(random_scale_factor * output_h)
scaled_size_w = int(random_scale_factor * output_w)
# 2. Recompute the accurate scale_factor using rounded scaled image size.
image_scale_h = scaled_size_h * 1.0 / h
image_scale_w = scaled_size_w * 1.0 / w
image_scale = min(image_scale_h, image_scale_w)
# 3. Select non-zero random offset (x, y) if scaled image is larger than output_size.
scaled_h = int(h * 1.0 * image_scale)
scaled_w = int(w * 1.0 * image_scale)
return ScaleTransform(h, w, scaled_h, scaled_w, self.interp)
[docs]@TRANSFORMS.register()
class AutoAugment(TransformGen):
"""
Convert any of AutoAugment into a cvpods-fashion Transform such that can be configured in
config.py
"""
[docs] def __init__(self, name, prob=0.5, magnitude=10, hparams=None):
"""
Args:
name (str): any type of transforms list in _RAND_TRANSFORMS.
prob (float): probability of perform current augmentation.
magnitude (int): intensity / magnitude of each augmentation.
hparams (dict): hyper-parameters required by each augmentation.
"""
super().__init__()
self._init(locals())
[docs] def get_transform(self, img, annotations=None):
return AutoAugmentTransform(self.name, self.prob, self.magnitude, self.hparams)
[docs]@TRANSFORMS.register()
class Pad(TransformGen):
"""
Pad image with `pad_value` to the specified `target_h` and `target_w`.
Adds `top` rows of `pad_value` on top, `left` columns of `pad_value` on the left,
and then pads the image on the bottom and right with `pad_value` until it has
dimensions `target_h`, `target_w`.
This op does nothing if `top` and `left` is zero and the image already has size
`target_h` by `target_w`.
"""
[docs] def __init__(self, top, left, target_h, target_w, pad_value=0):
"""
Args:
top (int): number of rows of `pad_value` to add on top.
left (int): number of columns of `pad_value` to add on the left.
target_h (int): height of output image.
target_w (int): width of output image.
pad_value (int): the value used to pad the image.
"""
super().__init__()
self._init(locals())
[docs] def get_transform(self, img, annotations=None):
return PadTransform(self.top, self.left, self.target_h, self.target_w, self.pad_value)
[docs]@TRANSFORMS.register()
class RandomList(TransformGen):
"""
Random select subset of provided augmentations.
"""
[docs] def __init__(self, transforms, num_layers=2, choice_weights=None):
"""
Args:
transforms (List[TorchTransformGen]): list of transforms need to be performed.
num_layers (int): parameters of np.random.choice.
choice_weights (optional, float): parameters of np.random.choice.
"""
self.transforms = transforms
self.num_layers = num_layers
self.choice_weights = choice_weights
[docs] def get_transform(self, img, annotations=None):
tfms = np.random.choice(
self.transforms,
self.num_layers,
replace=self.choice_weights is None,
p=self.choice_weights)
return ComposeTransform(tfms)
[docs]@TRANSFORMS.register()
class ShuffleList(TransformGen):
"""
Randomly shuffle the `transforms` order.
"""
[docs] def __init__(self, transforms):
"""
Args:
transforms (list[TransformGen]): List of transform to be shuffled.
"""
super().__init__()
self.transforms = transforms
[docs] def get_transform(self, img, annotations=None):
np.random.shuffle(self.transforms)
return ComposeTransform(self.transforms)
[docs]@TRANSFORMS.register()
class RepeatList(TransformGen):
"""
Forward several times of provided transforms for a given image.
"""
[docs] def __init__(self, transforms, repeat_times):
"""
Args:
transforms (list[TransformGen]): List of transform to be repeated.
repeat_times (int): number of duplicates desired.
"""
super().__init__()
self.transforms = transforms
self.times = repeat_times
def __call__(self, img, annotations=None, **kwargs):
repeat_imgs = []
repeat_annotations = []
for t in range(self.times):
tmp_img, tmp_anno = self.get_transform(img)(img, annotations, **kwargs)
repeat_imgs.append(tmp_img)
repeat_annotations.append(tmp_anno)
repeat_imgs = np.stack(repeat_imgs, axis=0)
return repeat_imgs, repeat_annotations