#!/usr/bin/env python
# -*- encoding: utf-8 -*-
# Copyright (c) Facebook, Inc. and its affiliates.
# Modified by BaseDetection, Inc. and its affiliates.
import logging
import cv2
import numpy as np
from PIL import Image
import pycocotools.mask as mask_util
import torch
from cvpods.structures import (
BitMasks,
Boxes,
BoxMode,
Instances,
Keypoints,
PolygonMasks,
RotatedBoxes,
polygons_to_bitmask
)
from cvpods.utils import PathManager
from . import transforms as T
[docs]class SizeMismatchError(ValueError):
"""
When loaded image has difference width/height compared with annotation.
"""
# https://en.wikipedia.org/wiki/YUV#SDTV_with_BT.601
_M_RGB2YUV = [[0.299, 0.587, 0.114], [-0.14713, -0.28886, 0.436], [0.615, -0.51499, -0.10001]]
_M_YUV2RGB = [[1.0, 0.0, 1.13983], [1.0, -0.39465, -0.58060], [1.0, 2.03211, 0.0]]
# https://www.exiv2.org/tags.html
_EXIF_ORIENT = 274 # exif 'Orientation' tag
[docs]def convert_PIL_to_numpy(image, format):
"""
Convert PIL image to numpy array of target format.
Args:
image (PIL.Image): a PIL image
format (str): the format of output image
Returns:
(np.ndarray): also see `read_image`
"""
if format is not None:
# PIL only supports RGB, so convert to RGB and flip channels over below
conversion_format = format
if format in ["BGR", "YUV-BT.601"]:
conversion_format = "RGB"
image = image.convert(conversion_format)
image = np.asarray(image)
# PIL squeezes out the channel dimension for "L", so make it HWC
if format == "L":
image = np.expand_dims(image, -1)
# handle formats not supported by PIL
elif format == "BGR":
# flip channels if needed
image = image[:, :, ::-1]
elif format == "YUV-BT.601":
image = image / 255.0
image = np.dot(image, np.array(_M_RGB2YUV).T)
return image
[docs]def convert_image_to_rgb(image, format):
"""
Convert an image from given format to RGB.
Args:
image (np.ndarray or Tensor): an HWC image
format (str): the format of input image, also see `read_image`
Returns:
(np.ndarray): (H,W,3) RGB image in 0-255 range, can be either float or uint8
"""
if isinstance(image, torch.Tensor):
image = image.cpu().numpy()
if format == "BGR":
image = image[:, :, [2, 1, 0]]
elif format == "YUV-BT.601":
image = np.dot(image, np.array(_M_YUV2RGB).T)
image = image * 255.0
else:
if format == "L":
image = image[:, :, 0]
image = image.astype(np.uint8)
image = np.asarray(Image.fromarray(image, mode=format).convert("RGB"))
return image
def _apply_exif_orientation(image):
"""
Applies the exif orientation correctly.
This code exists per the bug:
https://github.com/python-pillow/Pillow/issues/3973
with the function `ImageOps.exif_transpose`. The Pillow source raises errors with
various methods, especially `tobytes`
Function based on:
https://github.com/wkentaro/labelme/blob/v4.5.4/labelme/utils/image.py#L59
https://github.com/python-pillow/Pillow/blob/7.1.2/src/PIL/ImageOps.py#L527
Args:
image (PIL.Image): a PIL image
Returns:
(PIL.Image): the PIL image with exif orientation applied, if applicable
"""
if not hasattr(image, "getexif"):
return image
try:
exif = image.getexif()
except Exception: # https://github.com/facebookresearch/detectron2/issues/1885
exif = None
if exif is None:
return image
orientation = exif.get(_EXIF_ORIENT)
method = {
2: Image.FLIP_LEFT_RIGHT,
3: Image.ROTATE_180,
4: Image.FLIP_TOP_BOTTOM,
5: Image.TRANSPOSE,
6: Image.ROTATE_270,
7: Image.TRANSVERSE,
8: Image.ROTATE_90,
}.get(orientation)
if method is not None:
return image.transpose(method)
return image
[docs]def read_image(file_name, format=None):
"""
Read an image into the given format.
Will apply rotation and flipping if the image has such exif information.
Args:
file_name (str): image file path
format (str): one of the supported image modes in PIL, or "BGR" or "YUV-BT.601".
Returns:
image (np.ndarray): an HWC image in the given format, which is 0-255, uint8 for
supported image modes in PIL or "BGR"; float (0-1 for Y) for YUV-BT.601.
"""
with PathManager.open(file_name, "rb") as f:
image = Image.open(f)
if format == ("RGB"):
image = image.convert("RGB")
return np.array(image)
else:
# work around this bug: https://github.com/python-pillow/Pillow/issues/3973
image = _apply_exif_orientation(image)
return convert_PIL_to_numpy(image, format)
[docs]def check_image_size(dataset_dict, image):
"""
Raise an error if the image does not match the size specified in the dict.
"""
if "width" in dataset_dict or "height" in dataset_dict:
image_wh = (image.shape[1], image.shape[0])
expected_wh = (dataset_dict["width"], dataset_dict["height"])
if not image_wh == expected_wh:
raise SizeMismatchError(
"Mismatched image shape{}, got {}, expect {}.".format(
" for image " + dataset_dict["file_name"]
if "file_name" in dataset_dict
else "",
image_wh,
expected_wh,
)
+ " Please check the width/height in your annotation."
)
# To ensure bbox always remap to original image size
if "width" not in dataset_dict:
dataset_dict["width"] = image.shape[1]
if "height" not in dataset_dict:
dataset_dict["height"] = image.shape[0]
[docs]def annotations_to_instances(annos, image_size, mask_format="polygon"):
"""
Create an :class:`Instances` object used by the models,
from instance annotations in the dataset dict.
Args:
annos (list[dict]): a list of instance annotations in one image, each
element for one instance.
image_size (tuple): height, width
Returns:
Instances:
It will contain fields "gt_boxes", "gt_classes",
"gt_masks", "gt_keypoints", if they can be obtained from `annos`.
This is the format that builtin models expect.
"""
boxes = [
BoxMode.convert(obj["bbox"], obj["bbox_mode"], BoxMode.XYXY_ABS)
for obj in annos
]
target = Instances(image_size)
boxes = target.gt_boxes = Boxes(boxes)
boxes.clip(image_size)
classes = [obj["category_id"] for obj in annos]
classes = torch.tensor(classes, dtype=torch.int64)
target.gt_classes = classes
if len(annos) and "segmentation" in annos[0]:
segms = [obj["segmentation"] for obj in annos]
if mask_format == "polygon":
masks = PolygonMasks(segms)
else:
assert mask_format == "bitmask", mask_format
masks = []
for segm in segms:
if isinstance(segm, list):
# polygon
masks.append(polygons_to_bitmask(segm, *image_size))
elif isinstance(segm, dict):
# COCO RLE
masks.append(mask_util.decode(segm))
elif isinstance(segm, np.ndarray):
assert segm.ndim == 2, "Expect segmentation of 2 dimensions, got {}.".format(
segm.ndim)
# mask array
masks.append(segm)
else:
raise ValueError(
"Cannot convert segmentation of type '{}' to BitMasks!"
"Supported types are: polygons as list[list[float] or ndarray],"
" COCO-style RLE as a dict, or a full-image segmentation mask "
"as a 2D ndarray.".format(type(segm)))
# torch.from_numpy does not support array with negative stride.
masks = BitMasks(
torch.stack([
torch.from_numpy(np.ascontiguousarray(x)) for x in masks
]))
target.gt_masks = masks
if len(annos) and "keypoints" in annos[0]:
kpts = np.array([obj.get("keypoints", []) for obj in annos]) # (N, K, 3)
# Set all out-of-boundary points to "unlabeled"
kpts_xy = kpts[:, :, :2]
inside = (kpts_xy >= np.array([0, 0])) & (kpts_xy <= np.array(image_size[::-1]))
inside = inside.all(axis=2)
kpts[:, :, :2] = kpts_xy
kpts[:, :, 2][~inside] = 0
target.gt_keypoints = Keypoints(kpts)
return target
[docs]def annotations_to_instances_rotated(annos, image_size):
"""
Create an :class:`Instances` object used by the models,
from instance annotations in the dataset dict.
Compared to `annotations_to_instances`, this function is for rotated boxes only
Args:
annos (list[dict]): a list of instance annotations in one image, each
element for one instance.
image_size (tuple): height, width
Returns:
Instances:
Containing fields "gt_boxes", "gt_classes",
if they can be obtained from `annos`.
This is the format that builtin models expect.
"""
boxes = [obj["bbox"] for obj in annos]
target = Instances(image_size)
boxes = target.gt_boxes = RotatedBoxes(boxes)
boxes.clip(image_size)
classes = [obj["category_id"] for obj in annos]
classes = torch.tensor(classes, dtype=torch.int64)
target.gt_classes = classes
return target
[docs]def filter_empty_instances(instances, by_box=True, by_mask=True):
"""
Filter out empty instances in an `Instances` object.
Args:
instances (Instances):
by_box (bool): whether to filter out instances with empty boxes
by_mask (bool): whether to filter out instances with empty masks
Returns:
Instances: the filtered instances.
"""
assert by_box or by_mask
r = []
if by_box:
r.append(instances.gt_boxes.nonempty())
if instances.has("gt_masks") and by_mask:
r.append(instances.gt_masks.nonempty())
# TODO: can also filter visible keypoints
if not r:
return instances
m = r[0]
for x in r[1:]:
m = m & x
return instances[m]
[docs]def create_keypoint_hflip_indices(dataset_names, meta):
"""
Args:
dataset_names (list[str]): list of dataset names
Returns:
ndarray[int]: a vector of size=#keypoints, storing the
horizontally-flipped keypoint indices.
"""
check_metadata_consistency("keypoint_names", dataset_names, meta)
check_metadata_consistency("keypoint_flip_map", dataset_names, meta)
names = meta["keypoint_names"]
# TODO flip -> hflip
flip_map = dict(meta["keypoint_flip_map"])
flip_map.update({v: k for k, v in flip_map.items()})
flipped_names = [i if i not in flip_map else flip_map[i] for i in names]
flip_indices = [names.index(i) for i in flipped_names]
return np.asarray(flip_indices)
[docs]def check_sample_valid(args):
if args["sample_style"] == "range":
assert (
len(args["short_edge_length"]) == 2
), f"more than 2 ({len(args['short_edge_length'])}) " \
"short_edge_length(s) are provided for ranges"
[docs]def imdecode(data, *, require_chl3=True, require_alpha=False):
"""decode images in common formats (jpg, png, etc.)
:param data: encoded image data
:type data: :class:`bytes`
:param require_chl3: whether to convert gray image to 3-channel BGR image
:param require_alpha: whether to add alpha channel to BGR image
:rtype: :class:`numpy.ndarray`
"""
img = cv2.imdecode(np.fromstring(data, np.uint8), cv2.IMREAD_UNCHANGED)
if img is None and len(data) >= 3 and data[:3] == b'GIF':
# cv2 doesn't support GIF, try PIL
img = _gif_decode(data)
assert img is not None, 'failed to decode'
if img.ndim == 2 and require_chl3:
img = img.reshape(img.shape + (1, ))
if img.shape[2] == 1 and require_chl3:
img = np.tile(img, (1, 1, 3))
if img.ndim == 3 and img.shape[2] == 3 and require_alpha:
assert img.dtype == np.uint8
img = np.concatenate([img, np.ones_like(img[:, :, :1]) * 255], axis=2)
return img
def _gif_decode(data):
try:
import io
from PIL import Image
im = Image.open(io.BytesIO(data))
im = im.convert('RGB')
return cv2.cvtColor(np.array(im), cv2.COLOR_RGB2BGR)
except Exception:
return