3D-MOOD / vis4d /data /transforms /random_erasing.py
RoyYang0714's picture
feat: Try to build everything locally.
9b33fca
"""Random erasing data augmentation."""
import numpy as np
from vis4d.common.typing import NDArrayNumber
from vis4d.data.const import CommonKeys as K
from .base import Transform
@Transform(in_keys=K.images, out_keys=K.images)
class RandomErasing:
    """Randomly erase a rectangular region in each image of a batch.

    Implements the augmentation described in
    `https://arxiv.org/abs/1708.04896`.
    """

    def __init__(
        self,
        min_area: float = 0.02,
        max_area: float = 0.4,
        min_aspect_ratio: float = 0.3,
        max_aspect_ratio: float = 1 / 0.3,
        mean: tuple[float, float, float] = (0.0, 0.0, 0.0),
        num_attempt: int = 10,
    ) -> None:
        """Creates an instance of RandomErasing.

        Recommended to use this transform after normalization. The erased
        region will be filled with the mean value. See
        `https://arxiv.org/abs/1708.04896`.

        Args:
            min_area (float, optional): Minimum area of the erased region,
                as a fraction of the image area. Defaults to 0.02.
            max_area (float, optional): Maximum area of the erased region,
                as a fraction of the image area. Defaults to 0.4.
            min_aspect_ratio (float, optional): Minimum aspect ratio
                (height / width) of the erased region. Defaults to 0.3.
            max_aspect_ratio (float, optional): Maximum aspect ratio
                (height / width) of the erased region. Defaults to 1 / 0.3.
            mean (tuple[float, float, float], optional): Mean of the dataset,
                used as the per-channel fill value. Defaults to
                (0.0, 0.0, 0.0).
            num_attempt (int, optional): Number of maximum attempts to find a
                valid erased region. This is used to avoid infinite attempts
                of resampling the region, though such cases are very unlikely
                to happen. Defaults to 10.
        """
        self.min_area = min_area
        self.max_area = max_area
        self.min_aspect_ratio = min_aspect_ratio
        self.max_aspect_ratio = max_aspect_ratio
        self.mean = mean
        self.num_attempt = num_attempt

    def do_erasing(self, images: NDArrayNumber) -> NDArrayNumber:
        """Execute the random erasing.

        For every image along the first axis a rectangle is sampled up to
        ``num_attempt`` times; the first rectangle that is strictly smaller
        than the image in both dimensions is filled with ``mean``. If no
        attempt yields such a rectangle, that image is left unchanged.

        Args:
            images (NDArrayNumber): Batch of images, indexed as
                [N, H, W, C]. The array is modified in place.

        Returns:
            NDArrayNumber: The same array object that was passed in.
        """
        fill = np.array(self.mean)
        for i in range(images.shape[0]):
            image = images[i]
            h, w = image.shape[0:2]
            area = h * w
            for _ in range(self.num_attempt):
                target_area = (
                    np.random.uniform(self.min_area, self.max_area) * area
                )
                aspect_ratio = np.random.uniform(
                    self.min_aspect_ratio, self.max_aspect_ratio
                )
                h_erase = int(round(np.sqrt(target_area * aspect_ratio)))
                w_erase = int(round(np.sqrt(target_area / aspect_ratio)))
                if w_erase < w and h_erase < h:
                    # np.random.randint excludes the upper bound, so "+ 1" is
                    # needed for the rectangle to be able to reach the right
                    # and bottom borders of the image.
                    x_erase = np.random.randint(0, w - w_erase + 1)
                    y_erase = np.random.randint(0, h - h_erase + 1)
                    image[
                        y_erase : y_erase + h_erase,
                        x_erase : x_erase + w_erase,
                        :,
                    ] = fill
                    break
        return images

    def __call__(
        self, images_list: list[NDArrayNumber]
    ) -> list[NDArrayNumber]:
        """Execute the transform.

        Args:
            images_list (list[NDArrayNumber]): Image batches to augment. Each
                entry is passed through ``do_erasing`` and written back to the
                same list position.

        Returns:
            list[NDArrayNumber]: The input list with every entry processed.
        """
        for i, images in enumerate(images_list):
            images_list[i] = self.do_erasing(images)
        return images_list