Source code for Augmentor.Operations

# Operations.py
# Author: Marcus D. Bloice <https://github.com/mdbloice> and contributors
# Licensed under the terms of the MIT Licence.
"""
The Operations module contains classes for all operations used by Augmentor.

The classes contained in this module are not called or instantiated directly
by the user, instead the user interacts with the
:class:`~Augmentor.Pipeline.Pipeline` class and uses the utility functions contained
there.

In this module, each operation is a subclass of type :class:`Operation`.
The :class:`~Augmentor.Pipeline.Pipeline` objects expect :class:`Operation`
types, and therefore all operations are of type :class:`Operation`, and
provide their own implementation of the :func:`~Operation.perform_operation`
function.

Hence, the documentation for this module is intended for developers who
wish to extend Augmentor or wish to see how operations function internally.

For detailed information on extending Augmentor, see :ref:`extendingaugmentor`.
"""
from __future__ import (absolute_import, division,
                        print_function, unicode_literals)
from builtins import *

from PIL import Image, ImageOps, ImageEnhance
import math
from math import floor, ceil

import numpy as np
# from skimage import img_as_ubyte
# from skimage import transform

import os
import random
import warnings

# Python 2-3 compatibility - not currently needed.
# try:
#    from StringIO import StringIO
# except ImportError:
#    from io import StringIO


class Operation(object):
    """
    The class :class:`Operation` represents the base class for all operations
    that can be performed. Inherit from :class:`Operation`, overload
    its methods, and instantiate super to create a new operation. See
    the section on extending Augmentor with custom operations at
    :ref:`extendingaugmentor`.
    """

    def __init__(self, probability):
        """
        All operations must at least have a :attr:`probability` which is
        initialised when creating the operation's object.

        :param probability: Controls the probability that the operation is
         performed when it is invoked in the pipeline.
        :type probability: Float
        """
        self.probability = probability

    def __str__(self):
        """
        Used to display a string representation of the operation, which is
        used by the :func:`Pipeline.status` to display the current pipeline's
        operations in a human readable way.

        :return: The name of the concrete class. Can be overridden if
         required, for example as is done in the :class:`Rotate` class.
        """
        return self.__class__.__name__

    def perform_operation(self, images):
        """
        Perform the operation on the passed images. Each operation must at
        least have this function, which accepts a list containing objects of
        type PIL.Image, performs its operation, and returns a new list
        containing objects of type PIL.Image.

        :param images: The image(s) to transform.
        :type images: List containing PIL.Image object(s).
        :return: The transformed image(s) as a list of object(s) of type
         PIL.Image.
        :raises NotImplementedError: Always, when called on the base class.
         ``NotImplementedError`` is a subclass of ``RuntimeError``, so code
         that catches ``RuntimeError`` keeps working.
        """
        raise NotImplementedError("Illegal call to base class.")
class HistogramEqualisation(Operation):
    """
    Operation that applies histogram equalisation to every image handed to
    its :func:`perform_operation` function.
    """

    def __init__(self, probability):
        """
        The only parameter is the mandatory :attr:`probability`.

        :param probability: Controls the probability that the operation is
         performed when it is invoked in the pipeline.
        :type probability: Float
        """
        Operation.__init__(self, probability)

    def perform_operation(self, images):
        """
        Equalise the histogram of each passed image with
        ``ImageOps.equalize`` and return the results in the same order.

        :param images: The image(s) on which to perform the histogram
         equalisation.
        :type images: List containing PIL.Image object(s).
        :return: The transformed image(s) as a list of object(s) of type
         PIL.Image.
        """
        equalised = []
        for picture in images:
            # For colour images the histogram is computed on the flattened
            # image, which fires a warning; it is silenced per image.
            # Equalising each colour channel separately may be preferable.
            with warnings.catch_warnings():
                warnings.simplefilter("ignore")
                equalised.append(ImageOps.equalize(picture))
        return equalised
class Greyscale(Operation):
    """
    Operation that converts images to greyscale, i.e. to shades of grey
    (pixel intensities) ranging from 0 to 255, which represent black and
    white respectively.

    .. seealso:: The :class:`BlackAndWhite` class.
    """

    def __init__(self, probability):
        """
        The only parameter is the mandatory :attr:`probability`.

        :param probability: Controls the probability that the operation is
         performed when it is invoked in the pipeline.
        :type probability: Float
        """
        Operation.__init__(self, probability)

    def perform_operation(self, images):
        """
        Convert each passed image with ``ImageOps.grayscale`` and return the
        converted images in the same order.

        :param images: The image(s) to convert to greyscale.
        :type images: List containing PIL.Image object(s).
        :return: The transformed image(s) as a list of object(s) of type
         PIL.Image.
        """
        return [ImageOps.grayscale(picture) for picture in images]
class Invert(Operation):
    """
    Operation that negates images, that is, reverses the pixel values of
    every image it processes.
    """

    def __init__(self, probability):
        """
        The only parameter is the mandatory :attr:`probability`.

        :param probability: Controls the probability that the operation is
         performed when it is invoked in the pipeline.
        :type probability: Float
        """
        Operation.__init__(self, probability)

    def perform_operation(self, images):
        """
        Negate each passed image with ``ImageOps.invert`` and return the
        negated images in the same order.

        :param images: The image(s) to negate.
        :type images: List containing PIL.Image object(s).
        :return: The transformed image(s) as a list of object(s) of type
         PIL.Image.
        """
        return [ImageOps.invert(picture) for picture in images]
class BlackAndWhite(Operation):
    """
    Operation that converts images to black and white, i.e. to a 1-bit,
    monochrome binary colour palette. Not to be confused with greyscale,
    where an 8-bit pixel intensity range is used.

    .. seealso:: The :class:`Greyscale` class.
    """

    def __init__(self, probability, threshold):
        """
        Besides the mandatory :attr:`probability`, a :attr:`threshold`
        defines the cut-off point at which a pixel becomes black or white.
        The user-facing :func:`~Augmentor.Pipeline.Pipeline.black_and_white`
        function defaults it to 128.

        :param probability: Controls the probability that the operation is
         performed when it is invoked in the pipeline.
        :param threshold: A value between 0 and 255 that defines the cut off
         point where an individual pixel is converted into black or white.
        :type probability: Float
        :type threshold: Integer
        """
        Operation.__init__(self, probability)
        self.threshold = threshold

    def perform_operation(self, images):
        """
        Convert each passed image to 1-bit monochrome. Every image is first
        converted to greyscale; pixels below :attr:`threshold` then become 0
        and all others 255.

        :param images: The image(s) to convert into monochrome.
        :type images: List containing PIL.Image object(s).
        :return: The transformed image(s) as a list of object(s) of type
         PIL.Image.
        """
        def binarise(intensity):
            # Below the threshold -> black, otherwise white.
            return 0 if intensity < self.threshold else 255

        # PIL.ImageOps.posterize(image=image, bits=1) would be an
        # alternative, but point() might be faster.
        return [ImageOps.grayscale(picture).point(binarise, '1')
                for picture in images]
class RandomBrightness(Operation):
    """
    Operation that randomly changes the brightness of images.
    """

    def __init__(self, probability, min_factor, max_factor):
        """
        Used by the :func:`~Augmentor.Pipeline.Pipeline.random_brightness`
        function. Takes the mandatory :attr:`probability` plus the bounds of
        the brightness factor.

        :param probability: Controls the probability that the operation is
         performed when it is invoked in the pipeline.
        :param min_factor: Lower bound of the brightness factor, between 0.0
         and :attr:`max_factor`. A factor of 0.0 gives a black image, 1.0
         gives the original image, and values above 1.0 give a brighter
         image.
        :param max_factor: Upper bound of the brightness factor; should be
         bigger than :attr:`min_factor`.
        :type probability: Float
        :type min_factor: Float
        :type max_factor: Float
        """
        Operation.__init__(self, probability)
        self.min_factor = min_factor
        self.max_factor = max_factor

    def perform_operation(self, images):
        """
        Change the brightness of the passed images. A single factor is drawn
        uniformly between :attr:`min_factor` and :attr:`max_factor` and
        applied to every image in the list.

        :param images: The image(s) whose brightness is changed.
        :type images: List containing PIL.Image object(s).
        :return: The transformed image(s) as a list of object(s) of type
         PIL.Image.
        """
        # One draw per call so all images in the list get the same factor.
        factor = np.random.uniform(self.min_factor, self.max_factor)
        return [ImageEnhance.Brightness(picture).enhance(factor)
                for picture in images]
class RandomColor(Operation):
    """
    Operation that randomly changes the saturation of images.
    """

    def __init__(self, probability, min_factor, max_factor):
        """
        Used by the :func:`~Augmentor.Pipeline.Pipeline.random_color`
        function. Takes the mandatory :attr:`probability` plus the bounds of
        the saturation factor.

        :param probability: Controls the probability that the operation is
         performed when it is invoked in the pipeline.
        :param min_factor: Lower bound of the saturation factor, between 0.0
         and :attr:`max_factor`. A factor of 0.0 gives a black and white
         image, 1.0 gives the original image.
        :param max_factor: Upper bound of the saturation factor; should be
         bigger than :attr:`min_factor`.
        :type probability: Float
        :type min_factor: Float
        :type max_factor: Float
        """
        Operation.__init__(self, probability)
        self.min_factor = min_factor
        self.max_factor = max_factor

    def perform_operation(self, images):
        """
        Change the saturation of the passed images. A single factor is drawn
        uniformly between :attr:`min_factor` and :attr:`max_factor` and
        applied to every image in the list.

        :param images: The image(s) whose saturation is changed.
        :type images: List containing PIL.Image object(s).
        :return: The transformed image(s) as a list of object(s) of type
         PIL.Image.
        """
        # One draw per call so all images in the list get the same factor.
        factor = np.random.uniform(self.min_factor, self.max_factor)
        return [ImageEnhance.Color(picture).enhance(factor)
                for picture in images]
class RandomContrast(Operation):
    """
    Operation that randomly changes the contrast of images.
    """

    def __init__(self, probability, min_factor, max_factor):
        """
        Used by the :func:`~Augmentor.Pipeline.Pipeline.random_contrast`
        function. Takes the mandatory :attr:`probability` plus the bounds of
        the contrast factor.

        :param probability: Controls the probability that the operation is
         performed when it is invoked in the pipeline.
        :param min_factor: Lower bound of the contrast factor, between 0.0
         and :attr:`max_factor`. A factor of 0.0 gives a solid grey image,
         1.0 gives the original image.
        :param max_factor: Upper bound of the contrast factor; should be
         bigger than :attr:`min_factor`.
        :type probability: Float
        :type min_factor: Float
        :type max_factor: Float
        """
        Operation.__init__(self, probability)
        self.min_factor = min_factor
        self.max_factor = max_factor

    def perform_operation(self, images):
        """
        Change the contrast of the passed images. A single factor is drawn
        uniformly between :attr:`min_factor` and :attr:`max_factor` and
        applied to every image in the list.

        :param images: The image(s) whose contrast is changed.
        :type images: List containing PIL.Image object(s).
        :return: The transformed image(s) as a list of object(s) of type
         PIL.Image.
        """
        # One draw per call so all images in the list get the same factor.
        factor = np.random.uniform(self.min_factor, self.max_factor)
        return [ImageEnhance.Contrast(picture).enhance(factor)
                for picture in images]
class Skew(Operation):
    """
    This class is used to perform perspective skewing on images. It allows
    for skewing from a total of 12 different perspectives.
    """

    def __init__(self, probability, skew_type, magnitude):
        """
        As well as the required :attr:`probability` parameter, the type of
        skew that is performed is controlled using a :attr:`skew_type` and a
        :attr:`magnitude` parameter. The :attr:`skew_type` controls the
        direction of the skew, while :attr:`magnitude` controls the degree
        to which the skew is performed.

        To see examples of the various skews, see :ref:`perspectiveskewing`.

        Images are skewed **in place** and an image of the same size is
        returned by this function.

        :param probability: Controls the probability that the operation is
         performed when it is invoked in the pipeline.
        :param skew_type: Must be one of ``TILT``, ``TILT_TOP_BOTTOM``,
         ``TILT_LEFT_RIGHT``, ``CORNER``, ``RANDOM`` or ``ALL``.

         - ``TILT`` will randomly skew either left, right, up, or down.
         - ``TILT_TOP_BOTTOM`` will randomly skew up or down.
         - ``TILT_LEFT_RIGHT`` will randomly skew left or right.
         - ``CORNER`` will randomly move one **corner** of the image along
           one axis, i.e. one of 8 different directions.
         - ``RANDOM`` picks one of the four types above at random.
         - ``ALL`` moves every corner outwards along both axes, each by its
           own random amount.

        :param magnitude: The degree to which the image is skewed. The
         largest possible displacement in pixels is
         ``ceil(max(width, height) * magnitude)``.
        :type probability: Float
        :type skew_type: String
        :type magnitude: Integer
        """
        Operation.__init__(self, probability)
        self.skew_type = skew_type
        self.magnitude = magnitude

    def perform_operation(self, images):
        """
        Perform the skew on the passed image(s) and return the transformed
        image(s). Uses the :attr:`skew_type` and :attr:`magnitude`
        parameters to control the type of skew to perform as well as the
        degree to which it is performed.

        The geometry is derived from the first image only, so all images in
        the list must have identical dimensions. The same perspective
        transform is applied to every image.

        :param images: The image(s) to skew.
        :type images: List containing PIL.Image object(s).
        :return: The transformed image(s) as a list of object(s) of type
         PIL.Image.
        :raises ValueError: If :attr:`skew_type` is not a supported value.
        """
        # Width and height taken from first image in list.
        # This requires that all ground truth images in the list
        # have identical dimensions!
        w, h = images[0].size

        # Corners as (horizontal, vertical) points in the order
        # top left, top right, bottom right, bottom left.
        original_plane = [(0, 0), (w, 0), (w, h), (0, h)]

        # Per corner, the sign that moves each coordinate away from the
        # image. Every supported skew only ever pushes corners outwards.
        outward = [(-1, -1), (1, -1), (1, 1), (-1, 1)]

        def pushed(plane, corner, axis, amount):
            # Copy of ``plane`` with one coordinate of one corner moved
            # outwards by ``amount`` pixels.
            point = list(plane[corner])
            point[axis] += outward[corner][axis] * amount
            moved = list(plane)
            moved[corner] = tuple(point)
            return moved

        max_skew_amount = int(ceil(max(w, h) * self.magnitude))
        skew_amount = random.randint(1, max_skew_amount)

        if self.skew_type == "RANDOM":
            skew = random.choice(["TILT", "TILT_LEFT_RIGHT", "TILT_TOP_BOTTOM", "CORNER"])
        else:
            skew = self.skew_type

        if skew in ("TILT", "TILT_LEFT_RIGHT", "TILT_TOP_BOTTOM"):
            if skew == "TILT":
                skew_direction = random.randint(0, 3)
            elif skew == "TILT_LEFT_RIGHT":
                skew_direction = random.randint(0, 1)
            else:
                skew_direction = random.randint(2, 3)

            # direction -> (corners that move, coordinate that moves)
            tilts = {
                0: ((0, 3), 1),  # Left tilt: left corners move vertically
                1: ((1, 2), 1),  # Right tilt: right corners move vertically
                2: ((0, 1), 0),  # Forward tilt: top corners move horizontally
                3: ((2, 3), 0),  # Backward tilt: bottom corners move horizontally
            }
            corners, axis = tilts[skew_direction]
            new_plane = original_plane
            for corner in corners:
                new_plane = pushed(new_plane, corner, axis, skew_amount)
        elif skew == "CORNER":
            # Directions 0..7 enumerate (corner, coordinate) pairs in order:
            # 0/1 top left, 2/3 top right, 4/5 bottom right, 6/7 bottom left.
            corner, axis = divmod(random.randint(0, 7), 2)
            new_plane = pushed(original_plane, corner, axis, skew_amount)
        elif skew == "ALL":
            # Not currently in use, as it makes little sense to skew by the
            # same amount in every direction if magnitude is set manually.
            # Each coordinate gets its own random draw, corner by corner.
            new_plane = original_plane
            for corner in range(4):
                for axis in range(2):
                    new_plane = pushed(new_plane, corner, axis,
                                       random.randint(1, skew_amount))
        else:
            raise ValueError(
                "skew_type must be one of TILT, TILT_TOP_BOTTOM, "
                "TILT_LEFT_RIGHT, CORNER, RANDOM or ALL, got %r" % (skew,))

        # To calculate the coefficients required by PIL for the perspective
        # skew, see the following Stack Overflow discussion:
        # https://goo.gl/sSgJdj
        matrix = []
        for p1, p2 in zip(new_plane, original_plane):
            matrix.append([p1[0], p1[1], 1, 0, 0, 0, -p2[0] * p1[0], -p2[0] * p1[1]])
            matrix.append([0, 0, 0, p1[0], p1[1], 1, -p2[1] * p1[0], -p2[1] * p1[1]])

        # The ``np.float`` alias no longer exists in current NumPy; the
        # builtin ``float`` selects the same dtype.
        A = np.array(matrix, dtype=float)
        B = np.array(original_plane).reshape(8)

        coefficients = np.dot(np.linalg.pinv(A), B)
        coefficients = np.array(coefficients).reshape(8)

        return [image.transform(image.size,
                                Image.PERSPECTIVE,
                                coefficients,
                                resample=Image.BICUBIC)
                for image in images]
class RotateStandard(Operation):
    """
    Class to perform rotations without automatically cropping the images,
    as opposed to the :class:`RotateRange` class.

    .. seealso:: For arbitrary rotations with automatic cropping, see
     the :class:`RotateRange` class.
    .. seealso:: For 90 degree rotations, see the :class:`Rotate` class.
    """

    def __init__(self, probability, max_left_rotation, max_right_rotation, expand=False, fillcolor=None):
        """
        :param probability: Controls the probability that the operation is
         performed when it is invoked in the pipeline.
        :param max_left_rotation: Bound of the rotation range on the left
         side; stored as a negative number regardless of the sign passed.
        :param max_right_rotation: Bound of the rotation range on the right
         side; stored as a positive number regardless of the sign passed.
        :param expand: Forwarded unchanged as the ``expand`` argument of
         PIL's ``Image.rotate``.
        :param fillcolor: Forwarded unchanged as the ``fillcolor`` argument
         of PIL's ``Image.rotate``.
        """
        Operation.__init__(self, probability)
        self.max_left_rotation = -abs(max_left_rotation)   # Ensure always negative
        self.max_right_rotation = abs(max_right_rotation)  # Ensure always positive
        self.expand = expand
        self.fillcolor = fillcolor

    def perform_operation(self, images):
        """
        Rotate the passed images by one randomly chosen angle. A left
        candidate and a right candidate are drawn, then one of the two is
        picked at random and applied to every image.

        :param images: The image(s) to rotate.
        :type images: List containing PIL.Image object(s).
        :return: The transformed image(s) as a list of object(s) of type
         PIL.Image.
        """
        # Both candidates are always drawn, then a coin flip picks one.
        leftward = random.randint(self.max_left_rotation, 0)
        rightward = random.randint(0, self.max_right_rotation)
        rotation = leftward if random.randint(0, 1) == 0 else rightward

        return [picture.rotate(rotation,
                               expand=self.expand,
                               resample=Image.BICUBIC,
                               fillcolor=self.fillcolor)
                for picture in images]
class Rotate(Operation):
    """
    Operation that rotates images in multiples of 90 degrees. Arbitrary
    rotations are handled by the :class:`RotateRange` class.
    """

    def __init__(self, probability, rotation):
        """
        Besides the mandatory :attr:`probability`, the :attr:`rotation`
        parameter selects the rotation to perform.

        :param probability: Controls the probability that the operation is
         performed when it is invoked in the pipeline.
        :param rotation: Controls the rotation to perform. Must be one of
         ``90``, ``180``, ``270`` or ``-1``.

         - ``90`` rotate the image by 90 degrees.
         - ``180`` rotate the image by 180 degrees.
         - ``270`` rotate the image by 270 degrees.
         - ``-1`` rotate the image randomly by either 90, 180, or 270 degrees.

        .. seealso:: For arbitrary rotations, see the :class:`RotateRange` class.
        """
        Operation.__init__(self, probability)
        self.rotation = rotation

    def __str__(self):
        return "Rotate " + str(self.rotation)

    def perform_operation(self, images):
        """
        Rotate the passed images by 90, 180 or 270 degrees, or by a random
        one of these when :attr:`rotation` is ``-1``. All images in the list
        receive the same rotation, and the canvas is expanded to fit.

        :param images: The image(s) to rotate.
        :type images: List containing PIL.Image object(s).
        :return: The transformed image(s) as a list of object(s) of type
         PIL.Image.
        """
        # The multiplier is drawn on every call, whether or not it is used.
        random_factor = random.randint(1, 3)

        if self.rotation == -1:
            degrees = 90 * random_factor
        else:
            degrees = self.rotation

        return [picture.rotate(degrees, expand=True) for picture in images]
class RotateRange(Operation):
    """
    This class is used to perform rotations on images by arbitrary numbers
    of degrees.

    Images are rotated **in place** and an image of the same size is
    returned by this function. After the rotation, an area is cropped from
    the rotated image and this is then resized to match the original image
    size.

    The documented derivation of the crop is:

    .. math::

        E = \\frac{\\frac{\\sin{\\theta_{a}}}{\\sin{\\theta_{b}}}\\Big(X-\\frac{\\sin{\\theta_{a}}}{\\sin{\\theta_{b}}} Y\\Big)}{1-\\frac{(\\sin{\\theta_{a}})^2}{(\\sin{\\theta_{b}})^2}}

    which describes how :math:`E` is derived, and then follows
    :math:`B = Y - E` and
    :math:`A = \\frac{\\sin{\\theta_{a}}}{\\sin{\\theta_{b}}} B`.

    The :ref:`rotating` section describes this in detail and has example
    images to demonstrate this.
    """

    def __init__(self, probability, max_left_rotation, max_right_rotation):
        """
        As well as the required :attr:`probability` parameter, the
        :attr:`max_left_rotation` parameter controls the maximum number of
        degrees by which to rotate to the left, while the
        :attr:`max_right_rotation` controls the maximum number of degrees to
        rotate to the right.

        :param probability: Controls the probability that the operation is
         performed when it is invoked in the pipeline.
        :param max_left_rotation: The maximum number of degrees to rotate
         the image anti-clockwise.
        :param max_right_rotation: The maximum number of degrees to rotate
         the image clockwise.
        :type probability: Float
        :type max_left_rotation: Integer
        :type max_right_rotation: Integer
        """
        Operation.__init__(self, probability)
        self.max_left_rotation = -abs(max_left_rotation)   # Ensure always negative
        self.max_right_rotation = abs(max_right_rotation)  # Ensure always positive

    def perform_operation(self, images):
        """
        Rotate the passed images by one randomly chosen angle, crop away the
        border introduced by the rotation and scale each result back to the
        size of its input. A left candidate and a right candidate are drawn
        from the ranges set in the constructor, and one of the two is picked
        at random.

        :param images: The image(s) to rotate.
        :type images: List containing PIL.Image object(s).
        :return: The transformed image(s) as a list of object(s) of type
         PIL.Image.
        """
        # TODO: Small rotations of 1 or 2 degrees can create black pixels
        leftward = random.randint(self.max_left_rotation, 0)
        rightward = random.randint(0, self.max_right_rotation)
        rotation = leftward if random.randint(0, 1) == 0 else rightward

        # The two angles needed for the crop calculation. math works in
        # radians, so convert before taking the sines.
        angle_a = abs(rotation)
        angle_b = 90 - angle_a
        angle_a_sin = math.sin(math.radians(angle_a))
        angle_b_sin = math.sin(math.radians(angle_b))

        def rotate_and_crop(picture):
            # Size before the rotation; the result is scaled back to it.
            x, y = picture.size[0], picture.size[1]

            # Rotate, while expanding the canvas size.
            rotated = picture.rotate(rotation, expand=True, resample=Image.BICUBIC)

            # Size after rotation, which includes the empty space.
            X, Y = rotated.size[0], rotated.size[1]

            ratio = angle_a_sin / angle_b_sin

            # Offsets of the rectangle that is cropped.
            E = ratio * (Y - X * ratio)
            # NOTE(review): operator precedence makes this
            # ``(E / 1) - sin_a**2 / sin_b**2``, whereas the documented
            # formula divides E by ``1 - sin_a**2 / sin_b**2``. Kept as is
            # to preserve existing output -- confirm which one is intended.
            E = E / 1 - (angle_a_sin ** 2 / angle_b_sin ** 2)
            B = X - E
            A = ratio * B

            # Crop this area from the rotated image.
            box = (int(round(E)), int(round(A)), int(round(X - E)), int(round(Y - A)))
            cropped = rotated.crop(box)

            # Return the image, re-sized to the size of the image passed
            # originally.
            return cropped.resize((x, y), resample=Image.BICUBIC)

        return [rotate_and_crop(picture) for picture in images]
class Resize(Operation):
    """
    This class is used to resize images by absolute values passed as
    parameters.
    """

    def __init__(self, probability, width, height, resample_filter):
        """
        Accepts the required probability parameter as well as parameters
        to control the size of the transformed image.

        :param probability: Controls the probability that the operation is
         performed when it is invoked in the pipeline.
        :param width: The width in pixels to resize the image to.
        :param height: The height in pixels to resize the image to.
        :param resample_filter: The name of the resample filter to use, as
         an attribute name of ``PIL.Image``, i.e. ``NEAREST``, ``BICUBIC``,
         ``ANTIALIAS``, or ``BILINEAR``.
        :type probability: Float
        :type width: Integer
        :type height: Integer
        :type resample_filter: String
        """
        Operation.__init__(self, probability)
        self.width = width
        self.height = height
        self.resample_filter = resample_filter

    def _resolve_resample_filter(self):
        """
        Translate :attr:`resample_filter` into the matching ``PIL.Image``
        constant.

        :return: The filter constant found on ``PIL.Image``.
        :raises AttributeError: If ``PIL.Image`` has no such attribute.
        """
        name = "%s" % self.resample_filter

        # Recent Pillow releases no longer provide Image.ANTIALIAS; it was
        # an alias of LANCZOS, so fall back to that to keep the documented
        # value working.
        if name == "ANTIALIAS" and not hasattr(Image, "ANTIALIAS"):
            name = "LANCZOS"

        # Plain attribute lookup instead of eval(): no arbitrary code is
        # executed. Dotted names are still followed for compatibility.
        resolved = Image
        for part in name.split("."):
            resolved = getattr(resolved, part)
        return resolved

    def perform_operation(self, images):
        """
        Resize the passed images to :attr:`width` x :attr:`height` using
        the configured resample filter.

        :param images: The image(s) to resize.
        :type images: List containing PIL.Image object(s).
        :return: The transformed image(s) as a list of object(s) of type
         PIL.Image.
        :raises AttributeError: If :attr:`resample_filter` does not name an
         attribute of ``PIL.Image``.
        """
        # TODO: Automatically change this to ANTIALIAS or BICUBIC depending
        # on the size of the file
        if not images:
            return []

        # Resolved once per call rather than once per image.
        resample = self._resolve_resample_filter()
        return [image.resize((self.width, self.height), resample)
                for image in images]
class Flip(Operation):
    """
    Operation that mirrors images, either left-to-right, top-to-bottom, or
    in a randomly chosen one of these two directions.
    """

    def __init__(self, probability, top_bottom_left_right):
        """
        The direction of the flip, or whether it should be randomised, is
        controlled using the :attr:`top_bottom_left_right` parameter.

        :param probability: Controls the probability that the operation is
         performed when it is invoked in the pipeline.
        :param top_bottom_left_right: Controls the direction the image
         should be mirrored. Must be one of ``LEFT_RIGHT``, ``TOP_BOTTOM``,
         or ``RANDOM``.

         - ``LEFT_RIGHT`` transposes with ``Image.FLIP_LEFT_RIGHT``.
         - ``TOP_BOTTOM`` transposes with ``Image.FLIP_TOP_BOTTOM``.
         - ``RANDOM`` picks one of the two at random.
        """
        Operation.__init__(self, probability)
        self.top_bottom_left_right = top_bottom_left_right

    def perform_operation(self, images):
        """
        Mirror the passed images according to the
        :attr:`top_bottom_left_right` argument passed to the constructor.
        All images in the list are mirrored in the same direction.

        :param images: The image(s) to mirror.
        :type images: List containing PIL.Image object(s).
        :return: The transformed image(s) as a list of object(s) of type
         PIL.Image. For an unrecognised direction the list holds ``None``
         for every input image.
        """
        # Drawn on every call, whether or not the direction is random.
        random_axis = random.randint(0, 1)

        direction = self.top_bottom_left_right
        if direction == "RANDOM":
            direction = "LEFT_RIGHT" if random_axis == 0 else "TOP_BOTTOM"

        def mirror(picture):
            if direction == "LEFT_RIGHT":
                return picture.transpose(Image.FLIP_LEFT_RIGHT)
            if direction == "TOP_BOTTOM":
                return picture.transpose(Image.FLIP_TOP_BOTTOM)
            # Unknown direction: nothing is returned for this image.
            return None

        return [mirror(picture) for picture in images]
class Crop(Operation):
    """
    This class is used to crop images by absolute values passed as
    parameters.
    """

    def __init__(self, probability, width, height, centre):
        """
        As well as the always required :attr:`probability` parameter, the
        constructor requires a :attr:`width` to control the width of
        the area to crop as well as a :attr:`height` parameter
        to control the height of the area to crop. Also, whether the
        area to crop should be taken from the centre of the image or from a
        random location within the image is toggled using :attr:`centre`.

        :param probability: Controls the probability that the operation is
         performed when it is invoked in the pipeline.
        :param width: The width in pixels of the area to crop from the image.
        :param height: The height in pixels of the area to crop from the
         image.
        :param centre: Whether to crop from the centre of the image or a
         random location within the image, while maintaining the size of the
         crop without cropping out of the original image's area.
        :type probability: Float
        :type width: Integer
        :type height: Integer
        :type centre: Boolean
        """
        Operation.__init__(self, probability)
        self.width = width
        self.height = height
        self.centre = centre

    def perform_operation(self, images):
        """
        Crop an area from each image, either from a random location or
        centred, using the dimensions supplied during instantiation. The
        same crop box is used for every image in the list.

        If the requested area is larger than the image in either dimension,
        the images are returned unchanged.

        :param images: The image(s) to crop the area from.
        :type images: List containing PIL.Image object(s).
        :return: The transformed image(s) as a list of object(s) of type
         PIL.Image.
        """
        # All images must be the same size, so only the first is inspected.
        w, h = images[0].size

        # TODO: Fix. We may want a full crop.
        # This check must come before the random offsets are drawn:
        # random.randint() raises ValueError on an empty range, which
        # previously made this guard unreachable for oversized crops.
        if self.width > w or self.height > h:
            return list(images)

        # Drawn even for centred crops so that the sequence of random
        # numbers consumed per call stays the same as before.
        left_shift = random.randint(0, int(w - self.width))
        down_shift = random.randint(0, int(h - self.height))

        if self.centre:
            # Integer offsets keep the crop exactly width x height. With
            # fractional edges such as 1.5 and 4.5, PIL rounds both to the
            # nearest even integer and the crop comes out one pixel short.
            left = int((w - self.width) // 2)
            top = int((h - self.height) // 2)
        else:
            left, top = left_shift, down_shift

        box = (left, top, left + self.width, top + self.height)
        return [image.crop(box) for image in images]
class CropPercentage(Operation):
    """
    This class is used to crop images by a percentage of their area.
    """
    def __init__(self, probability, percentage_area, centre, randomise_percentage_area):
        """
        As well as the always required :attr:`probability` parameter, the
        constructor requires a :attr:`percentage_area` to control the area
        of the image to crop in terms of its percentage of the original image,
        and a :attr:`centre` parameter toggle whether a random area or the
        centre of the images should be cropped.

        :param probability: Controls the probability that the operation is
         performed when it is invoked in the pipeline.
        :param percentage_area: The percentage area of the original image
         to crop. A value of 0.5 would crop an area that is 50% of the area
         of the original image's size.
        :param centre: Whether to crop from the centre of the image or
         crop a random location within the image.
        :param randomise_percentage_area: If ``True``, the
         :attr:`percentage_area` is used as an upper bound and the value
         actually applied is drawn uniformly from between 0.1 and
         :attr:`percentage_area` (rounded to two decimal places).
        :type probability: Float
        :type percentage_area: Float
        :type centre: Boolean
        :type randomise_percentage_area: Boolean
        """
        Operation.__init__(self, probability)
        self.percentage_area = percentage_area
        self.centre = centre
        self.randomise_percentage_area = randomise_percentage_area

    def perform_operation(self, images):
        """
        Crop the passed :attr:`images` by percentage area, returning the crop
        as an image. The same crop box is applied to every image in the list.

        :param images: The image(s) to crop an area from.
        :type images: List containing PIL.Image object(s).
        :return: The transformed image(s) as a list of object(s) of type
         PIL.Image.
        """
        if self.randomise_percentage_area:
            r_percentage_area = round(random.uniform(0.1, self.percentage_area), 2)
        else:
            r_percentage_area = self.percentage_area

        # The images must be of identical size, which is checked by
        # Pipeline.ground_truth().
        w, h = images[0].size

        # floor() can return 0 for small images or percentages, which would
        # produce an empty crop, so keep at least one pixel. The result is
        # also capped at the image size so the offset range below can never
        # be negative.
        w_new = min(w, max(1, int(floor(w * r_percentage_area))))
        h_new = min(h, max(1, int(floor(h * r_percentage_area))))

        # Both offsets are always drawn (even for centred crops) so that the
        # random stream is consumed the same way regardless of `centre`.
        left_shift = random.randint(0, int(w - w_new))
        down_shift = random.randint(0, int(h - h_new))

        if self.centre:
            # Integer arithmetic: float box coordinates are rounded by PIL
            # and could yield a crop one pixel larger than w_new x h_new.
            left = (w - w_new) // 2
            upper = (h - h_new) // 2
        else:
            left = left_shift
            upper = down_shift

        box = (left, upper, left + w_new, upper + h_new)

        return [image.crop(box) for image in images]
class CropRandom(Operation):
    """
    Crops a randomly positioned region covering a given fraction of each
    image dimension.

    .. warning:: This :class:`CropRandom` class is currently not used by any
     of the user-facing functions in the :class:`~Augmentor.Pipeline.Pipeline`
     class.
    """
    def __init__(self, probability, percentage_area):
        """
        :param probability: Controls the probability that the operation is
         performed when it is invoked in the pipeline.
        :param percentage_area: The percentage area of the original image
         to crop. A value of 0.5 would crop an area that is 50% of the area
         of the original image's size.
        """
        Operation.__init__(self, probability)
        self.percentage_area = percentage_area

    def perform_operation(self, images):
        """
        Randomly crop the passed image(s), returning the crop(s) as new
        image(s). One crop box is chosen and applied to every image.

        :param images: The image(s) to crop.
        :type images: List containing PIL.Image object(s).
        :return: The transformed image(s) as a list of object(s) of type
         PIL.Image.
        """
        full_width, full_height = images[0].size

        crop_width = int(floor(full_width * self.percentage_area))
        crop_height = int(floor(full_height * self.percentage_area))

        # Note: randint() samples from a uniform distribution. The horizontal
        # offset is drawn before the vertical one.
        offset_x = random.randint(0, int(full_width - crop_width))
        offset_y = random.randint(0, int(full_height - crop_height))

        box = (offset_x, offset_y, crop_width + offset_x, crop_height + offset_y)

        return [image.crop(box) for image in images]
class Shear(Operation):
    """
    This class is used to shear images, that is to tilt them in a certain
    direction. Tilting can occur along either the x- or y-axis and in both
    directions (i.e. left or right along the x-axis, up or down along the
    y-axis).

    Images are sheared **in place** and an image of the same size as the input
    image is returned by this class. That is to say, that after a shear
    has been performed, the largest possible area of the same aspect ratio
    of the original image is cropped from the sheared image, and this is
    then resized to match the original image size. The
    :ref:`shearing` section describes this in detail.

    For sample code with image examples see :ref:`shearing`.
    """
    def __init__(self, probability, max_shear_left, max_shear_right):
        """
        The shearing is randomised in magnitude, from 0 to the
        :attr:`max_shear_left` or 0 to :attr:`max_shear_right` where the
        direction is randomised. The shear axis is also randomised
        i.e. if it shears up/down along the y-axis or
        left/right along the x-axis.

        :param probability: Controls the probability that the operation is
         performed when it is invoked in the pipeline.
        :param max_shear_left: The maximum shear to the left.
        :param max_shear_right: The maximum shear to the right.
        :type probability: Float
        :type max_shear_left: Integer
        :type max_shear_right: Integer
        """
        Operation.__init__(self, probability)
        self.max_shear_left = max_shear_left
        self.max_shear_right = max_shear_right

    def perform_operation(self, images):
        """
        Shears the passed image according to the parameters defined during
        instantiation, and returns the sheared image.

        The shear angle and the shear axis are drawn once per call and then
        applied to every image in :attr:`images`.

        :param images: The image to shear.
        :type images: List containing PIL.Image object(s).
        :return: The transformed image(s) as a list of object(s) of type
         PIL.Image.
        """
        # The first image's size is used for every image in the list.
        width, height = images[0].size

        # Draw an angle (in degrees) from the widened interval
        # [-|max_shear_left| - 1, max_shear_right + 1], truncate it towards
        # zero, and then shift every value except -1 up by one.
        angle_to_shear = int(random.uniform((abs(self.max_shear_left)*-1) - 1, self.max_shear_right + 1))
        if angle_to_shear != -1:
            angle_to_shear += 1

        # Transformation matrices can be found here:
        # https://en.wikipedia.org/wiki/Transformation_matrix
        # The PIL affine transform expects the first two rows of
        # any of the affine transformation matrices, seen here:
        # https://en.wikipedia.org/wiki/Transformation_matrix#/media/File:2D_affine_transformation_matrix.svg

        # The axis to shear along is chosen at random, once for all images.
        directions = ["x", "y"]
        direction = random.choice(directions)

        def do(image):
            # Note: despite its name, phi holds the tangent of the shear
            # angle, i.e. the shear factor used in the affine matrix.
            phi = math.tan(math.radians(angle_to_shear))

            if direction == "x":
                # Here we need the unknown b, where a is
                # the height of the image and phi is the
                # angle we want to shear (our knowns):
                # b = tan(phi) * a
                shift_in_pixels = phi * height

                # Round away from zero so the enlarged canvas is never too
                # small for the sheared image.
                if shift_in_pixels > 0:
                    shift_in_pixels = math.ceil(shift_in_pixels)
                else:
                    shift_in_pixels = math.floor(shift_in_pixels)

                # For negative tilts, we reverse phi and set offset to 0
                # Also matrix offset differs from pixel shift for neg
                # but not for pos so we will copy this value in case
                # we need to change it
                matrix_offset = shift_in_pixels
                if angle_to_shear <= 0:
                    shift_in_pixels = abs(shift_in_pixels)
                    matrix_offset = 0
                    phi = abs(phi) * -1

                # Note: PIL expects the inverse scale, so 1/scale_factor for example.
                transform_matrix = (1, phi, -matrix_offset,
                                    0, 1, 0)

                # Transform onto a canvas widened by the pixel shift.
                image = image.transform((int(round(width + shift_in_pixels)), height),
                                        Image.AFFINE,
                                        transform_matrix,
                                        Image.BICUBIC)

                # Drop the leftmost `shift_in_pixels` columns, keeping the
                # region up to the original width.
                image = image.crop((abs(shift_in_pixels), 0, width, height))

                # Stretch the cropped region back to the original size.
                return image.resize((width, height), resample=Image.BICUBIC)

            elif direction == "y":
                # Same approach as for "x", with the roles of width and
                # height swapped. Unlike the "x" branch, the shift is not
                # rounded to a whole pixel before use.
                shift_in_pixels = phi * width

                matrix_offset = shift_in_pixels
                if angle_to_shear <= 0:
                    shift_in_pixels = abs(shift_in_pixels)
                    matrix_offset = 0
                    phi = abs(phi) * -1

                transform_matrix = (1, 0, 0,
                                    phi, 1, -matrix_offset)

                # Transform onto a canvas heightened by the pixel shift.
                image = image.transform((width, int(round(height + shift_in_pixels))),
                                        Image.AFFINE,
                                        transform_matrix,
                                        Image.BICUBIC)

                # Drop the topmost `shift_in_pixels` rows, keeping the region
                # up to the original height.
                image = image.crop((0, abs(shift_in_pixels), width, height))

                # Stretch the cropped region back to the original size.
                return image.resize((width, height), resample=Image.BICUBIC)

        augmented_images = []

        for image in images:
            augmented_images.append(do(image))

        return augmented_images
class Scale(Operation):
    """
    This class is used to increase or decrease images in size by a certain
    factor, while maintaining the aspect ratio of the original image.

    .. seealso:: The :class:`Resize` class for resizing images by
     **dimensions**, and hence will not necessarily maintain the aspect ratio.

    This function will return images that are **larger** than the input
    images.
    """
    def __init__(self, probability, scale_factor):
        """
        As the aspect ratio is always kept constant, only a
        :attr:`scale_factor` is required for scaling the image.

        :param probability: Controls the probability that the operation is
         performed when it is invoked in the pipeline.
        :param scale_factor: The factor by which to scale, where 1.5 would
         result in an image scaled up by 150%.
        :type probability: Float
        :type scale_factor: Float
        """
        Operation.__init__(self, probability)
        self.scale_factor = scale_factor

    def perform_operation(self, images):
        """
        Scale the passed :attr:`images` by the factor specified during
        instantiation, returning the scaled image(s). Each image is scaled
        relative to its own size.

        :param images: The image(s) to scale.
        :type images: List containing PIL.Image object(s).
        :return: The transformed image(s) as a list of object(s) of type
         PIL.Image.
        """
        scaled_images = []

        for image in images:
            original_width, original_height = image.size
            # Both dimensions are multiplied by the same factor and
            # truncated to whole pixels.
            target_size = (int(original_width * self.scale_factor),
                           int(original_height * self.scale_factor))
            scaled_images.append(image.resize(target_size, resample=Image.BICUBIC))

        return scaled_images
class Distort(Operation):
    """
    This class performs randomised, elastic distortions on images.
    """
    def __init__(self, probability, grid_width, grid_height, magnitude):
        """
        As well as the probability, the granularity of the distortions
        produced by this class can be controlled using the width and
        height of the overlaying distortion grid. The larger the height
        and width of the grid, the smaller the distortions. This means
        that larger grid sizes can result in finer, less severe distortions.
        As well as this, the magnitude of the distortions vectors can
        also be adjusted.

        :param probability: Controls the probability that the operation is
         performed when it is invoked in the pipeline.
        :param grid_width: The width of the grid overlay, which is used
         by the class to apply the transformations to the image.
        :param grid_height: The height of the grid overlay, which is used
         by the class to apply the transformations to the image.
        :param magnitude: Controls the degree to which each distortion is
         applied to the overlaying distortion grid.
        :type probability: Float
        :type grid_width: Integer
        :type grid_height: Integer
        :type magnitude: Integer
        """
        Operation.__init__(self, probability)
        self.grid_width = grid_width
        self.grid_height = grid_height
        self.magnitude = abs(magnitude)
        # TODO: Implement non-random magnitude.
        self.randomise_magnitude = True

    def perform_operation(self, images):
        """
        Distorts the passed image(s) according to the parameters supplied
        during instantiation, returning the newly distorted image.

        A single distortion mesh is generated per call and applied to every
        image in :attr:`images`.

        :param images: The image(s) to be distorted.
        :type images: List containing PIL.Image object(s).
        :return: The transformed image(s) as a list of object(s) of type
         PIL.Image.
        """
        w, h = images[0].size

        columns = self.grid_width
        rows = self.grid_height

        tile_width = int(floor(w / float(columns)))
        tile_height = int(floor(h / float(rows)))

        # The last column/row absorbs whatever is left over when the image
        # size is not an exact multiple of the tile size.
        last_tile_width = w - (tile_width * (columns - 1))
        last_tile_height = h - (tile_height * (rows - 1))

        # Bounding boxes [x1, y1, x2, y2] of every tile, in row-major order.
        dimensions = []
        for row in range(rows):
            top = row * tile_height
            if row == (rows - 1):
                bottom = last_tile_height + top
            else:
                bottom = tile_height + top
            for column in range(columns):
                left = column * tile_width
                if column == (columns - 1):
                    right = last_tile_width + left
                else:
                    right = tile_width + left
                dimensions.append([left, top, right, bottom])

        # Flat indices of the tiles in the last column and in the last row.
        last_column = set((columns - 1) + columns * row for row in range(rows))
        last_row = set(range((columns * rows) - columns, columns * rows))

        # Each tile as a quadrilateral: top-left, bottom-left, bottom-right,
        # top-right.
        polygons = [[x1, y1, x1, y2, x2, y2, x2, y1]
                    for x1, y1, x2, y2 in dimensions]

        # Every interior grid node is shared by four tiles: the tile itself,
        # its right neighbour, the tile below, and the tile diagonally below.
        polygon_indices = [[index, index + 1, index + columns, index + 1 + columns]
                           for index in range((rows * columns) - 1)
                           if index not in last_row and index not in last_column]

        for a, b, c, d in polygon_indices:
            dx = random.randint(-self.magnitude, self.magnitude)
            dy = random.randint(-self.magnitude, self.magnitude)

            # Move the shared node by (dx, dy) in each of the four tiles. The
            # offset is the position of that node's x coordinate within the
            # tile's polygon: bottom-right of a, bottom-left of b,
            # top-right of c and top-left of d.
            for tile, offset in ((a, 4), (b, 2), (c, 6), (d, 0)):
                polygons[tile][offset] += dx
                polygons[tile][offset + 1] += dy

        # PIL's MESH transform takes (target box, source quadrilateral) pairs.
        generated_mesh = [[box, quad] for box, quad in zip(dimensions, polygons)]

        return [image.transform(image.size, Image.MESH, generated_mesh, resample=Image.BICUBIC)
                for image in images]
class GaussianDistortion(Operation):
    """
    This class performs randomised, elastic gaussian distortions on images.
    """
    def __init__(self, probability, grid_width, grid_height, magnitude, corner, method, mex, mey, sdx, sdy):
        """
        As well as the probability, the granularity of the distortions
        produced by this class can be controlled using the width and
        height of the overlaying distortion grid. The larger the height
        and width of the grid, the smaller the distortions. This means
        that larger grid sizes can result in finer, less severe distortions.
        As well as this, the magnitude of the distortions vectors can
        also be adjusted.

        :param probability: Controls the probability that the operation is
         performed when it is invoked in the pipeline.
        :param grid_width: The width of the grid overlay, which is used
         by the class to apply the transformations to the image.
        :param grid_height: The height of the grid overlay, which is used
         by the class to apply the transformations to the image.
        :param magnitude: Controls the degree to which each distortion is
         applied to the overlaying distortion grid.
        :param corner: which corner of picture to distort.
         Possible values: "bell"(circular surface applied), "ul"(upper left),
         "ur"(upper right), "dl"(down left), "dr"(down right).
        :param method: possible values: "in"(apply max magnitude to the chosen
         corner), "out"(inverse of method in).
        :param mex: used to generate 3d surface for similar distortions.
         Surface is based on normal distribution.
        :param mey: used to generate 3d surface for similar distortions.
         Surface is based on normal distribution.
        :param sdx: used to generate 3d surface for similar distortions.
         Surface is based on normal distribution.
        :param sdy: used to generate 3d surface for similar distortions.
         Surface is based on normal distribution.
        :type probability: Float
        :type grid_width: Integer
        :type grid_height: Integer
        :type magnitude: Integer
        :type corner: String
        :type method: String
        :type mex: Float
        :type mey: Float
        :type sdx: Float
        :type sdy: Float

        For values :attr:`mex`, :attr:`mey`, :attr:`sdx`, and :attr:`sdy` the
        surface is based on the normal distribution:

        .. math::

         e^{- \\Big( \\frac{(x-\\text{mex})^2}{\\text{sdx}} + \\frac{(y-\\text{mey})^2}{\\text{sdy}} \\Big) }
        """
        Operation.__init__(self, probability)
        self.grid_width = grid_width
        self.grid_height = grid_height
        self.magnitude = abs(magnitude)
        self.randomise_magnitude = True
        self.corner = corner
        self.method = method
        self.mex = mex
        self.mey = mey
        self.sdx = sdx
        self.sdy = sdy

    def perform_operation(self, images):
        """
        Distorts the passed image(s) according to the parameters supplied
        during instantiation, returning the newly distorted image.

        A single distortion mesh is generated per call and applied to every
        image in :attr:`images`, so that an image and any accompanying
        images of the same size receive exactly the same distortion.

        :param images: The image(s) to be distorted.
        :type images: List containing PIL.Image object(s).
        :return: The transformed image(s) as a list of object(s) of type
         PIL.Image.
        """
        w, h = images[0].size

        horizontal_tiles = self.grid_width
        vertical_tiles = self.grid_height

        width_of_square = int(floor(w / float(horizontal_tiles)))
        height_of_square = int(floor(h / float(vertical_tiles)))

        # The last column/row absorbs whatever is left over when the image
        # size is not an exact multiple of the tile size.
        width_of_last_square = w - (width_of_square * (horizontal_tiles - 1))
        height_of_last_square = h - (height_of_square * (vertical_tiles - 1))

        # Bounding boxes [x1, y1, x2, y2] of every tile, in row-major order.
        dimensions = []
        for vertical_tile in range(vertical_tiles):
            y1 = vertical_tile * height_of_square
            if vertical_tile == (vertical_tiles - 1):
                y2 = height_of_last_square + y1
            else:
                y2 = height_of_square + y1
            for horizontal_tile in range(horizontal_tiles):
                x1 = horizontal_tile * width_of_square
                if horizontal_tile == (horizontal_tiles - 1):
                    x2 = width_of_last_square + x1
                else:
                    x2 = width_of_square + x1
                dimensions.append([x1, y1, x2, y2])

        # Flat indices of the tiles in the last column and in the last row.
        last_column = set((horizontal_tiles - 1) + horizontal_tiles * i
                          for i in range(vertical_tiles))
        last_row = set(range((horizontal_tiles * vertical_tiles) - horizontal_tiles,
                             horizontal_tiles * vertical_tiles))

        # Each tile as a quadrilateral: top-left, bottom-left, bottom-right,
        # top-right.
        polygons = [[x1, y1, x1, y2, x2, y2, x2, y1]
                    for x1, y1, x2, y2 in dimensions]

        # Every interior grid node is shared by four tiles: the tile itself,
        # its right neighbour, the tile below, and the tile diagonally below.
        polygon_indices = [[i, i + 1, i + horizontal_tiles, i + 1 + horizontal_tiles]
                           for i in range((vertical_tiles * horizontal_tiles) - 1)
                           if i not in last_row and i not in last_column]

        # The mesh is perturbed exactly once, here, rather than once per
        # image: perturbing inside the per-image loop mutated the shared
        # polygon list, so every subsequent image received a different and
        # cumulatively larger distortion than the first.
        if polygon_indices:
            # Sub-region of the unit square that the image is mapped onto, as
            # (x_low, x_high, y_low, y_high). Raises KeyError for an unknown
            # corner name.
            corner_regions = {'dr': (0, 0.5, 0, 0.5),
                              'dl': (0.5, 1, 0, 0.5),
                              'ur': (0, 0.5, 0.5, 1),
                              'ul': (0.5, 1, 0.5, 1),
                              'bell': (0, 1, 0, 1)}
            x_low, x_high, y_low, y_high = corner_regions[self.corner]

            # "out" inverts the surface; "in" and any other value do not.
            const = -1 if self.method == "out" else 1

            def surface(x1, y1):
                # Gaussian bump centred on (mex, mey), shifted so that it
                # spans [-1, 0] for const == 1 and [0, 1] for const == -1.
                return (const * (math.exp(-(((x1 - self.mex) ** 2) / self.sdx
                                            + ((y1 - self.mey) ** 2) / self.sdy)))
                        + max(0, -const) - max(0, const))

            # The extrema of the surface over the unit square do not depend
            # on the grid node, so they are computed once instead of once per
            # node.
            X, Y = np.meshgrid(np.linspace(0, 1), np.linspace(0, 1))
            Z = np.vectorize(surface)(X, Y)
            mino = np.amin(Z)
            maxo = np.amax(Z)

            for a, b, c, d in polygon_indices:
                # The shared node is the bottom-right corner of tile a.
                node_x, node_y = polygons[a][4], polygons[a][5]

                # Map the node's relative position into the chosen region.
                new_x = (node_x / float(w)) * (x_high - x_low) + x_low
                new_y = (node_y / float(h)) * (y_high - y_low) + y_low

                # Normalise the surface value to [0, 1], floor it at 0.01 so
                # the standard deviation is never zero, and scale it by the
                # requested magnitude.
                res = surface(new_x, new_y)
                sigmax = max((res - mino) / (maxo - mino), 0.01) * self.magnitude

                dx = np.random.normal(0, sigmax, 1)[0]
                dy = np.random.normal(0, sigmax, 1)[0]

                # Move the shared node by (dx, dy) in each of the four tiles:
                # bottom-right of a, bottom-left of b, top-right of c and
                # top-left of d.
                for tile, offset in ((a, 4), (b, 2), (c, 6), (d, 0)):
                    polygons[tile][offset] += dx
                    polygons[tile][offset + 1] += dy

        # PIL's MESH transform takes (target box, source quadrilateral) pairs.
        generated_mesh = [[box, quad] for box, quad in zip(dimensions, polygons)]

        return [image.transform(image.size, Image.MESH, generated_mesh, resample=Image.BICUBIC)
                for image in images]
class Zoom(Operation):
    """
    This class is used to enlarge images (to zoom) but to return a cropped
    region of the zoomed image of the same size as the original image.
    """
    def __init__(self, probability, min_factor, max_factor):
        """
        The amount of zoom applied is randomised, from between
        :attr:`min_factor` and :attr:`max_factor`. Set these both to the same
        value to always zoom by a constant factor.

        :param probability: Controls the probability that the operation is
         performed when it is invoked in the pipeline.
        :param min_factor: The minimum amount of zoom to apply. Set both the
         :attr:`min_factor` and :attr:`max_factor` to the same values to zoom
         by a constant factor.
        :param max_factor: The maximum amount of zoom to apply. Set both the
         :attr:`min_factor` and :attr:`max_factor` to the same values to zoom
         by a constant factor.
        :type probability: Float
        :type min_factor: Float
        :type max_factor: Float
        """
        Operation.__init__(self, probability)
        self.min_factor = min_factor
        self.max_factor = max_factor

    def perform_operation(self, images):
        """
        Zooms/scales the passed image(s) and returns the new image.

        One zoom factor is drawn per call (rounded to two decimal places) and
        used for every image in :attr:`images`.

        :param images: The image(s) to be zoomed.
        :type images: List containing PIL.Image object(s).
        :return: The transformed image(s) as a list of object(s) of type
         PIL.Image.
        """
        factor = round(random.uniform(self.min_factor, self.max_factor), 2)

        def zoom_and_crop(image):
            w, h = image.size

            enlarged = image.resize((int(round(w * factor)),
                                     int(round(h * factor))),
                                    resample=Image.BICUBIC)
            w_zoomed, h_zoomed = enlarged.size

            # Crop a window of the original size around the centre of the
            # resized image.
            centre_x = float(w_zoomed) / 2
            centre_y = float(h_zoomed) / 2
            half_w = float(w) / 2
            half_h = float(h) / 2

            return enlarged.crop((floor(centre_x - half_w),
                                  floor(centre_y - half_h),
                                  floor(centre_x + half_w),
                                  floor(centre_y + half_h)))

        return [zoom_and_crop(image) for image in images]
class ZoomRandom(Operation):
    """
    This class is used to zoom into random areas of the image.
    """
    def __init__(self, probability, percentage_area, randomise):
        """
        Zooms into a random area of the image, rather than the centre of
        the image, as is done by :class:`Zoom`. The zoom factor is fixed
        unless :attr:`randomise` is set to ``True``.

        :param probability: Controls the probability that the operation is
         performed when it is invoked in the pipeline.
        :param percentage_area: A value between 0.1 and 1 that represents the
         area that will be cropped, with 1 meaning the entire area of the
         image will be cropped and 0.1 mean 10% of the area of the image
         will be cropped, before zooming.
        :param randomise: If ``True``, uses the :attr:`percentage_area` as an
         upper bound, and randomises the zoom level from between 0.1 and
         :attr:`percentage_area`.
        """
        Operation.__init__(self, probability)
        self.percentage_area = percentage_area
        self.randomise = randomise

    def perform_operation(self, images):
        """
        Randomly zoom into the passed :attr:`images` by first cropping the
        image based on the :attr:`percentage_area` argument, and then resizing
        the image to match the size of the input area.

        Effectively, you are zooming in on random areas of the image. The
        same region is used for every image in the list.

        :param images: The image(s) to crop an area from.
        :type images: List containing PIL.Image object(s).
        :return: The transformed image(s) as a list of object(s) of type
         PIL.Image.
        """
        area = self.percentage_area
        if self.randomise:
            area = round(random.uniform(0.1, self.percentage_area), 2)

        w, h = images[0].size
        region_width = int(floor(w * area))
        region_height = int(floor(h * area))

        # Note: randint() samples from a uniform distribution. The horizontal
        # offset is drawn before the vertical one.
        offset_x = random.randint(0, (w - region_width))
        offset_y = random.randint(0, (h - region_height))

        region = (offset_x, offset_y, region_width + offset_x, region_height + offset_y)

        zoomed_images = []
        for image in images:
            # Cut the region out, then blow it back up to the original size.
            cropped = image.crop(region)
            zoomed_images.append(cropped.resize((w, h), resample=Image.BICUBIC))

        return zoomed_images
class HSVShifting(Operation):
    """
    CURRENTLY NOT IMPLEMENTED.
    """
    def __init__(self, probability, hue_shift, saturation_scale, saturation_shift, value_scale, value_shift):
        """
        Stores the shift and scale limits used to perturb the hue,
        saturation and value channels.

        :param probability: Controls the probability that the operation is
         performed when it is invoked in the pipeline.
        :param hue_shift: Limit of the additive offset applied to the hue
         channel.
        :param saturation_scale: Controls the multiplicative range applied to
         the saturation channel.
        :param saturation_shift: Limit of the additive offset applied to the
         saturation channel.
        :param value_scale: Controls the multiplicative range applied to the
         value channel.
        :param value_shift: Limit of the additive offset applied to the value
         channel.
        """
        Operation.__init__(self, probability)
        self.hue_shift = hue_shift
        self.saturation_scale = saturation_scale
        self.saturation_shift = saturation_shift
        self.value_scale = value_scale
        self.value_shift = value_shift

    def perform_operation(self, images):
        """
        Randomly shifts and scales the HSV channels of the passed image(s),
        returning RGB image(s). New random values are drawn for each image.

        :param images: The image(s) to modify.
        :type images: List containing PIL.Image object(s).
        :return: The transformed image(s) as a list of object(s) of type
         PIL.Image.
        """
        def shift_channels(image):
            # Work on the HSV channels as floats in the range [0, 1].
            hsv = np.array(image.convert("HSV"), 'float64')
            hsv /= 255.

            # The five random values are drawn in this fixed order: hue
            # offset, saturation factor, saturation offset, value factor,
            # value offset.
            hue_offset = np.random.uniform(-self.hue_shift, self.hue_shift)
            saturation_factor = np.random.uniform(1 / (1 + self.saturation_scale),
                                                  1 + self.saturation_scale)
            saturation_offset = np.random.uniform(-self.saturation_shift,
                                                  self.saturation_shift)
            value_factor = np.random.uniform(1 / (1 + self.value_scale),
                                             1 + self.value_scale)
            value_offset = np.random.uniform(-self.value_shift, self.value_shift)

            hsv[..., 0] += hue_offset
            hsv[..., 1] *= saturation_factor
            hsv[..., 1] += saturation_offset
            hsv[..., 2] *= value_factor
            hsv[..., 2] += value_offset

            # Clamp back into [0, 1] in place before converting to bytes.
            np.clip(hsv, 0, 1, out=hsv)
            hsv = np.uint8(np.round(hsv * 255.))

            return Image.fromarray(hsv, "HSV").convert("RGB")

        return [shift_channels(image) for image in images]
class RandomErasing(Operation):
    """
    Class that performs Random Erasing, an augmentation technique described
    in `https://arxiv.org/abs/1708.04896 <https://arxiv.org/abs/1708.04896>`_
    by Zhong et al. To quote the authors, random erasing:

    "*... randomly selects a rectangle region in an image, and erases its
    pixels with random values.*"

    Exactly this is provided by this class.

    Random Erasing can make a trained neural network more robust to occlusion.
    """
    def __init__(self, probability, rectangle_area):
        """
        The size of the random rectangle is controlled using the
        :attr:`rectangle_area` parameter. This area is random in its
        width and height.

        :param probability: The probability that the operation will be
         performed.
        :param rectangle_area: The percentage area of the image to occlude.
        """
        Operation.__init__(self, probability)
        self.rectangle_area = rectangle_area

    def perform_operation(self, images):
        """
        Adds a random noise rectangle to a random area of the passed image,
        returning the original image with this rectangle superimposed.

        The rectangle is pasted onto the passed image object itself, and a
        new rectangle (size, position and noise) is drawn for each image.

        :param images: The image(s) to add a random noise rectangle to.
        :type images: List containing PIL.Image object(s).
        :return: The transformed image(s) as a list of object(s) of type
         PIL.Image.
        """
        def do(image):
            w, h = image.size

            # The rectangle's sides range from 10% of the image side up to
            # `rectangle_area` times the image side.
            w_occlusion_max = int(w * self.rectangle_area)
            h_occlusion_max = int(h * self.rectangle_area)

            w_occlusion_min = int(w * 0.1)
            h_occlusion_min = int(h * 0.1)

            w_occlusion = random.randint(w_occlusion_min, w_occlusion_max)
            h_occlusion = random.randint(h_occlusion_min, h_occlusion_max)

            # NumPy image arrays are indexed (rows, columns), i.e.
            # (height, width). Building the array as (width, height) would
            # produce a transposed rectangle that does not match the
            # position range computed below.
            bands = len(image.getbands())
            if bands == 1:
                noise = np.random.rand(h_occlusion, w_occlusion)
            else:
                noise = np.random.rand(h_occlusion, w_occlusion, bands)
            rectangle = Image.fromarray(np.uint8(noise * 255))

            # Choose a position that keeps the rectangle inside the image.
            random_position_x = random.randint(0, w - w_occlusion)
            random_position_y = random.randint(0, h - h_occlusion)

            image.paste(rectangle, (random_position_x, random_position_y))

            return image

        return [do(image) for image in images]
class Custom(Operation):
    """
    Class that allows for a custom operation to be performed using Augmentor's
    standard :class:`~Augmentor.Pipeline.Pipeline` object.
    """
    def __init__(self, probability, custom_function, **function_arguments):
        """
        Creates a custom operation that can be added to a pipeline.

        To add a custom operation you can instantiate this class, passing
        a function pointer, :attr:`custom_function`, followed by an
        arbitrarily long list keyword arguments,
        :attr:`\\*\\*function_arguments`.

        .. seealso:: The :func:`~Augmentor.Pipeline.Pipeline.add_operation`
         function.

        :param probability: The probability that the operation will be
         performed.
        :param custom_function: The name of the function that performs your
         custom code. Must return an Image object and accept an Image object
         as its first parameter.
        :param function_arguments: The arguments for your custom operation's
         code.
        :type probability: Float
        :type custom_function: \\*Function
        :type function_arguments: dict
        """
        Operation.__init__(self, probability)
        self.custom_function = custom_function
        self.function_arguments = function_arguments

    def __str__(self):
        """
        :return: A string of the form ``Custom (<function name>)``.
        """
        return "Custom (" + self.custom_function.__name__ + ")"

    def perform_operation(self, images):
        """
        Perform the custom operation on the passed image(s), returning the
        transformed image(s).

        The stored function is called with :attr:`images` as its first
        argument, followed by the keyword arguments supplied at
        instantiation.

        :param images: The image to perform the custom operation on.
        :return: The transformed image(s) (other functions in the pipeline
         will expect an image of type PIL.Image)
        """
        # The callable is stored as `custom_function` by __init__; there is
        # no `function_name` attribute, so referring to it raised
        # AttributeError on every call.
        return self.custom_function(images, **self.function_arguments)
class ZoomGroundTruth(Operation):
    """
    This class is used to enlarge images (to zoom) but to return a cropped
    region of the zoomed image of the same size as the original image.
    """
    def __init__(self, probability, min_factor, max_factor):
        """
        The amount of zoom applied is randomised, from between
        :attr:`min_factor` and :attr:`max_factor`. Set these both to the same
        value to always zoom by a constant factor.

        :param probability: Controls the probability that the operation is
         performed when it is invoked in the pipeline.
        :param min_factor: The minimum amount of zoom to apply. Set both the
         :attr:`min_factor` and :attr:`max_factor` to the same values to zoom
         by a constant factor.
        :param max_factor: The maximum amount of zoom to apply. Set both the
         :attr:`min_factor` and :attr:`max_factor` to the same values to zoom
         by a constant factor.
        :type probability: Float
        :type min_factor: Float
        :type max_factor: Float
        """
        Operation.__init__(self, probability)
        self.min_factor = min_factor
        self.max_factor = max_factor

    def perform_operation(self, images):
        """
        Zooms/scales the passed images and returns the new images.

        One zoom factor is drawn per call (rounded to two decimal places) and
        shared by all images in the list.

        :param images: An arbitrarily long list of image(s) to be zoomed.
        :type images: List containing PIL.Image object(s).
        :return: The zoomed in image(s) as a list of PIL.Image object(s).
        """
        factor = round(random.uniform(self.min_factor, self.max_factor), 2)

        zoomed_images = []

        for image in images:
            w, h = image.size

            # TODO: Join the resize and crop steps so that the intermediate
            # resized image is not needed.
            resized = image.resize((int(round(w * factor)), int(round(h * factor))),
                                   resample=Image.BICUBIC)
            w_zoomed, h_zoomed = resized.size

            # Window of the original size, centred on the resized image.
            left = floor((float(w_zoomed) / 2) - (float(w) / 2))
            upper = floor((float(h_zoomed) / 2) - (float(h) / 2))
            right = floor((float(w_zoomed) / 2) + (float(w) / 2))
            lower = floor((float(h_zoomed) / 2) + (float(h) / 2))

            zoomed_images.append(resized.crop((left, upper, right, lower)))

        return zoomed_images
class Mixup(Operation):
    """
    Implements the *mixup* augmentation method, as described in:

    Zhang et al. (2018), *mixup*: Beyond Empirical Risk Minimization,
    arXiv:1710.09412

    See `http://arxiv.org/abs/1710.09412 <http://arxiv.org/abs/1710.09412>`_
    for details.

    Also see `https://github.com/facebookresearch/mixup-cifar10
    <https://github.com/facebookresearch/mixup-cifar10>`_ for code which was
    followed to create this functionality in Augmentor.

    The *mixup* augmentation technique trains a neural network on
    "*combinations of pairs of examples and their labels*" (Zhang et al.,
    2018).

    In summary, *mixup* constructs training samples as follows:

    .. math::

        \\tilde{x} = \\lambda x_i + (1-\\lambda)x_j

    .. math::

        \\tilde{y} = \\lambda y_i + (1-\\lambda)y_j

    where :math:`(x_i, y_i)` and :math:`(x_j, y_j)` are two samples from the
    training data, :math:`x_i, x_j` are raw input vectors, :math:`y_i, y_j`
    are one-hot label encodings (such as [0.0, 1.0]), and
    :math:`\\lambda \\in [0, 1]` where :math:`\\lambda` is sampled randomly
    from the Beta distribution, :math:`\\beta(\\alpha, \\alpha)`.

    The :math:`\\alpha` hyper-parameter controls the strength of the
    interpolation between image-label pairs, where
    :math:`\\alpha \\in (0, \\infty)`.

    According to the paper referenced above, values for :math:`\\alpha`
    between 0.1 and 0.4 led to best performance. Smaller values for
    :math:`\\alpha` result in less *mixup* effect whereas larger values
    would tend to result in underfitting.
    """

    def __init__(self, probability, alpha=0.4):
        """
        Performs the *mixup* augmentation technique.

        .. note:: Not yet enabled!

            This function is currently implemented but not **enabled**, as
            it requires each image's label in order to operate - something
            which Augmentor was not designed to handle.

        :param probability: Controls the probability that the operation is
         performed when it is invoked in the pipeline.
        :param alpha: The alpha parameter controls the strength of the
         interpolation between image-label pairs. Its value can be any
         value greater than 0. A smaller value for :attr:`alpha` results in
         more values closer to 0 or 1, meaning the *mixup* is more often
         closer to either of the images in the pair. Its value is set to
         0.4 by default.
        :type probability: Float
        :type alpha: Float
        """
        Operation.__init__(self, probability)
        self.alpha = alpha

    def perform_operation(self, images):
        """
        This function is currently implemented but not **enabled**, as it
        requires each image's label in order to operate - something which
        Augmentor was not designed to handle.

        This is therefore future work, and may only be possible when used in
        combination with generators.

        As implemented, each image is blended with *itself* and with fixed
        placeholder one-hot labels, because neither a partner image nor real
        labels are available here.

        :param images: The image(s) to apply *mixup* to.
        :type images: List of objects convertible with ``numpy.asarray``
         (for example PIL.Image objects).
        :return: A list with one ``(mixup_x, mixup_y)`` tuple per input
         image, where ``mixup_x`` is the blended image as a float32 NumPy
         array and ``mixup_y`` is the blended label as a NumPy array.
        """
        # One mixing coefficient is drawn per call and shared by every image.
        # A non-positive alpha switches interpolation off (lambda = 1 keeps
        # the first sample of each pair unchanged).
        if self.alpha > 0:
            lambda_value = np.random.beta(self.alpha, self.alpha)
        else:
            lambda_value = 1

        def do(image1, image2, y1, y2):
            # Work in float32 so the weighted sum is not truncated to the
            # integer pixel type of the source images.
            x1 = np.asarray(image1).astype('float32')
            x2 = np.asarray(image2).astype('float32')

            mixup_x = lambda_value * x1 + (1 - lambda_value) * x2
            mixup_y = lambda_value * y1 + (1 - lambda_value) * y2

            return mixup_x, mixup_y

        # Placeholder one-hot labels standing in for the real labels that
        # the pipeline does not currently supply.
        y1 = np.array([0.0, 1.0])
        y2 = np.array([1.0, 0.0])

        return [do(image, image, y1, y2) for image in images]