# Source code for fortrace.utility.image_processing.image_similarity

import os

import cv2
from numpy import ndarray
from skimage.metrics import normalized_root_mse, structural_similarity

from fortrace.utility.image_processing.opencv_utils import (
    _resize_image,
    read_image_gray_scale,
)
from fortrace.utility.logger_helper import setup_logger

logger = setup_logger(__name__)



[docs]
def image_similarity(
    vm_image: bytes | os.PathLike | cv2.Mat, target: bytes | os.PathLike | cv2.Mat
) -> float:
    """Compare two images to each other using SSIM.

    Both inputs are loaded as gray scale images and passed through
    `_resize_image` before the structural similarity index is computed.

    Args:
        vm_image: image captured with a domain
        target: image captured with a domain or a path to a reference image

    Returns:
        structural similarity score between the two pictures, or 0.0 if
        `structural_similarity` rejects the inputs with a `ValueError`
    """
    vm_image_gray = read_image_gray_scale(vm_image)
    target_gray = read_image_gray_scale(target)

    vm_image_gray, target_gray = _resize_image(vm_image_gray, target_gray)

    try:
        # Only the mean score is needed, so the full SSIM map is not requested.
        score = structural_similarity(vm_image_gray, target_gray)
    except ValueError as err:
        # Best effort: images that cannot be compared count as "not similar",
        # but the reason is logged instead of being dropped silently.
        logger.debug(f"Could not compute structural similarity: {err}")
        return 0.0
    return score




[docs]
def nrmse(
    vm_image: bytes | os.PathLike | ndarray, target: bytes | os.PathLike | ndarray
) -> float:
    """Compute the 'Normalized Root Mean Squared Error' between two images.

    Both inputs are loaded as gray scale images and passed through
    `_resize_image` before the error is computed. `vm_image` is handed to
    `normalized_root_mse` as its first argument and `target` as its second.

    Args:
        vm_image: image captured with a domain
        target: image captured with a domain or a path to a reference image

    Returns:
        normalized error, where a lower score means more similar images
        (NOTE(review): the original documentation states a range of [0,1];
        confirm this against the normalization used by `normalized_root_mse`)
    """
    reference, candidate = _resize_image(
        read_image_gray_scale(vm_image),
        read_image_gray_scale(target),
    )
    return normalized_root_mse(reference, candidate)




[docs]
def image_difference(
    vm_image: bytes | os.PathLike | cv2.Mat,
    target: bytes | os.PathLike | cv2.Mat,
    min_area: int = 40,
) -> tuple[list[tuple[int, int, int, int]], list[float]]:
    """Compute the difference between two images, using structural similarity.

    Both images are loaded as gray scale and binarised with the same threshold
    (Otsu's threshold determined on `target`). The SSIM map of the binarised
    images is thresholded again and every external contour of the resulting
    mask is reported as one difference.

    The images are not resized here.
    NOTE(review): `structural_similarity` presumably requires both images to
    have the same shape - confirm against callers.

    Args:
        vm_image: image before change took place
        target: image after change took place
        min_area: minimal contour area in pixels to be considered as difference

    Returns:
        two parallel lists; entries at the same index belong together:
            - bounding boxes of all differences as (x, y, x + w, y + h)
            - contour area of each difference (as returned by
              `cv2.contourArea`)
    """
    vm_image = read_image_gray_scale(vm_image)
    target = read_image_gray_scale(target)

    # Binarise both images with one common threshold so that they stay
    # comparable: Otsu picks the threshold on `target`, `vm_image` reuses it.
    threshold, target = cv2.threshold(
        target, 0, 255, cv2.THRESH_BINARY | cv2.THRESH_OTSU
    )
    _, vm_image = cv2.threshold(vm_image, threshold, 255, cv2.THRESH_BINARY)

    _, ssim_map = structural_similarity(vm_image, target, full=True)

    # Local SSIM values may be negative for strongly dissimilar patches.
    # Clamp to [0, 1] before scaling, otherwise the uint8 cast wraps negative
    # values around and the most different pixels can end up looking similar.
    diff = (ssim_map.clip(0.0, 1.0) * 255).astype("uint8")

    # Inverted threshold: low similarity becomes foreground (255).
    _, thresh = cv2.threshold(diff, 0, 255, cv2.THRESH_BINARY_INV | cv2.THRESH_OTSU)
    contours = cv2.findContours(thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    # The position of the contour list in the returned tuple depends on the
    # length of that tuple (2 or 3 elements).
    contours = contours[0] if len(contours) == 2 else contours[1]

    bounding_boxes = []
    bounding_boxes_area = []
    for contour in contours:
        area = cv2.contourArea(contour)
        # filter too small areas
        if area < min_area:
            continue
        x, y, w, h = cv2.boundingRect(contour)
        bounding_boxes.append((x, y, x + w, y + h))
        bounding_boxes_area.append(area)

    return bounding_boxes, bounding_boxes_area




[docs]
def detect_newly_opened_window(
    vm_image: bytes | os.PathLike,
    target: bytes | os.PathLike,
    coordinates: tuple[int, int, int, int] | None = None,
) -> tuple[int, int, int, int] | None:
    """Locate a newly opened window by diffing two screenshots.

    The difference with the largest area reported by `image_difference` is
    taken to be the new window.

    Args:
        vm_image: screenshot before the window has opened
        target: screenshot after the window has opened (has to be the same size as
            `vm_image`)
        coordinates: optional region (x, y, x + w, y + h) of a 'parent' window;
            when given, both screenshots are cropped to this region before
            they are compared

    Returns:
        coordinates of the newly opened window as (x, y, x + w, y + h), relative
            to the uncropped screenshot, or None if no difference was detected
    """
    if coordinates is not None:
        # TODO: usability suggestion: check both image sizes, if one matches coordinates and crop only the one that doesn't
        before = read_image_gray_scale(vm_image)
        after = read_image_gray_scale(target)
        rows = slice(coordinates[1], coordinates[3])
        cols = slice(coordinates[0], coordinates[2])
        vm_image = before[rows, cols]
        target = after[rows, cols]

    boxes, areas = image_difference(vm_image, target)

    if not boxes:
        logger.debug(
            "Could not detect any differences between the two provided images."
        )
        return None

    # pick the box that belongs to the largest area (first one wins on ties)
    largest_box, _ = max(zip(boxes, areas), key=lambda pair: pair[1])

    # FIXME: for window decorations, e.g., shadows will be included in difference

    if coordinates is None:
        return largest_box

    # The box is relative to the cropped region: shift it back by the region's
    # top-left corner.
    dx, dy = coordinates[0], coordinates[1]
    return (
        largest_box[0] + dx,
        largest_box[1] + dy,
        largest_box[2] + dx,
        largest_box[3] + dy,
    )