Source code for fortrace.utility.image_processing.image_similarity

import os

import cv2
from numpy import ndarray
from skimage.metrics import normalized_root_mse, structural_similarity

from fortrace.utility.image_processing.opencv_utils import (
    _resize_image,
    read_image_gray_scale,
)
from fortrace.utility.logger_helper import setup_logger

logger = setup_logger(__name__)


def image_similarity(
    vm_image: bytes | os.PathLike | cv2.Mat, target: bytes | os.PathLike | cv2.Mat
) -> float:
    """Compare two images to each other using SSIM.

    Both inputs are loaded as gray scale images and handed to
    ``_resize_image`` (presumably to bring them to a common size -- confirm
    against that helper) before the structural similarity index is computed.

    Args:
        vm_image: image captured with a domain
        target: image captured with a domain or a path to a reference image

    Returns:
        structural similarity score between the two pictures, or ``0.0`` when
        ``structural_similarity`` rejects the image pair with a ``ValueError``
    """
    vm_image_gray = read_image_gray_scale(vm_image)
    target_gray = read_image_gray_scale(target)
    vm_image_gray, target_gray = _resize_image(vm_image_gray, target_gray)

    try:
        # Only the mean score is needed, so the full SSIM map is not requested.
        score = structural_similarity(vm_image_gray, target_gray)
    except ValueError as error:
        # Best effort: treat an incomparable pair as "not similar", but leave a
        # trace so a failed comparison can be told apart from a real 0.0 score.
        logger.debug("Could not compute structural similarity: %s", error)
        return 0.0

    # Return a plain float from both branches, as annotated.
    return float(score)
def nrmse(
    vm_image: bytes | os.PathLike | ndarray, target: bytes | os.PathLike | ndarray
) -> float:
    """Compute 'Normalized Root Mean Squared Error' between the two images.

    Both inputs are loaded as gray scale images and handed to
    ``_resize_image`` before the error is computed. ``vm_image`` is passed as
    the first (reference) argument of ``normalized_root_mse``, so the result
    is not symmetric in its arguments.

    Args:
        vm_image: image captured with a domain; used as the reference image
        target: image captured with a domain or a path to a reference image

    Returns:
        normalized error, where ``0`` means identical images and a lower score
        means more similar images. With the default normalization of
        ``normalized_root_mse`` the error is relative to the magnitude of
        ``vm_image`` and is therefore not capped at ``1``.
    """
    vm_image_gray = read_image_gray_scale(vm_image)
    target_gray = read_image_gray_scale(target)
    vm_image_gray, target_gray = _resize_image(vm_image_gray, target_gray)

    # Return a plain float, as annotated.
    return float(normalized_root_mse(vm_image_gray, target_gray))
def image_difference(
    vm_image: bytes | os.PathLike | cv2.Mat,
    target: bytes | os.PathLike | cv2.Mat,
    min_area: int = 40,
) -> tuple[list[tuple[int, int, int, int]], list[float]]:
    """Compute the difference between two images, using structural similarity.

    Both images are binarized, compared with SSIM, and the external contours
    of the resulting difference mask are reported as bounding boxes.

    Args:
        vm_image: image before change took place
        target: image after change took place
        min_area: minimal contour area in pixels to be considered as
            difference

    Returns:
        two parallel lists of equal length: the bounding boxes
        ``(x, y, x + w, y + h)`` of all detected differences, and the contour
        area of each difference at the same index
    """
    vm_gray = read_image_gray_scale(vm_image)
    target_gray = read_image_gray_scale(target)

    # Binarize the target with Otsu's method and reuse the threshold it picked
    # for vm_image, so both images are cut at the same gray level.
    threshold, target_binary = cv2.threshold(
        target_gray, 0, 255, cv2.THRESH_BINARY | cv2.THRESH_OTSU
    )
    vm_binary = cv2.threshold(vm_gray, threshold, 255, cv2.THRESH_BINARY)[1]

    _, diff = structural_similarity(vm_binary, target_binary, full=True)
    # Local SSIM values can be negative. Clip before scaling, because casting
    # negative floats to uint8 is not well defined and may wrap around to
    # bright ("similar") pixel values.
    diff = (diff.clip(0, 1) * 255).astype("uint8")
    # Inverted threshold: dissimilar (dark) regions become the foreground.
    thresh = cv2.threshold(diff, 0, 255, cv2.THRESH_BINARY_INV | cv2.THRESH_OTSU)[1]

    contours = cv2.findContours(thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    # The contour list sits at index 0 of a 2-tuple result and at index 1 of a
    # longer result (the tuple length depends on the OpenCV version).
    contours = contours[0] if len(contours) == 2 else contours[1]

    bounding_boxes = []
    bounding_boxes_area = []
    for contour in contours:
        area = cv2.contourArea(contour)
        # filter too small areas
        if area >= min_area:
            x, y, w, h = cv2.boundingRect(contour)
            bounding_boxes.append((x, y, x + w, y + h))
            bounding_boxes_area.append(area)

    return bounding_boxes, bounding_boxes_area
def detect_newly_opened_window(
    vm_image: bytes | os.PathLike,
    target: bytes | os.PathLike,
    coordinates: tuple[int, int, int, int] | None = None,
) -> tuple[int, int, int, int] | None:
    """Obtain the coordinates of a newly opened window via image difference.

    The new window is taken to be the difference region reported by
    ``image_difference`` that has the largest area.

    Args:
        vm_image: screenshot before the window has opened
        target: screenshot after the window has opened (has to be the same
            size as `vm_image`)
        coordinates: coordinates in `vm_image` of 'parent' window that will be
            scanned for a new window (x, y, x + w, y + h)

    Returns:
        image coordinates of the newly opened window (x, y, x + w, y + h),
        or None if no difference between the two images was detected
    """
    if coordinates is not None:
        # TODO: usability suggestion: check both image sizes, if one matches
        #  coordinates and crop only the one that doesn't
        vm_image = read_image_gray_scale(vm_image)
        target = read_image_gray_scale(target)
        # Restrict both screenshots to the parent window (rows = y, cols = x).
        parent_region = (
            slice(coordinates[1], coordinates[3]),
            slice(coordinates[0], coordinates[2]),
        )
        vm_image = vm_image[parent_region]
        target = target[parent_region]

    boxes, areas = image_difference(vm_image, target)
    if not boxes:
        logger.debug(
            "Could not detect any differences between the two provided images."
        )
        return None

    # Pick the box whose difference area is largest (first one wins on ties).
    largest_box, _ = max(zip(boxes, areas), key=lambda pair: pair[1])

    # FIXME: for window decorations, e.g., shadows will be included in difference
    if coordinates is None:
        return largest_box

    # The box is relative to the cropped region; shift it back into the
    # coordinate system of the full screenshot.
    x_offset = coordinates[0]
    y_offset = coordinates[1]
    return (
        largest_box[0] + x_offset,
        largest_box[1] + y_offset,
        largest_box[2] + x_offset,
        largest_box[3] + y_offset,
    )