import os
import cv2
from numpy import ndarray
from skimage.metrics import normalized_root_mse, structural_similarity
from fortrace.utility.image_processing.opencv_utils import (
_resize_image,
read_image_gray_scale,
)
from fortrace.utility.logger_helper import setup_logger
# Module-level logger, created by the project's logger helper with this module's name.
logger = setup_logger(__name__)
[docs]
def image_similarity(
    vm_image: bytes | os.PathLike | cv2.Mat, target: bytes | os.PathLike | cv2.Mat
) -> float:
    """Compare two images to each other using SSIM.

    Both inputs are loaded as gray scale images and passed through
    ``_resize_image`` before the structural similarity is computed.

    Args:
        vm_image: image captured with a domain
        target: image captured with a domain or a path to a reference image

    Returns:
        structural similarity score between the two pictures, or ``0.0`` if
        ``structural_similarity`` rejects the inputs with a ``ValueError``
        (e.g. when an image is smaller than the SSIM window)
    """
    vm_image_gray = read_image_gray_scale(vm_image)
    target_gray = read_image_gray_scale(target)
    vm_image_gray, target_gray = _resize_image(vm_image_gray, target_gray)

    try:
        # Only the mean score is needed, so the per-pixel SSIM map is not requested.
        return structural_similarity(vm_image_gray, target_gray)
    except ValueError as err:
        # Best effort: images that cannot be compared count as entirely
        # dissimilar, but the reason is logged instead of silently dropped.
        logger.debug(f"Could not compute structural similarity: {err}")
        return 0.0
[docs]
def nrmse(
    vm_image: bytes | os.PathLike | ndarray, target: bytes | os.PathLike | ndarray
) -> float:
    """Return the 'Normalized Root Mean Squared Error' of two images.

    Both inputs are loaded as gray scale images and passed through
    ``_resize_image`` before the error is computed. ``vm_image`` is handed to
    ``normalized_root_mse`` as the first argument and ``target`` as the second.

    Args:
        vm_image: image captured with a domain
        target: image captured with a domain or a path to a reference image

    Returns:
        normalized error, where a lower score means more similar images
        (0 for identical images).
        NOTE(review): the library's default normalisation does not appear to
        bound the value by 1 from above -- confirm before relying on a
        [0, 1] range.
    """
    captured, reference = _resize_image(
        read_image_gray_scale(vm_image),
        read_image_gray_scale(target),
    )
    return normalized_root_mse(captured, reference)
[docs]
def image_difference(
    vm_image: bytes | os.PathLike | cv2.Mat,
    target: bytes | os.PathLike | cv2.Mat,
    min_area: int = 40,
) -> tuple[list[tuple[int, int, int, int]], list[float]]:
    """Compute the difference between two images, using structural similarity.

    Both images are loaded as gray scale, binarised with the Otsu threshold
    derived from ``target``, and compared via SSIM. Regions of low similarity
    are then extracted as external contours.

    Args:
        vm_image: image before change took place
        target: image after change took place
        min_area: minimal contour area in pixels to be considered as difference

    Returns:
        two parallel lists of equal length: the bounding boxes
        ``(x, y, x + w, y + h)`` of all detected differences, and the contour
        area of each difference in the same order
    """
    vm_image = read_image_gray_scale(vm_image)
    target = read_image_gray_scale(target)

    # Otsu picks the threshold from `target`; the same value is reused for
    # `vm_image` so that both images are binarised identically.
    threshold, target = cv2.threshold(
        target, 0, 255, cv2.THRESH_BINARY | cv2.THRESH_OTSU
    )
    _, vm_image = cv2.threshold(vm_image, threshold, 255, cv2.THRESH_BINARY)

    _, diff = structural_similarity(vm_image, target, full=True)
    # Local SSIM values can be negative. Clip to [0, 1] before scaling so the
    # cast to uint8 never sees out-of-range values (which would wrap around and
    # make strongly dissimilar regions look similar).
    diff = (diff.clip(0.0, 1.0) * 255).astype("uint8")

    # Inverted Otsu threshold: regions of low similarity become foreground.
    thresh = cv2.threshold(diff, 0, 255, cv2.THRESH_BINARY_INV | cv2.THRESH_OTSU)[1]

    # Depending on the OpenCV version, findContours returns either
    # (contours, hierarchy) or (image, contours, hierarchy).
    found = cv2.findContours(thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    contours = found[0] if len(found) == 2 else found[1]

    bounding_boxes = []
    bounding_boxes_area = []
    for contour in contours:
        area = cv2.contourArea(contour)
        # filter too small areas
        if area < min_area:
            continue
        x, y, w, h = cv2.boundingRect(contour)
        bounding_boxes.append((x, y, x + w, y + h))
        bounding_boxes_area.append(area)

    return bounding_boxes, bounding_boxes_area
[docs]
def detect_newly_opened_window(
    vm_image: bytes | os.PathLike,
    target: bytes | os.PathLike,
    coordinates: tuple[int, int, int, int] | None = None,
) -> tuple[int, int, int, int] | None:
    """Locate a newly opened window by diffing two screenshots.

    The window is taken to be the difference region with the largest area
    reported by ``image_difference``.

    Args:
        vm_image: screenshot before the window has opened
        target: screenshot after the window has opened (has to be the same size as
            `vm_image`)
        coordinates: coordinates in `vm_image` of 'parent' window that will be scanned
            for a new window (x, y, x + w, y + h)

    Returns:
        image coordinates of the newly opened window (x, y, x + w, y + h), expressed
        relative to the full image even when `coordinates` restricts the search, or
        None if no difference between the two images was detected
    """
    restrict_search = coordinates is not None

    if restrict_search:
        # TODO: usability suggestion: check both image sizes, if one matches coordinates and crop only the one that doesn't
        vm_image = read_image_gray_scale(vm_image)
        target = read_image_gray_scale(target)
        # Crop both screenshots to the parent window (rows are y, columns are x).
        row_span = slice(coordinates[1], coordinates[3])
        col_span = slice(coordinates[0], coordinates[2])
        vm_image = vm_image[row_span, col_span]
        target = target[row_span, col_span]

    boxes, areas = image_difference(vm_image, target)
    if len(boxes) == 0:
        logger.debug(
            "Could not detect any differences between the two provided images."
        )
        return None

    # Pick the box whose area is largest (the first one on ties).
    largest = max(range(min(len(boxes), len(areas))), key=lambda idx: areas[idx])
    left, top, right, bottom = boxes[largest]

    # FIXME: for window decorations, e.g., shadows will be included in difference
    if not restrict_search:
        return boxes[largest]

    # Translate from cropped-image coordinates back to full-image coordinates.
    shift_x = coordinates[0]
    shift_y = coordinates[1]
    return (left + shift_x, top + shift_y, right + shift_x, bottom + shift_y)