Source code for fortrace.utility.applications.application

import pathlib
import queue
import uuid
from enum import Enum, auto
from os import PathLike
from time import sleep
from typing import Any, Callable, Protocol

import cv2
import numpy as np

from fortrace.core.qemu_monitor import QEMUMonitorSession
from fortrace.utility.exceptions import DesktopEnvironmentException
from fortrace.utility.image_processing.image_similarity import (
    detect_newly_opened_window,
)
from fortrace.utility.image_processing.text_detection import (
    detect_and_recognize_text,
    text_line_contains,
)
from fortrace.utility.logger_helper import setup_logger

logger = setup_logger(__name__)


class ApplicationType(Enum):
    """Supported application types, used to select the correct factory method."""

    FILE_MANAGER = auto()
    WEB_BROWSER = auto()
    TERMINAL = auto()
    TEXT_EDITOR = auto()
    MAIL_CLIENT = auto()
    # use this value for special applications that are not natively supported
    OTHER = auto()
class ApplicationEvent(Enum):
    """Application events that are processed by the desktop environment."""

    CLOSED = auto()
    NEW_APPLICATION_OPENED = auto()
    FOCUS_SHIFTED = auto()
    # called in order to actively focus an application
    FOCUS_APPLICATION = auto()
    APPLICATION_RESIZED = auto()
    APPLICATION_POPUP_OPENED = auto()
    APPLICATION_POPUP_CLOSED = auto()
class PopupType(Enum):
    """Kinds of popup windows that can be requested from an application."""

    GENERIC = auto()
    FILE_DIALOGUE = auto()
    WARNING = auto()
class ParentNotifier(Protocol):
    """Structural type of the callback used to report application events.

    Used to provide type hints and enable the IDE for static type checking.
    It refers to DesktopEnvironment._on_change method.
    """

    def __call__(self, event: ApplicationEvent, **kwargs):
        ...
[docs] class GenericApplication: """Representation of a generic graphic application window. This class resembles the general interface of all graphical applications with which ForTrace might interact. It can be used to control applications with keyboard shortcuts and simple commands, without writing any application specific Python classes. """ coordinates: ( tuple[int, int, int, int] | None ) # coordinates of application window in OpenCV's coordinates (x, y, x + w, y + h) def __init__( self, name: str, application_type: ApplicationType, qemu_monitor_session: QEMUMonitorSession, parent_notifier: ParentNotifier, ): """Construct a GenericApplication object. This is the representation of a graphical application window. Args: name: name of the application application_type: type is used to determine the correct factory method qemu_monitor_session: handle to active QEMUMonitor session of the domain parent_notifier: desktop environment attribute to handle window changes """ self._name = name self._uuid = ( uuid.uuid1() ) # to distinguish between different instances of the same application self._type = application_type self._qs = qemu_monitor_session self._parent_notifier = parent_notifier self._focused = True self.coordinates = None self._popup_windows = queue.LifoQueue() self._parent_notifier( ApplicationEvent.NEW_APPLICATION_OPENED, application_reference=self ) # TODO: register opening of application here or in parent_notifier (here we could set coordiantes, instead of relying on the OS to do so)
[docs] def send_key_combination(self, key_combination: str, times: int = 1): """Send a direct key combination to the application. Args: key_combination: key combination to be sent (using qemu key_codes) times: number of times to send key combination """ self._qs.send_key_combination(key_combination, times)
[docs] def send_text(self, text: str, end_ret: bool = False): """Send text to the application. Args: text: text to be sent end_ret: should return be pressed after the text is entered? Note: The text is allowed to contain '\\n' and '\\r' characters, which are respected """ self._qs.send_text(text, end_ret)
[docs] def perform_complex_action( self, func: Callable[["GenericApplication"], Any] ) -> Any: """Performs a complex action in an application. Args: func: Callable representing the action to perform Returns: returns whatever `func` returns """ return func(self)
[docs] def close(self): """Close an application with 'alt-f4' key combination.""" if not self._focused: self._parent_notifier( ApplicationEvent.FOCUS_APPLICATION, application_reference=self ) sleep(2) # TODO: replace with nrmse and wait for change? self._qs.send_key_combination("alt-f4") self._parent_notifier(ApplicationEvent.CLOSED, application_reference=self)
[docs] def extract_text(self) -> tuple[list[tuple[int, int, int, int]], list[str]]: """Extract text from application window. Extracts the text visible in the application window. Uses OCR, thus the result is not 100% accurate, thus applying string similarity search is highly recommended. Returns: tuple of list with bounding boxes and the detected strings within """ if self.coordinates is None: logger.warning( "The size of application %s was not determined in the " "open_application method of the OS. Thus, text from the whole screen " "will be extracted.", self._name, ) return detect_and_recognize_text(self._qs.take_screenshot(), self.coordinates)
[docs] def open_popup( self, screenshot_before_popup: bytes | PathLike | np.ndarray, popup_type: PopupType = PopupType.GENERIC, popup_min_size: int = 50000, ) -> "GenericPopup": """React on the opening of a new popup window within the application window. Args: screenshot_before_popup: a screenshot taken of the application before the popup is opened popup_type: specify the kind of popup to return popup_min_size: the size of the popup in px Returns: Instance of opened popup window to interact with """ match popup_type: case PopupType.GENERIC: popup = GenericPopup(self) case PopupType.FILE_DIALOGUE: popup = FileDialogue(self) case _: raise ValueError("PopupType is not supported") self._popup_windows.put(popup) # TODO: window in background might include dynamic content -> need to look for another solution for _ in range(5): popup.coordinates = detect_newly_opened_window( screenshot_before_popup, self._qs.take_screenshot() ) if popup.coordinates and popup.size > popup_min_size: logger.debug( "Popup detected at coordinates %s with size %s", popup.coordinates, popup.size, ) break logger.debug("Popup not detected. Trying again after waiting period.") sleep(5) else: logger.error("Cannot determine location of popup window.") raise DesktopEnvironmentException( "Failed to determine location of popup window for application %s", self._name, ) self._parent_notifier( ApplicationEvent.APPLICATION_POPUP_OPENED, application_reference=self, popup_reference=popup, ) return popup
[docs] def take_screenshot(self) -> np.ndarray: """Take screenshot of the application window. Returns: OpenCV image of the application window with all color channels """ screenshot = self._qs.take_screenshot() screenshot = np.frombuffer(screenshot, np.uint8) screenshot = cv2.imdecode(screenshot, cv2.IMREAD_COLOR) if self.coordinates is None: logger.warning( "The size of application %s was not determined in the " "open_application method of the OS. Thus, this method returns the whole" " screen", self.name, ) else: screenshot = screenshot[ self.coordinates[0] : self.coordinates[2], self.coordinates[1] : self.coordinates[3], ] return screenshot
[docs] def focus_element(self, text: str): """Focus the element with the specified text. This method acts as a stub for the integration of a screen parsing tool into ForTrace++. It ought to be overwritten on instance level, like it is done in the OmniParser example. Args: text: text to be searched for """ raise NotImplementedError
@property def name(self) -> str: """Name of this application (not unique if multiple instances are opened).""" return self._name @property def uuid(self) -> uuid.UUID: """UUID of the application (unique, even for multiple instances).""" return self._uuid @property def focused(self): """Signals whether this application receives mouse and keyboard inputs.""" return self._focused @focused.setter def focused(self, value: bool): self._focused = value @property def size(self): """Compute the size of the application window. Returns: The size of the application window in pixels. 0, if coordinates are unknown. """ if self.coordinates is None: return 0 return (self.coordinates[2] - self.coordinates[0]) * ( self.coordinates[3] - self.coordinates[1] )
class GenericPopup(GenericApplication):
    """Representation of a popup window opened by another application.

    Should not be used on its own, but created through GenericApplication's
    open_popup method.
    """

    def __init__(
        self,
        parent_application: GenericApplication,
    ):
        """Create a popup that belongs to ``parent_application``.

        The popup reuses the parent's application type, QEMU monitor session and
        parent notifier; its name is the parent's name suffixed with ``_popup``.

        Args:
            parent_application: application that opened this popup
        """
        popup_name = parent_application.name + "_popup"
        super().__init__(
            name=popup_name,
            application_type=parent_application._type,
            qemu_monitor_session=parent_application._qs,
            parent_notifier=parent_application._parent_notifier,
        )
        self._parent_application = parent_application

    def close(self):
        """Notifies the desktop env about the closing of a popup window.

        Note:
            Popups are closed indirectly by pressing a certain key and NOT alt-f4
        """
        owner = self._parent_application
        self._parent_notifier(
            ApplicationEvent.APPLICATION_POPUP_CLOSED,
            application_reference=owner,
            popup_reference=self,
        )
class FileDialogue(GenericPopup):
    """Popup representing a file dialogue that is driven via the keyboard."""

    def __init__(self, parent_application: GenericApplication):
        """Create a file dialogue popup for ``parent_application``.

        Args:
            parent_application: application that opened this file dialogue
        """
        super().__init__(parent_application)

    def save_to_directory(
        self, destination: pathlib.Path | None = None, name: str | None = None
    ):
        """Confirm the file dialogue, optionally setting file name and location.

        Sends the key presses needed to save, answers a possible overwrite
        dialogue and finally reports the popup as closed.

        Args:
            destination: if given, it is entered after sending 'ctrl-l' and
                confirmed with return
            name: if given, it is typed into the dialogue first
        """
        if name:
            self.send_text(name)

        if destination:
            # presumably 'ctrl-l' switches to the location entry - TODO confirm
            self.send_key_combination("ctrl-l")
            self.send_text(str(destination))
            self.send_key_combination("ret")

        self.send_key_combination("ret")  # select 'Save'

        # check whether there is an overwrite dialogue
        recognized_lines = self.extract_text()[1]
        overwrite_hints = ["already exists", "replace it", "replace"]
        if text_line_contains(recognized_lines, overwrite_hints):
            self.send_key_combination("left")  # select 'Yes'
            self.send_key_combination("ret")  # overwrite the file

        # report the file dialogue as closed to the desktop environment
        self.close()