import pathlib
import queue
import uuid
from enum import Enum, auto
from os import PathLike
from time import sleep
from typing import Any, Callable, Protocol
import cv2
import numpy as np
from fortrace.core.qemu_monitor import QEMUMonitorSession
from fortrace.utility.exceptions import DesktopEnvironmentException
from fortrace.utility.image_processing.image_similarity import (
detect_newly_opened_window,
)
from fortrace.utility.image_processing.text_detection import (
detect_and_recognize_text,
text_line_contains,
)
from fortrace.utility.logger_helper import setup_logger
# Module-level logger named after this module; used for the warnings emitted below.
logger = setup_logger(__name__)
class ApplicationType(Enum):
    """Supported application types, used to select the correct factory method."""

    FILE_MANAGER = auto()
    WEB_BROWSER = auto()
    TERMINAL = auto()
    TEXT_EDITOR = auto()
    MAIL_CLIENT = auto()
    # Use this value for special applications that are not natively supported.
    OTHER = auto()
class ApplicationEvent(Enum):
    """Application events that are processed by the desktop environment."""

    CLOSED = auto()
    NEW_APPLICATION_OPENED = auto()
    FOCUS_SHIFTED = auto()
    # Sent to actively request focus for an application.
    FOCUS_APPLICATION = auto()
    APPLICATION_RESIZED = auto()
    APPLICATION_POPUP_OPENED = auto()
    APPLICATION_POPUP_CLOSED = auto()
class ParentNotifier(Protocol):
    """Structural type of the callback used to report application events.

    It exists to provide type hints and to enable static type checking in the IDE.
    It refers to the ``DesktopEnvironment._on_change`` method.
    """

    def __call__(self, event: ApplicationEvent, **kwargs: Any):
        """Report ``event`` to the parent.

        Args:
            event: the application event that occurred or is requested
            **kwargs: event-specific payload; the applications in this module pass
                ``application_reference`` (the application the event refers to)
        """
        ...
class GenericApplication:
    """Representation of a generic graphical application window.

    This class resembles the general interface of all graphical applications with which
    ForTrace might interact. It can be used to control applications with keyboard
    shortcuts and simple commands, without writing any application-specific Python
    classes.

    Attributes:
        coordinates: bounding box of the application window in OpenCV's coordinates
            ``(x, y, x + w, y + h)``, or ``None`` while the position is unknown.
    """

    coordinates: tuple[int, int, int, int] | None

    def __init__(
        self,
        name: str,
        application_type: ApplicationType,
        qemu_monitor_session: QEMUMonitorSession,
        parent_notifier: ParentNotifier,
    ):
        """Construct a GenericApplication object.

        This is the representation of a graphical application window. At the end of
        construction the parent is notified with
        ``ApplicationEvent.NEW_APPLICATION_OPENED``.

        Args:
            name: name of the application
            application_type: type is used to determine the correct factory method
            qemu_monitor_session: handle to active QEMUMonitor session of the domain
            parent_notifier: desktop environment attribute to handle window changes
        """
        self._name = name
        # Distinguishes between different instances of the same application.
        self._uuid = uuid.uuid1()
        self._type = application_type
        self._qs = qemu_monitor_session
        self._parent_notifier = parent_notifier
        self._focused = True
        self.coordinates = None
        self._popup_windows = queue.LifoQueue()
        # Notify last, so the parent receives a fully initialised object.
        self._parent_notifier(
            ApplicationEvent.NEW_APPLICATION_OPENED, application_reference=self
        )

    def send_key_combination(self, key_combination: str, times: int = 1):
        """Send a direct key combination to the application.

        Args:
            key_combination: key combination to be sent (using qemu key_codes)
            times: number of times to send a key combination
        """
        self._qs.send_key_combination(key_combination, times)

    def send_text(self, text: str, end_ret: bool = False):
        """Send text to the application.

        Args:
            text: text to be sent
            end_ret: should return be pressed after the text is entered?

        Note:
            The text is allowed to contain '\\n' and '\\r' characters, which are
            respected
        """
        self._qs.send_text(text, end_ret)

    def close(self):
        """Close an application with 'alt-f4' key combination.

        If the application is not focused, the parent is first asked to focus it
        (``ApplicationEvent.FOCUS_APPLICATION``). After the key combination has been
        sent, the parent is notified with ``ApplicationEvent.CLOSED``.
        """
        if not self._focused:
            self._parent_notifier(
                ApplicationEvent.FOCUS_APPLICATION, application_reference=self
            )
            sleep(2)  # TODO: replace with nrmse and wait for change?
        self._qs.send_key_combination("alt-f4")
        self._parent_notifier(ApplicationEvent.CLOSED, application_reference=self)

    def take_screenshot(self) -> np.ndarray:
        """Take a screenshot of the application window.

        The screen is captured via the QEMU monitor session and decoded with OpenCV.
        If ``coordinates`` is unknown, a warning is logged and the whole screen is
        returned; otherwise the image is cropped to the window.

        Returns:
            OpenCV image of the application window with all color channels
        """
        raw_image = self._qs.take_screenshot()
        encoded = np.frombuffer(raw_image, np.uint8)
        screenshot = cv2.imdecode(encoded, cv2.IMREAD_COLOR)
        if self.coordinates is None:
            logger.warning(
                "The size of application %s was not determined in the "
                "open_application method of the OS. Thus, this method returns the whole"
                " screen",
                self.name,
            )
            return screenshot
        return self._crop_to_window(screenshot, self.coordinates)

    @staticmethod
    def _crop_to_window(
        image: np.ndarray, coordinates: tuple[int, int, int, int]
    ) -> np.ndarray:
        """Crop ``image`` to the box ``(x, y, x + w, y + h)``.

        Image arrays are indexed ``[row, column]``, i.e. ``[y, x]``, so the y-range has
        to select the first axis and the x-range the second one.
        """
        x_start, y_start, x_end, y_end = coordinates
        return image[y_start:y_end, x_start:x_end]

    def focus_element(self, text: str):
        """Focus the element with the specified text.

        This method acts as a stub for the integration of a screen parsing tool into
        ForTrace++. It ought to be overwritten on instance level, like it is done in
        the OmniParser example.

        Args:
            text: text to be searched for

        Raises:
            NotImplementedError: always, unless the method has been overwritten
        """
        raise NotImplementedError

    @property
    def name(self) -> str:
        """Name of this application (not unique if multiple instances are opened)."""
        return self._name

    @property
    def uuid(self) -> uuid.UUID:
        """UUID of the application (unique, even for multiple instances)."""
        return self._uuid

    @property
    def focused(self) -> bool:
        """Signals whether this application receives mouse and keyboard inputs."""
        return self._focused

    @focused.setter
    def focused(self, value: bool):
        self._focused = value

    @property
    def size(self) -> int:
        """Compute the size of the application window.

        Returns:
            The size of the application window in pixels. 0, if coordinates are unknown.
        """
        if self.coordinates is None:
            return 0
        x_start, y_start, x_end, y_end = self.coordinates
        return (x_end - x_start) * (y_end - y_start)
class FileDialogue(GenericPopup):
    """Popup window used to choose the name and location of a file to be saved.

    The dialogue is driven purely by keyboard input that is sent through the helpers
    inherited from the base class.
    """

    def __init__(self, parent_application: GenericApplication):
        """Create the file dialogue popup.

        Args:
            parent_application: application handed on to the ``GenericPopup``
                constructor
        """
        super().__init__(parent_application)

    def save_to_directory(
        self, destination: pathlib.Path | None = None, name: str | None = None
    ):
        """Confirm the save dialogue, optionally setting file name and directory.

        If the screen text afterwards indicates that the file already exists, the
        overwrite question is confirmed as well. Finally, the dialogue is closed.

        Args:
            destination: directory to save to; if omitted, the directory preselected
                by the dialogue is kept
            name: file name to enter; if omitted, the preselected name is kept
        """
        if name:
            self.send_text(name)
        if destination:
            # NOTE(review): 'ctrl-l' presumably switches the dialogue to its editable
            # location entry - confirm for the targeted desktop environment.
            self.send_key_combination("ctrl-l")
            self.send_text(str(destination))
            self.send_key_combination("ret")
        self.send_key_combination("ret")  # select 'Save'

        # Check whether there is an overwrite dialogue.
        # NOTE(review): extract_text() is defined outside this block; index 1 is
        # assumed to hold the recognised text lines - confirm.
        overwrite_hints = ["already exists", "replace it", "replace"]
        if text_line_contains(self.extract_text()[1], overwrite_hints):
            self.send_key_combination("left")  # select 'Yes'
            self.send_key_combination("ret")  # overwrite the file
        self.close()  # this closes the file dialogue window