Toufool · Avasam · Jun 16, 2024 · Jan 17, 2024 · Jan 17, 2024 · Jan 17, 2024
diff --git a/README.md b/README.md
@@ -25,6 +25,7 @@ This program can be used to automatically start, split, and reset your preferred
 - Download the [latest version](/../../releases/latest)
 - You can also check out the [latest dev builds](/../../actions/workflows/lint-and-build.yml?query=event%3Apush+is%3Asuccess) (requires a GitHub account)  
   (If you don't have a GitHub account, you can try [nightly.link](https://nightly.link/Toufool/AutoSplit/workflows/lint-and-build/dev))
+- Tesseract-OCR (optional; required for text recognition as an alternative comparison method). See [Tesseract install](#tesseract-install) below for installation instructions.
 
 - Linux users must ensure they are in the `tty` and `input` groups and have write access to `/dev/uinput`. You can run the following commands to do so:
 
@@ -51,7 +52,6 @@ This program can be used to automatically start, split, and reset your preferred
   - Wayland is not currently supported
   - WSL2/WSLg requires an additional Desktop Environment, external X11 server, and/or systemd
 - Python 3.10+ (Not required for normal use. Refer to the [build instructions](/docs/build%20instructions.md) if you'd like run the application directly in Python).
-- Tesseract-OCR (optional; requierd for text recognition as an alternative comparison method). See https://github.com/UB-Mannheim/tesseract/wiki for installation instructions.
 
 ## OPTIONS
 
@@ -230,17 +230,16 @@ The Start Image is similar to the Reset Image. You can only have one Start Image
 ### Text Recognition (OCR)
 
 You can use text recognition as an alternative comparison method.
-First you need to install tesseract and include it in your PATH variable. See [Compatibility](#Compatibility) above.
 
-To include tesseract in your PATH variable you can use this powershell snippet.
+#### Tesseract install
 
-Note: change the `$tesseract_path` variable to the location where tesseract is installed.
+First you need to install tesseract and include it in your system or user environment variables.
+- See <https://tesseract-ocr.github.io/tessdoc/Installation.html> for installation instructions on all platforms.
+- For Windows:
+  1. You can go directly to <https://github.com/UB-Mannheim/tesseract/wiki> to find the installer.
+  2. If you change the "Destination Folder" during install, then you'll also need to add it to your `PATH` environment variable.
 
-```
-$path = [System.Environment]::GetEnvironmentVariable("Path", "User")
-$tesseract_path = "C:\Program Files\Tesseract-OCR"
-[System.Environment]::SetEnvironmentVariable("Path", "$path;$tesseract_path", "User")
-```
+#### Usage
 
 To use this feature you need to place a text file (.txt) in your splits folder instead of an image file.
 
@@ -250,22 +249,31 @@ Filename: `001_start_auto_splitter.txt`
 
 Content:
 
-```
+```toml
 texts = ["complete any 2 encounters"]
 top_left = 275
 top_right = 540
 bottom_left = 70
 bottom_right = 95
+method = 0
 fps_limit = 1
 ```
 
 The `texts` field is an array and can take more than one text to look for:
 
-```
+```toml
 texts = ["look for me", "or this text"]
 ```
 
-The `top`, `bottom`, `left` and `right` options define a rectangle where the text you are looking for is expected to appear in the image.
+Note: for now we only use lowercase letters in the comparison. All uppercase letters are converted to lowercase before the comparison.
+
+The `top_left` and `top_right` (both X-axis) and `bottom_left` and `bottom_right` (both Y-axis) options define a rectangle where the text you are looking for is expected to appear in the image.
+
+Currently there are three comparison methods:
+
+* `0` - uses the Levenshtein distance (the default)
+* `1` - checks if the OCR text contains the searched text
+* `2` - looks for a perfect 1:1 match
 
 Note: This method can cause high CPU usage at the standard comparison FPS. You should therefor limit the comparison FPS when you use this method to 1 or 2 FPS using the `fps_limit` option.
 The size of the selected rectangle can also impact the CPU load (bigger = more CPU load).

diff --git a/src/AutoSplitImage.py b/src/AutoSplitImage.py
@@ -10,7 +10,7 @@
 
 import error_messages
 from compare import check_if_image_has_transparency, extract_and_compare_text, get_comparison_method_by_index
-from utils import BGR_CHANNEL_COUNT, MAXBYTE, ColorChannel, ImageShape, is_valid_image
+from utils import BGR_CHANNEL_COUNT, MAXBYTE, TESSERACT_PATH, ColorChannel, ImageShape, is_valid_image
 
 if TYPE_CHECKING:
     from AutoSplit import AutoSplit
@@ -41,19 +41,20 @@ class AutoSplitImage:
     image_type: ImageType
     byte_array: MatLike | None = None
     mask: MatLike | None = None
-    texts: list[str]
+    texts: list[str] = []
     # This value is internal, check for mask instead
     _has_transparency = False
     # These values should be overriden by some Defaults if None. Use getters instead
     __delay_time: float | None = None
     __comparison_method: int | None = None
     __pause_time: float | None = None
     __similarity_threshold: float | None = None
-    __x: int
-    __xx: int
-    __y: int
-    __yy: int
-    __fps_limit: int
+    __x: int = 0
+    __xx: int = 0
+    __y: int = 0
+    __yy: int = 0
+    __ocr_comparison_method: int = 0
+    __fps_limit: int = 0
 
     @property
     def is_ocr(self):
@@ -106,16 +107,10 @@ def __init__(self, path: str):
         self.filename = os.path.split(path)[-1].lower()
         self.flags = flags_from_filename(self.filename)
         self.loops = loop_from_filename(self.filename)
-        self.texts = list[str]()
         self.__delay_time = delay_time_from_filename(self.filename)
         self.__comparison_method = comparison_method_from_filename(self.filename)
         self.__pause_time = pause_from_filename(self.filename)
         self.__similarity_threshold = threshold_from_filename(self.filename)
-        self.__x = 0
-        self.__xx = 0
-        self.__y = 0
-        self.__yy = 0
-        self.__fps_limit = 0
         if path.endswith("txt"):
             self.__parse_text_file(path)
         else:
@@ -129,16 +124,26 @@ def __init__(self, path: str):
             self.image_type = ImageType.SPLIT
 
     def __parse_text_file(self, path: str):
+        if not TESSERACT_PATH:
+            error_messages.tesseract_missing(path)
+            return
+
         with open(path, encoding="utf-8") as f:
             data = toml.load(f)
-            self.texts = data["texts"]
-            self.__x = data["top_left"]
-            self.__xx = data["top_right"]
-            self.__y = data["bottom_left"]
-            self.__yy = data["bottom_right"]
+            self.texts = [text.lower().strip() for text in data["texts"]]
+            self.__x = abs(data["top_left"])
+            self.__xx = abs(data["top_right"])
+            self.__y = abs(data["bottom_left"])
+            self.__yy = abs(data["bottom_right"])
+            if "method" in data:
+                self.__ocr_comparison_method = abs(data["method"])
             self.__fps_limit = 1
             if "fps_limit" in data:
-                self.fps_limit = data["fps_limit"]
+                self.__fps_limit = abs(data["fps_limit"])
+
+        if self.__xx <= self.__x or self.__yy <= self.__y:
+            error_messages.wrong_ocr_coordinates(path)
+            return
 
     def __read_image_bytes(self, path: str):
         image = cv2.imread(path, cv2.IMREAD_UNCHANGED)
@@ -190,7 +195,9 @@ def compare_with_capture(
             return 0.0
 
         if self.is_ocr:
-            return extract_and_compare_text(capture[self.__y:self.__yy, self.__x:self.__xx], self.texts)
+            return extract_and_compare_text(
+                capture[self.__y:self.__yy, self.__x:self.__xx], self.texts, self.__ocr_comparison_method,
+            )
 
         if not is_valid_image(self.byte_array):
             return 0.0

diff --git a/src/compare.py b/src/compare.py
@@ -13,6 +13,7 @@
 HISTOGRAM_SIZE = [8, 8, 8]
 RANGES = [0, MAXRANGE, 0, MAXRANGE, 0, MAXRANGE]
 MASK_SIZE_MULTIPLIER = ColorChannel.Alpha * MAXBYTE * MAXBYTE
+MAX_VALUE = 1.0
 
 
 def compare_histograms(source: MatLike, capture: MatLike, mask: MatLike | None = None):
@@ -127,41 +128,65 @@ def compare_phash(source: MatLike, capture: MatLike, mask: MatLike | None = None
     return 1 - (hash_diff / 64.0)
 
 
-def extract_and_compare_text(capture: MatLike, texts: list[str]):
+def extract_and_compare_text(capture: MatLike, texts: list[str], method_index: int):
     """
     Compares the extracted text of the given image and returns the similarity between the two texts.
     The best match of all texts is returned.
 
     @param capture: Image of any given shape as a numpy array
     @param texts: a list of strings to match for
+    @param method_index: the comparison method index to use
     @return: The similarity between the text in the image and the text supplied as a number 0 to 1.
     """
+    method = get_ocr_comparison_method_by_index(method_index)
     png = np.array(cv2.imencode(".png", capture)[1]).tobytes()
-    # If the string is found 1:1 in the string extracted from the image a 1 is returned.
-    # Otherwise the levenshtein ratio is calculated between the two strings and gets returned.
     # Especially with stylised characters, OCR could conceivably get the right
     # letter, but mix up the casing (m/M, o/O, t/T, etc.)
     image_string = run_tesseract(png).lower().strip()
 
     ratio = 0.0
     for text in texts:
-        # TODO: this 1:1 matching could lead to false positives
-        # maybe remove it and only rely on fuzzy matching?
-        # discussion: https://github.com/Toufool/AutoSplit/pull/272#discussion_r1477120477
-        if text in image_string:
-            ratio = 1.0
+        ratio = max(ratio, method(text, image_string))
+        if ratio == MAX_VALUE:
             break
-        ratio = max(ratio, Levenshtein.ratio(text, image_string))
     # TODO: debug: remove me
     if ratio > 0.9:  # noqa: PLR2004
         print(f"text from image ({ratio:,.2f}): {image_string}")
     return ratio
 
 
+def compare_levenshtein(a: str, b: str):  # OCR method 0: similarity as reported by Levenshtein.ratio
+    return Levenshtein.ratio(a, b)  # pyright: ignore [reportUnknownMemberType]
+
+
+def compare_submatch(a: str, b: str):  # OCR method 1: all-or-nothing substring check
+    if a in b:  # `a` occurs somewhere inside `b`
+        return MAX_VALUE
+    return 0.0  # no partial credit for near matches
+
+
+def compare_one_to_one(a: str, b: str):  # OCR method 2: all-or-nothing exact equality check
+    if a == b:  # strings must be identical
+        return MAX_VALUE
+    return 0.0  # any difference counts as a complete mismatch
+
+
 def __compare_dummy(*_: object):
     return 0.0
 
 
+def get_ocr_comparison_method_by_index(comparison_method_index: int):  # Map an OCR method index to its comparator
+    match comparison_method_index:
+        case 0:
+            return compare_levenshtein
+        case 1:
+            return compare_submatch
+        case 2:
+            return compare_one_to_one
+        case _:  # unknown index: fall back to the comparator that always returns 0.0
+            return __compare_dummy
+
+
 def get_comparison_method_by_index(comparison_method_index: int):
     match comparison_method_index:
         case 0:

diff --git a/src/error_messages.py b/src/error_messages.py
@@ -228,3 +228,19 @@ def handle_top_level_exceptions(exception: Exception) -> NoReturn:
     else:
         traceback.print_exception(type(exception), exception, exception.__traceback__)
     sys.exit(1)
+
+
+def tesseract_missing(ocr_split_file_path: str):  # Tell the user an OCR split file needs tesseract installed
+    set_text_message(  # link anchor is the slug of the README heading "Tesseract install"
+        f"{ocr_split_file_path!r} is an Optical Character Recognition split file but tesseract couldn't be found."
+        + f'\nPlease read <a href="https://github.com/{GITHUB_REPOSITORY}#tesseract-install">'
+        + f"github.com/{GITHUB_REPOSITORY}#tesseract-install</a> for installation instructions.",
+    )
+
+
+def wrong_ocr_coordinates(ocr_split_file_path: str):  # Tell the user the OCR rectangle in the split file is invalid
+    set_text_message(
+        f"{ocr_split_file_path!r} has invalid coordinates."
+        + "\nPlease make sure that the 'top_right' and 'bottom_right' coordinates are not equal to or lower than the "
+        + "'top_left' and 'bottom_left' coordinates.",
+    )