TencentQQGYLab · youngfreeFJS · Jul 19, 2024 · Jul 19, 2024
diff --git a/config.yaml b/config.yaml
@@ -16,4 +16,6 @@ ANDROID_XML_DIR: "/sdcard"  # Set the directory on your Android device to store
 DOC_REFINE: false  # Set this to true will make the agent refine existing documentation based on the latest demonstration; otherwise, the agent will not regenerate a new documentation for elements with the same resource ID.
 MAX_ROUNDS: 20  # Set the round limit for the agent to complete the task
 DARK_MODE: false  # Set this to true if your app is in dark mode to enhance the element labeling
-MIN_DIST: 30  # The minimum distance between elements to prevent overlapping during the labeling process
+MIN_DIST: 30  # The minimum distance between elements to prevent overlapping during the labeling process
+USE_SNAPSHOT_COMPRESS: true  # Set this to true to compress screenshots before they are uploaded to the LLM, which helps the interface respond more quickly
+SNAPSHOT_COMPRESS_MEGABYTE_SIZE: 0.5  # Target size (in megabytes) of the compressed screenshot that is uploaded to the LLM; smaller images help the interface respond more quickly
diff --git a/scripts/model.py b/scripts/model.py
@@ -6,7 +6,13 @@
 import requests
 import dashscope
 
-from utils import print_with_color, encode_image
+from config import load_config
+configs = load_config()
+DEFAULT_SNAPSHOT_MEGABYTES: float = configs["SNAPSHOT_COMPRESS_MEGABYTE_SIZE"]  # target size in megabytes; may be fractional
+USE_SNAPSHOT_COMPRESS: bool = configs["USE_SNAPSHOT_COMPRESS"]  # when true, screenshots are compressed before being encoded
+
+
+from utils import print_with_color, encode_image, compress_image_size
 
 
 class BaseModel:
@@ -35,7 +41,10 @@ def get_model_response(self, prompt: str, images: List[str]) -> (bool, str):
             }
         ]
         for img in images:
-            base64_img = encode_image(img)
+            if USE_SNAPSHOT_COMPRESS:
+                base64_img = encode_image(compress_image_size(img, DEFAULT_SNAPSHOT_MEGABYTES))
+            else:
+                base64_img = encode_image(img)
             content.append({
                 "type": "image_url",
                 "image_url": {

diff --git a/scripts/utils.py b/scripts/utils.py
@@ -1,3 +1,4 @@
+import os
 import base64
 import cv2
 import pyshine as ps
@@ -98,3 +99,52 @@ def get_unit_len(n):
 def encode_image(image_path):
     with open(image_path, "rb") as image_file:
         return base64.b64encode(image_file.read()).decode('utf-8')
+
+
+def get_image_megabyte_size(image_path: str) -> float:
+    '''
+    Return the size of the file at `image_path` in megabytes (1 MB = 10**6 bytes).
+    '''
+    return os.stat(image_path).st_size / 1000 / 1000
+
+
+def compress_image_size(image_path: str, expect_megabyte: float) -> str:
+    '''
+    Re-encode an image as JPEG, lowering quality until it fits a size budget.
+
+    The image is written next to the original as `<name>_compression.jpg`.
+    Starting at JPEG quality 95, the file is rewritten at progressively lower
+    quality until its size is at most `expect_megabyte` or the quality floor
+    is reached, so the result may still exceed the budget. Smaller images
+    reduce prompt volume and the response time of the AI (openai, qwen,
+    etc...) interface.
+
+    Args:
+        image_path (str): path of the original image.
+        expect_megabyte (float): target size in megabytes (1 MB = 10**6 bytes).
+
+    Returns:
+        str: path of the compressed JPEG image.
+
+    Raises:
+        ValueError: if the image at `image_path` cannot be read or decoded.
+
+    Example:
+        In one run, a 1995296-byte `1_before_labeled.png` was compressed to
+        a 459612-byte `1_before_labeled_compression.jpg`.
+    '''
+    image = cv2.imread(image_path)
+    if image is None:
+        # cv2.imread signals failure by returning None instead of raising.
+        raise ValueError(f'cannot read image: {image_path}')
+
+    compressed_image_path: str = os.path.splitext(image_path)[0] + '_compression.jpg'
+    quality: int = 95
+    while quality > 10:
+        cv2.imwrite(compressed_image_path, image, [cv2.IMWRITE_JPEG_QUALITY, quality])
+        current_megabyte_size: float = get_image_megabyte_size(compressed_image_path)
+        print_with_color(f'compress image size to: {current_megabyte_size} MB.')
+        if current_megabyte_size <= expect_megabyte:
+            break
+        quality -= 10 if current_megabyte_size >= 6.5 else 5
+    return compressed_image_path