Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Feat: Adding screenshot compression function, will help reduce the RT of LLM interface. #86

Open
wants to merge 2 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -16,4 +16,6 @@ ANDROID_XML_DIR: "/sdcard" # Set the directory on your Android device to store
DOC_REFINE: false # Setting this to true makes the agent refine existing documentation based on the latest demonstration; otherwise, the agent will not regenerate documentation for elements with the same resource ID.
MAX_ROUNDS: 20 # Set the round limit for the agent to complete the task
DARK_MODE: false # Set this to true if your app is in dark mode to enhance the element labeling
MIN_DIST: 30 # The minimum distance between elements to prevent overlapping during the labeling process
MIN_DIST: 30 # The minimum distance between elements to prevent overlapping during the labeling process
USE_SNAPSHOT_COMPRESS: true # Set this to true to compress screenshots before they are uploaded to the LLM, which helps the interface respond more quickly.
SNAPSHOT_COMPRESS_MEGABYTE_SIZE: 0.5 # The target size (in megabytes) of the compressed screenshot that is uploaded to the LLM; smaller images help the interface respond more quickly.
13 changes: 11 additions & 2 deletions scripts/model.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,13 @@
import requests
import dashscope

from utils import print_with_color, encode_image
from config import load_config
# Load the project configuration once, when this module is imported.
configs = load_config()
# Target size, in megabytes, for compressed screenshots. The shipped config
# uses a fractional value (0.5), so this is a float rather than an int.
DEFAULT_SNAPSHOT_MEGABYTES: float = configs["SNAPSHOT_COMPRESS_MEGABYTE_SIZE"]
# Whether screenshots are compressed before being base64-encoded for the model.
USE_SNAPSHOT_COMPRESS: bool = configs["USE_SNAPSHOT_COMPRESS"]


from utils import print_with_color, encode_image, compress_image_size


class BaseModel:
Expand Down Expand Up @@ -35,7 +41,10 @@ def get_model_response(self, prompt: str, images: List[str]) -> (bool, str):
}
]
for img in images:
base64_img = encode_image(img)
if USE_SNAPSHOT_COMPRESS:
base64_img = encode_image(compress_image_size(img, DEFAULT_SNAPSHOT_MEGABYTES))
else:
base64_img = encode_image(img)
content.append({
"type": "image_url",
"image_url": {
Expand Down
50 changes: 50 additions & 0 deletions scripts/utils.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import os
import base64
import cv2
import pyshine as ps
Expand Down Expand Up @@ -98,3 +99,52 @@ def get_unit_len(n):
def encode_image(image_path):
    '''
    Read a file in binary mode and return its content as base64 text.

    Args:
        image_path: path of the file to read.

    Returns:
        str: the base64 encoding of the file's bytes, decoded as UTF-8.
    '''
    with open(image_path, "rb") as binary_stream:
        raw_bytes = binary_stream.read()
    encoded_bytes = base64.b64encode(raw_bytes)
    return encoded_bytes.decode('utf-8')


def get_image_megabyte_size(image_path: str) -> float:
    '''
    Get the size of a file in megabytes.

    One megabyte is counted as 1,000,000 bytes (decimal, not 1024 * 1024).

    Args:
        image_path (str): path of the file to measure.

    Returns:
        float: file size in megabytes; fractional for most files.

    Raises:
        OSError: if the file does not exist or cannot be accessed.
    '''
    return os.path.getsize(image_path) / 1_000_000


def compress_image_size(image_path: str, expect_megabyte: float) -> str:
    '''
    Re-encode an image as JPEG, lowering the quality until it fits a size budget.

    Smaller screenshots reduce the prompt volume sent to the AI interface
    (openai, qwen, etc.) and therefore its response time.

    The result is written next to the source as ``<name>_compression.jpg``.
    Encoding starts at JPEG quality 95. After each attempt that is still above
    the budget, the quality is lowered by 10 if the file is 6.5 MB or larger,
    otherwise by 5. The loop stops once the file is no larger than
    ``expect_megabyte`` or the next quality would be 10 or below; in the
    latter case the last file written is kept even though it exceeds the
    budget.

    Args:
        image_path (str): path of the original image.
        expect_megabyte (float): target size in megabytes; may be fractional
            (e.g. 0.5).

    Returns:
        str: path of the compressed JPEG file.

    Raises:
        ValueError: if the image at ``image_path`` cannot be read.

    Example:
        With a 0.5 MB budget, a 1,995,296-byte ``1_before_labeled.png`` was
        compressed to a 459,612-byte ``1_before_labeled_compression.jpg``.
    '''
    image = cv2.imread(image_path)
    if image is None:
        # cv2.imread signals failure by returning None instead of raising;
        # fail here with a clear message rather than inside cv2.imwrite.
        raise ValueError(f'cannot read image: {image_path}')

    compressed_image_path: str = os.path.splitext(image_path)[0] + '_compression.jpg'

    quality: int = 95
    while quality > 10:
        cv2.imwrite(compressed_image_path, image, [cv2.IMWRITE_JPEG_QUALITY, quality])
        # Measure once per attempt and reuse the value below.
        current_megabyte_size: float = get_image_megabyte_size(compressed_image_path)
        print_with_color(f'compress image size to: {current_megabyte_size} MB.')
        if current_megabyte_size <= expect_megabyte:
            break
        # Take bigger steps while the file is still far above typical budgets.
        quality -= 10 if current_megabyte_size >= 6.5 else 5
    return compressed_image_path