Commit

Merge pull request #14 from Eating-Wisely-Labs/GPT4VisionAPI_Update_PR1
Gpt4 vision api update pr1
kyegomez authored Jan 14, 2025
2 parents 76b76af + f06ef08 commit 338e7c4
Showing 3 changed files with 80 additions and 31 deletions.

10 changes: 5 additions & 5 deletions .github/labeler.yml

@@ -7,7 +7,7 @@ root:
   - any-glob-to-any-file: '*'
 
 # Add 'Documentation' label to any changes within 'docs' folder or any subfolders
-Documentation:
+DocumentationChanged:
 - changed-files:
   - any-glob-to-any-file: docs/**

@@ -27,14 +27,14 @@ ghactions:
 Scripts:
 - changed-files:
   - any-glob-to-any-file: scripts/*
 
 ## Equivalent of the above mentioned configuration using another syntax
-Documentation:
+DocsAndGuides:
 - changed-files:
   - any-glob-to-any-file: ['docs/*', 'guides/*']
 
-# Add 'Documentation' label to any change to .md files within the entire repository
-Documentation:
+# Add 'DocsMarkdown' label to any change to .md files within the entire repository
+DocsMarkdown:
 - changed-files:
   - any-glob-to-any-file: '**/*.md'
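
A note on why the rules above could not all keep the key name Documentation: the labeler config is a single YAML mapping, and duplicate mapping keys are either rejected or silently collapsed to one entry depending on the YAML loader, so only one of the three rules could ever take effect. Renaming them to DocumentationChanged, DocsAndGuides, and DocsMarkdown keeps all three rules (and three distinct labels) active. The sketch below is not part of this commit; it only demonstrates the collapse with PyYAML, which keeps the last occurrence of a duplicated key, using abbreviated rule contents from the diff above.

# Minimal sketch, assuming PyYAML is installed: duplicate top-level keys collapse,
# so two 'Documentation' rules become one after parsing.
import yaml

config = """
Documentation:
- changed-files:
  - any-glob-to-any-file: docs/**
Documentation:
- changed-files:
  - any-glob-to-any-file: ['docs/*', 'guides/*']
"""

parsed = yaml.safe_load(config)
print(list(parsed.keys()))       # ['Documentation'] -- only one rule survives
print(parsed["Documentation"])   # the last rule's contents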

1 change: 1 addition & 0 deletions .github/workflows/label.yml

@@ -20,3 +20,4 @@ jobs:
     - uses: actions/[email protected]
       with:
         repo-token: "${{ secrets.GITHUB_TOKEN }}"
+        configuration-path: .github/labeler.yml

100 changes: 74 additions & 26 deletions swarm_models/gpt4_vision_api.py

@@ -59,7 +59,7 @@ class GPT4VisionAPI(BaseMultiModalModel):
     def __init__(
         self,
         openai_api_key: str = openai_api_key,
-        model_name: str = "gpt-4-vision-preview",
+        model_name: str = "gpt-4o-mini",
         logging_enabled: bool = False,
         max_workers: int = 10,
         max_tokens: str = 300,

@@ -71,7 +71,7 @@ def __init__(
         *args,
         **kwargs,
     ):
-        super(GPT4VisionAPI).__init__(*args, **kwargs)
+        super(GPT4VisionAPI, self).__init__(*args, **kwargs)
         self.openai_api_key = openai_api_key
         self.logging_enabled = logging_enabled
         self.model_name = model_name
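
For context on the one-line fix above: super() called with only the class returns an unbound super object, so the chained BaseMultiModalModel initializer never actually ran. A minimal standalone sketch of the difference, not taken from this repository:

class Base:
    def __init__(self):
        self.ready = True

class Broken(Base):
    def __init__(self):
        super(Broken).__init__()       # unbound super: Base.__init__ is never called

class Fixed(Base):
    def __init__(self):
        super(Fixed, self).__init__()  # bound super: Base.__init__ runs

print(hasattr(Broken(), "ready"))  # False
print(hasattr(Fixed(), "ready"))   # True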

@@ -83,6 +83,7 @@ def __init__(
         self.meta_prompt = meta_prompt
         self.system_prompt = system_prompt
 
+
         if self.logging_enabled:
             logging.basicConfig(level=logging.DEBUG)
         else:

@@ -93,6 +94,7 @@ def __init__(
         if self.meta_prompt:
             self.system_prompt = self.meta_prompt_init()
 
+
     def encode_image(self, img: str):
         """Encode image to base64."""
         if not os.path.exists(img):
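
The next hunk's compose_messages relies on encode_image() producing a base64 string that gets wrapped in a data URL. A small illustrative sketch of that encoding step, with a placeholder file name:

import base64

with open("sample.jpg", "rb") as f:  # placeholder path
    encoded = base64.b64encode(f.read()).decode("utf-8")

# The form embedded into the image_url content part:
data_url = f"data:image/jpeg;base64,{encoded}"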

@@ -111,51 +113,97 @@ def download_img_then_encode(self, img: str):
         response = requests.get(img)
         return base64.b64encode(response.content).decode("utf-8")
 
+    def compose_messages(self, task: str, img: str, img_list: list = None, context: list = None):
+        """Compose the messages payload for the GPT-4 Vision API. If an invalid
+        image path is provided, None is returned, meaning the payload is not valid.
+
+        Parameters
+        ----------
+        task : str
+            The task to run the model on.
+        img : str
+            The image to run the task on.
+        img_list : list
+            A list of images to run the task on.
+        context : list
+            A list of context messages to run the task on.
+
+        Returns
+        -------
+        messages : list
+            The messages payload for the gpt-4o Vision API;
+            None means the payload is not valid.
+        """
+        # Compose the messages
+        messages = []
+        # Add the system prompt to the messages
+        messages.append({"role": "system", "content": self.system_prompt})
+        # Add the context to the messages
+        messages = messages + context if context else messages
+
+        # Compose the content
+        content = []
+        # Add the task to the content
+        content.append({"type": "text", "text": task})
+        # Add the images to the content
+        images = [img] if img else []
+        images = images + img_list if img_list else images
+        if len(images) > 0:
+            for image in images:
+                if image:
+                    if os.path.exists(image):
+                        encoded_img = self.encode_image(image)
+                        content.append({"type": "image_url", "image_url": {"url": f"data:image/jpeg;base64,{encoded_img}"}})
+                    elif image.startswith("http"):
+                        content.append({"type": "image_url", "image_url": {"url": f"{image}"}})
+                    else:
+                        logger.error(f"Image file not found: {image} or not a valid URL")
+                        print(f"Image file not found: {image} or not a valid URL")
+                        return None
+        content = {
+            "role": "user",
+            "content": content
+        }
+        messages.append(content)
+        return messages
+        return None
+
     # Function to handle vision tasks
     def run(
         self,
         task: str = None,
         img: str = None,
         multi_imgs: list = None,
         return_json: bool = False,
+        messages: list = None,
         *args,
         **kwargs,
     ):
         """Run the model."""
         try:
-            base64_image = self.encode_image(img)
             headers = {
                 "Content-Type": "application/json",
                 "Authorization": f"Bearer {self.openai_api_key}",
             }
+            if messages is None:
+                messages = self.compose_messages(task, img, multi_imgs, messages)
+
+            if messages is None:
+                raise ValueError("Image path is invalid, please check the image path")
+
             payload = {
                 "model": self.model_name,
-                "messages": [
-                    {
-                        "role": "system",
-                        "content": [self.system_prompt],
-                    },
-                    {
-                        "role": "user",
-                        "content": [
-                            {"type": "text", "text": task},
-                            {
-                                "type": "image_url",
-                                "image_url": {
-                                    "url": f"data:image/jpeg;base64,{base64_image}"
-                                },
-                            },
-                        ],
-                    },
-                ],
                 "max_tokens": self.max_tokens,
-                **kwargs,
+                "messages": messages
             }
-            response = requests.post(headers=headers, json=payload)
+
+            response = requests.post(self.openai_proxy, headers=headers, json=payload)
+
             # Get the response as a JSON object
             response_json = response.json()
+
+
             # Return the JSON object if return_json is True
             if return_json is True:
                 print(response_json)
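
For reference, a hedged sketch (not part of this commit) of the request the reworked run() ends up sending, assuming self.openai_proxy points at OpenAI's chat-completions endpoint and the key is read from an OPENAI_API_KEY environment variable; the model name, prompt, and image URL below are placeholders:

import os
import requests

payload = {
    "model": "gpt-4o-mini",
    "max_tokens": 300,
    "messages": [
        {"role": "system", "content": "You are a multi-modal agent."},
        {
            "role": "user",
            "content": [
                {"type": "text", "text": "Describe this image."},
                {"type": "image_url", "image_url": {"url": "https://example.com/sample.jpg"}},
            ],
        },
    ],
}

response = requests.post(
    "https://api.openai.com/v1/chat/completions",  # assumed value of openai_proxy
    headers={
        "Content-Type": "application/json",
        "Authorization": f"Bearer {os.environ['OPENAI_API_KEY']}",
    },
    json=payload,
)
print(response.json()["choices"][0]["message"]["content"])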

@@ -198,7 +246,7 @@ def video_prompt(self, frames):
"""
PROMPT = f"""
These are frames from a video that I want to upload. Generate a compelling description that I can upload along with the video:
{frames}
"""
return PROMPT

@@ -374,4 +422,4 @@ def print_dashboard(self):
 # example, instead of '1 - 4', list as '[1], [2], [3], [4]'. These labels could be
 # numbers or letters and typically correspond to specific segments or parts of the image.
 # """
-# return META_PROMPT
+# return META_PROMPT
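
Finally, a hedged usage sketch of the updated class. The constructor and run() argument names come from this diff; the import path, file paths, and task text are assumptions, and what run() returns when return_json is False depends on code outside the hunks shown above:

from swarm_models.gpt4_vision_api import GPT4VisionAPI  # assumed package layout

# Assumes the API key is supplied the way the module expects (e.g. via environment).
model = GPT4VisionAPI(model_name="gpt-4o-mini", max_tokens=300)

result = model.run(
    task="Describe what is in this image.",
    img="photos/example.jpg",          # local path; http(s) URLs also work
    multi_imgs=["photos/second.jpg"],  # optional extra images
    return_json=False,
)
print(result)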
