forked from All-Hands-AI/OpenHands
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathbrowse.py
104 lines (94 loc) · 4.31 KB
/
browse.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
from dataclasses import dataclass, field
from browsergym.utils.obs import flatten_axtree_to_str
from openhands.core.schema import ActionType, ObservationType
from openhands.events.observation.observation import Observation
@dataclass
class BrowserOutputObservation(Observation):
"""This data class represents the output of a browser."""
url: str
trigger_by_action: str
screenshot: str = field(repr=False) # don't show in repr
error: bool = False
observation: str = ObservationType.BROWSE
# do not include in the memory
open_pages_urls: list = field(default_factory=list)
active_page_index: int = -1
dom_object: dict = field(default_factory=dict, repr=False) # don't show in repr
axtree_object: dict = field(default_factory=dict, repr=False) # don't show in repr
extra_element_properties: dict = field(
default_factory=dict, repr=False
) # don't show in repr
last_browser_action: str = ''
last_browser_action_error: str = ''
focused_element_bid: str = ''
@property
def message(self) -> str:
return 'Visited ' + self.url
def __str__(self) -> str:
ret = (
'**BrowserOutputObservation**\n'
f'URL: {self.url}\n'
f'Error: {self.error}\n'
f'Open pages: {self.open_pages_urls}\n'
f'Active page index: {self.active_page_index}\n'
f'Last browser action: {self.last_browser_action}\n'
f'Last browser action error: {self.last_browser_action_error}\n'
f'Focused element bid: {self.focused_element_bid}\n'
)
ret += '--- Agent Observation ---\n'
ret += self.get_agent_obs_text()
return ret
def get_agent_obs_text(self) -> str:
"""Get a concise text that will be shown to the agent."""
if self.trigger_by_action == ActionType.BROWSE_INTERACTIVE:
text = f'[Current URL: {self.url}]\n'
text += f'[Focused element bid: {self.focused_element_bid}]\n\n'
if self.error:
text += (
'================ BEGIN error message ===============\n'
'The following error occurred when executing the last action:\n'
f'{self.last_browser_action_error}\n'
'================ END error message ===============\n'
)
else:
text += '[Action executed successfully.]\n'
try:
# We do not filter visible only here because we want to show the full content
# of the web page to the agent for simplicity.
# FIXME: handle the case when the web page is too large
cur_axtree_txt = self.get_axtree_str(filter_visible_only=False)
text += (
f'============== BEGIN accessibility tree ==============\n'
f'{cur_axtree_txt}\n'
f'============== END accessibility tree ==============\n'
)
except Exception as e:
text += (
f'\n[Error encountered when processing the accessibility tree: {e}]'
)
return text
elif self.trigger_by_action == ActionType.BROWSE:
text = f'[Current URL: {self.url}]\n'
if self.error:
text += (
'================ BEGIN error message ===============\n'
'The following error occurred when trying to visit the URL:\n'
f'{self.last_browser_action_error}\n'
'================ END error message ===============\n'
)
text += '============== BEGIN webpage content ==============\n'
text += self.content
text += '\n============== END webpage content ==============\n'
return text
else:
raise ValueError(f'Invalid trigger_by_action: {self.trigger_by_action}')
def get_axtree_str(self, filter_visible_only: bool = False) -> str:
cur_axtree_txt = flatten_axtree_to_str(
self.axtree_object,
extra_properties=self.extra_element_properties,
with_clickable=True,
skip_generic=False,
filter_visible_only=filter_visible_only,
)
self._axtree_str = cur_axtree_txt
return cur_axtree_txt