-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathGetClickables.py
159 lines (134 loc) · 4.69 KB
/
GetClickables.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
from urllib import urlopen
from bs4 import BeautifulSoup
from logger import LoggerHandler
import re
# Module-level logger built from LoggerHandler (imported from the `logger`
# module) and keyed by this module's name.
logger = LoggerHandler(__name__)
class ClickableEntity(object):
    '''
    A clickable DOM element described by its tag name, its attributes and
    the locator used to find it again.

    The ``action`` attribute is always initialised to "click".
    '''

    def __init__(self, tag, attrs, xpath):
        '''
        tag   -- tag name of the element (for eg "a")
        attrs -- attribute information of the element
        xpath -- locator of the element; this is what str() and repr()
                 of the entity return
        '''
        self.tag = tag
        self.attrs = attrs
        self.xpath = xpath
        self.action = "click"

    def __str__(self):
        return str(self.xpath)

    def __repr__(self):
        # Previously spelled "__repr", which Python never calls: repr() and
        # printing a list of entities showed "<... object at 0x...>".
        return str(self.xpath)
def getLinks(domString):
    '''
    Returns a ClickableEntity for every usable anchor tag <a></a>.

    One entity is created per anchor with a usable href (attrs "href",
    xpath set to the href value), followed by one entity per anchor with
    an onclick handler (attrs "onclick", xpath set to the handler text).

    Anchors without an href, with an empty href, or with href = "#something"
    (for eg <a href="#div1"> </a>) produce no href entity, because they lead
    to the same page with no new state discovery.
    '''
    tagName = "a"
    soup = BeautifulSoup(domString, 'html.parser')
    anchors = soup.find_all(tagName)

    clickables = []
    for anchor in anchors:
        href = anchor.get("href")
        # A missing href used to be stringified to "None" and kept, and the
        # "#fragment" filter described above was never applied.
        if not href or href.startswith("#"):
            continue
        clickables.append(ClickableEntity(tagName, "href", str(href)))

    for anchor in anchors:
        onclick = anchor.get("onclick")
        if onclick:
            # Pass the handler text, mirroring the href case; the whole Tag
            # object was being passed before.
            clickables.append(ClickableEntity(tagName, "onclick", onclick))
    return clickables
def GetClickables(domString):
    '''
    Returns a ClickableEntity for every clickable element in the markup.

    Collected elements: <a>, <input type="submit">, <input type="button">,
    <button>, plus whatever otherClickables() returns for the same markup.

    Each entity gets an xpath of the form
        //tag[@attr1="value1"][@attr2="value2"]...
    built from all attributes of the element. An element matched by more
    than one of the queries above is returned only once.
    '''
    # Explicit parser, as in getLinks/getSubmitButtons, so the result does
    # not depend on which optional parsers happen to be installed.
    soup = BeautifulSoup(domString, 'html.parser')
    candidates = (
        list(soup.find_all("a"))
        + list(soup.find_all("input", attrs={'type': 'submit'}))
        + list(soup.find_all("input", attrs={'type': 'button'}))
        + list(soup.find_all("button"))
        + list(otherClickables(domString))
    )

    clickables = []
    seenPaths = set()
    for element in candidates:
        predicates = []
        for key, value in element.attrs.items():
            # Multi-valued attributes such as class are parsed into a list;
            # join them back so the xpath holds 'a b', not "['a', 'b']".
            if isinstance(value, (list, tuple)):
                value = " ".join(value)
            predicates.append('[@' + str(key) + '="' + str(value) + '"]')
        path = "//" + str(element.name) + "".join(predicates)

        # Identical paths locate the same element; keep the first only.
        if path in seenPaths:
            continue
        seenPaths.add(path)
        clickables.append(ClickableEntity(element.name, element.attrs, path))
    return clickables
def otherClickables(domString):
    '''
    Returns the elements that carry a non-empty 'onclick' attribute and are
    not <a>, <input> or <button> tags.

    The returned items are the parsed elements themselves, not
    ClickableEntity objects.

    NOTE(review): the original test chained "!=" with "or", which is always
    true, so no tag was ever excluded. Confirm that every caller collects
    <a>, <input> and <button> elements on its own.
    '''
    excludedTags = ("a", "input", "button")
    soup = BeautifulSoup(domString, 'html.parser')

    found = []
    # The pattern "." requires at least one character in the onclick value.
    for element in soup.find_all(attrs={'onclick': re.compile(r".")}):
        if element.name not in excludedTags:
            # Debug output kept from the original; the parenthesised form
            # is valid on both Python 2 and Python 3.
            print(element)
            found.append(element)
    return found
def frameExists(domString):
    '''
    Tells whether the source code contains at least one 'frame' element.
    Returns 1 when it does and 0 when it does not.
    '''
    frames = BeautifulSoup(domString).findAll("frame")
    return 1 if frames else 0
def getSubmitButtons(domString):
    '''
    Returns the submit buttons of forms with structure
    <input type="submit">
    <input type="button">

    All type="submit" inputs come first, then all type="button" inputs,
    each group in document order. Every entity carries the attributes of
    its element and a positional xpath such as
        (//input[@type="submit"])[2]
    so that several inputs of the same type stay distinguishable.
    '''
    tagName = "input"
    soup = BeautifulSoup(domString, 'html.parser')

    buttons = []
    for inputType in ("submit", "button"):
        matches = soup.find_all(tagName, attrs={'type': inputType})
        # XPath positions are 1-based, hence enumerate(..., 1).
        for position, element in enumerate(matches, 1):
            xpath = ('(//' + tagName + '[@type="' + inputType + '"])['
                     + str(position) + ']')
            # ClickableEntity takes (tag, attrs, xpath); the old four
            # argument call raised TypeError on the first matching input.
            buttons.append(ClickableEntity(tagName, element.attrs, xpath))
    return buttons
def GetDomElements(url):
    '''
    Reads the saved page "page1" from the current directory and returns
    the result of getLinks() for its content.

    url -- currently unused: fetching the live page is disabled and the
           local file is read instead.
    '''
    # Disabled live fetch, kept for reference:
    #   dom = urlopen(url).read()
    # The with-block closes the file handle, which used to be leaked.
    with open("page1") as pageFile:
        dom = pageFile.read()
    # The result was previously computed and thrown away.
    return getLinks(dom)
# GetDomElements("https://selenium-python.readthedocs.org/navigating.html")
# GetDomString("http://www.w3schools.com/tags/tag_button.asp")