-
Notifications
You must be signed in to change notification settings - Fork 0
/
Xpath_tool.py
162 lines (135 loc) · 9.2 KB
/
Xpath_tool.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
from selenium import webdriver
from bs4 import BeautifulSoup
import re
class Xpath_Tool:
    """Guess XPath locators for the ``input`` and ``button`` tags of a page.

    Every candidate locator is checked against a live WebDriver session and
    is only reported (printed as ``<tag>_<name> = <xpath>``) when it matches
    exactly one element.
    """

    # Values containing digit groups in a date/number-like layout; such
    # values are not used to build variable names.
    _DATE_LIKE = re.compile(r'([\d]{1,}([/-]|\s|[.])?)+(\D+)?([/-]|\s|[.])?[\d]{1,}')
    # Values containing a clock time such as "10:30 am".
    _TIME_LIKE = re.compile(r'\d{1,2}[:]\d{1,2}\s+((am|AM|pm|PM)?)')
    # Any character outside the 7-bit ASCII range.
    _NON_ASCII = re.compile(r"[^\x00-\x7F]")
    # Applied in this exact order to turn a raw name into an identifier;
    # the order matters (e.g. " / " must be handled before "/" and " ").
    _NAME_REPLACEMENTS = (
        ("+/- ", ""), ("| ", ""), (" / ", "_"), ("/", "_"), (" - ", "_"),
        (" ", "_"), ("&", ""), ("-", "_"), ("[", "_"), ("]", ""),
        (",", ""), ("__", "_"), (".com", ""),
    )
    _NO_NAME_SUFFIX = "----> Couldn't generate appropriate variable name for this xpath"

    def __init__(self):
        """Initialize the bookkeeping used while generating locators."""
        self.elements = None
        self.guessable_elements = ['input', 'button']
        self.known_attribute_list = ['id', 'name', 'placeholder', 'value', 'title', 'type', 'class']
        self.variable_names = []      # names already printed for attribute locators
        self.button_text_lists = []   # lower-cased button texts already printed
        self.language_counter = 1     # suffix for buttons whose text is not ASCII

    def generate_xpath(self, soup, driver=None):
        """Print a unique XPath for every guessable element found in *soup*.

        Args:
            soup: parsed page exposing ``find_all(tag)``.
            driver: WebDriver-like object exposing ``find_elements(by, value)``.
                When omitted, a module-level name ``driver`` is used, which is
                what the script section of this file defines.

        Returns:
            True if at least one locator matched exactly one element.

        Raises:
            NameError: if no driver is passed and no module-level ``driver``
                exists (same exception type the implicit global lookup raised).
        """
        if driver is None:
            driver = globals().get("driver")
        if driver is None:
            raise NameError("generate_xpath needs a WebDriver: pass driver=... "
                            "or define a module-level 'driver'")
        result_flag = False
        for guessable_element in self.guessable_elements:
            self.elements = soup.find_all(guessable_element)
            for element in self.elements:
                # Hidden fields are skipped entirely.
                if element.has_attr("type") and element['type'] == "hidden":
                    continue
                if self._locate_element(driver, guessable_element, element):
                    result_flag = True
        return result_flag

    def _locate_element(self, driver, tag, element):
        """Try each known attribute (or the button text) until one is reported.

        Returns True if any candidate locator matched exactly one element.
        """
        # NOTE(review): the nesting of the button-text branch and of its
        # trailing ``break`` was reconstructed from a source whose indentation
        # had been lost - confirm against the upstream file.
        found = False
        for attr in self.known_attribute_list:
            if element.has_attr(attr):
                locator = self.guess_xpath(tag, attr, element)
                if not self._is_unique(driver, locator):
                    continue
                found = True
                variable_name = self.get_variable_names(element)
                # Only report names that are non-empty and not used before.
                if variable_name != '' and variable_name not in self.variable_names:
                    self.variable_names.append(variable_name)
                    print("%s_%s = %s" % (tag, self._printable(variable_name),
                                          self._printable(locator)))
                    break
                print(self._printable(locator) + self._NO_NAME_SUFFIX)
            elif tag == 'button' and element.getText():
                button_text = element.getText()
                stripped_text = button_text.strip()
                # Exact text match when there is no surrounding whitespace,
                # otherwise fall back to contains() on the stripped text.
                if button_text == stripped_text:
                    locator = self.guess_xpath_button(tag, "text()", button_text)
                else:
                    locator = self.guess_xpath_using_contains(tag, "text()", stripped_text)
                if not self._is_unique(driver, locator):
                    continue
                found = True
                self._report_button(tag, button_text, locator)
                break
        return found

    def _report_button(self, tag, button_text, locator):
        """Print the variable assignment for a button located by its text."""
        if button_text.lower() in self.button_text_lists:
            # The variable name is already taken.
            print(self._printable(locator) + self._NO_NAME_SUFFIX)
            return
        self.button_text_lists.append(button_text.lower())
        if self._NON_ASCII.search(button_text):
            # Non-ASCII text cannot be turned into a name; use a counter.
            print("%s_%s_%s = %s" % (tag, "foreign_language", self.language_counter,
                                     self._printable(locator))
                  + "---> Foreign language found, please change the variable name appropriately")
            self.language_counter += 1
        else:
            # Strip and replace characters before printing the variable name.
            name = button_text.strip().strip("!?.").lower()
            name = name.replace(" + ", "_").replace(" & ", "_").replace(" ", "_")
            print("%s_%s = %s" % (tag, name, self._printable(locator)))

    @staticmethod
    def _is_unique(driver, locator):
        """Return True when *locator* matches exactly one element."""
        # "xpath" is the locator-strategy string; find_elements(by, value)
        # replaces find_elements_by_xpath, which Selenium 4 removed.
        return len(driver.find_elements("xpath", locator)) == 1

    @staticmethod
    def _printable(text):
        """Return *text* encoded as UTF-8 and decoded as Latin-1 for printing."""
        # NOTE(review): kept so printed output is unchanged; this turns each
        # non-ASCII character into several Latin-1 characters - presumably a
        # console-encoding workaround, confirm before removing.
        return text.encode('utf-8').decode('latin-1')

    @staticmethod
    def _xpath_literal(value):
        """Quote *value* as an XPath string literal.

        XPath has no escape character, so pick the quote style the value does
        not contain and fall back to concat() when it contains both.
        """
        text = "%s" % (value,)
        if "'" not in text:
            return "'%s'" % text
        if '"' not in text:
            return '"%s"' % text
        pieces = ("'%s'" % part for part in text.split("'"))
        return "concat(%s)" % ", \"'\", ".join(pieces)

    def get_variable_names(self, element):
        """Derive a variable name for *element* from its attributes.

        Attributes are tried in this order: id, value, name, placeholder,
        type, title, role. Returns the lower-cased, cleaned-up name, or ''
        when nothing suitable is found.
        """
        # 'id' longer than two characters, without digits, and not containing
        # the strings "input" or "button".
        if (element.has_attr('id') and len(element['id']) > 2
                and not re.search(r'\d', element['id'])
                and "input" not in element['id'].lower()
                and "button" not in element['id'].lower()):
            self.variable_name = element['id'].strip("_")
        # Non-empty 'value' that does not look like a date or a time.
        elif (element.has_attr('value') and element['value'] != ''
                and not self._DATE_LIKE.search(element['value'])
                and not self._TIME_LIKE.search(element['value'])):
            self.variable_name = self._name_from_value(element)
        # 'name' longer than two characters.
        elif element.has_attr('name') and len(element['name']) > 2:
            self.variable_name = element['name'].strip("_")
        # 'placeholder' without digits.
        elif element.has_attr('placeholder') and not re.search(r'\d', element['placeholder']):
            self.variable_name = element['placeholder']
        # 'type', unless it is one of the generic types listed here.
        elif element.has_attr('type') and element['type'] not in ('text', 'button', 'radio', 'checkbox', 'search'):
            self.variable_name = element['type']
        elif element.has_attr('title'):
            self.variable_name = element['title']
        elif element.has_attr('role') and element['role'] != "button":
            self.variable_name = element['role']
        else:
            self.variable_name = ''
        cleaned = self.variable_name.lower()
        for old, new in self._NAME_REPLACEMENTS:
            cleaned = cleaned.replace(old, new)
        return cleaned.strip("_")

    @staticmethod
    def _name_from_value(element):
        """Build a raw name from the 'value' (and, if present, 'type') attribute."""
        value = element['value'].strip("_.")
        # An element may carry a value without a type; indexing 'type'
        # unconditionally raised KeyError for those.
        if not element.has_attr('type'):
            return value
        element_type = element['type']
        if element_type in ('radio', 'submit', 'checkbox', 'search'):
            # Prefer the element text over the value when there is any.
            text = element.getText()
            if text != '':
                return element_type + "_" + text.strip().strip("_.")
            return element_type + "_" + value
        # Avoid names like "reset_reset" when type and value agree.
        if element_type.lower() == element['value'].lower():
            return value
        return element_type + "_" + value

    def guess_xpath(self, tag, attr, element):
        """Return ``//tag[@attr='value']`` for the attribute *attr* of *element*.

        List-valued attributes are joined with single spaces. *element* is
        not modified.
        """
        value = element[attr]
        # isinstance also covers list subclasses.
        if isinstance(value, list):
            value = ' '.join(value)
        self.xpath = "//%s[@%s=%s]" % (tag, attr, self._xpath_literal(value))
        return self.xpath

    def guess_xpath_button(self, tag, attr, element):
        """Return ``//tag[attr='element']``; *element* is the text to match."""
        self.button_xpath = "//%s[%s=%s]" % (tag, attr, self._xpath_literal(element))
        return self.button_xpath

    def guess_xpath_using_contains(self, tag, attr, element):
        """Return ``//tag[contains(attr,'element')]``; *element* is the text."""
        self.button_contains_xpath = "//%s[contains(%s,%s)]" % (tag, attr, self._xpath_literal(element))
        return self.button_contains_xpath
#-------START OF SCRIPT--------
if __name__ == "__main__":
    print("Start of %s" % __file__)
    # Initialize the xpath object. The class defined in this file is
    # Xpath_Tool; 'xpath_obj' and 'driver' are deliberately module-level
    # names because generate_xpath may look them up as globals.
    xpath_obj = Xpath_Tool()
    # Get the URL to inspect
    url = input("Enter URL: ")
    # Create a chrome session
    driver = webdriver.Chrome()
    try:
        driver.get(url)
        # execute_script already returns the inner HTML as a str; it is parsed
        # as-is so attribute values and button texts keep their real
        # characters and the generated XPaths can match the live DOM.
        page = driver.execute_script("return document.body.innerHTML")
        # Parsing the HTML page with BeautifulSoup
        soup = BeautifulSoup(page, 'html.parser')
        # execute generate_xpath
        if xpath_obj.generate_xpath(soup) is False:
            print("No XPaths generated for the URL:%s" % url)
    finally:
        # Always close the browser, even when loading or parsing fails.
        driver.quit()