#!/usr/bin/env python3
import queue
from threading import Thread
from bs4 import BeautifulSoup
from urllib.parse import urlparse
import sys
import re
from argparse import ArgumentParser
import requests
import base64, xml.etree.ElementTree
import urllib.parse
import json
banner="""
### #### #### ### # # ### #### # #
# # # # # # # # # # #
# # # # # # # # # # #
## ### ### ##### ## # # ### ### # #
# # # # # # # # #
# # # # # # # # # # #
### #### #### ### ### # # # # #
"""
print(banner)
parser = ArgumentParser()
parser.add_argument("-H", "--host", dest="host", metavar="HOST", required=True, help="target host, e.g. http://example.com")
parser.add_argument("-t", "--threads", dest="threads", metavar="THREADS", help="number of crawler threads (default 10)")
parser.add_argument("-c", "--cookies", nargs='+', dest="cookies", metavar="COOKIES", help="cookies to send with every request, e.g. -c session=abc123")
parser.add_argument("-v", "--verbose", dest="verbose", action='store_true', help="report a matching parameter at every URL where it is seen instead of only once per parameter name")
parser.add_argument("-p", "--payload", dest="payload", help="external callback URL (e.g. a Burp Collaborator host) used to verify potential SSRF on GET parameters")
parser.add_argument("-b", "--burp", dest="burp", help="provide a Burp sitemap XML file", action="store")
args = parser.parse_args()
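#Illustrative invocation (hypothetical target, cookie and collaborator values):
#   python3 see-surf.py -H https://example.com -t 10 -c session=abc123 -v -p http://your-collaborator.example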
validateHost_regex=r"^(http:\/\/www\.|https:\/\/www\.|http:\/\/|https:\/\/)[a-z0-9]+([\-\.]{1}[a-z0-9]+)*\.[a-z]{2,5}(:[0-9]{1,5})?(\/.*)?$"
validateHostIpWithPort_regex=r"^https?:\/\/(([0-9]|[1-9][0-9]|1[0-9]{2}|2[0-4][0-9]|25[0-5])\.){3}([0-9]|[1-9][0-9]|1[0-9]{2}|2[0-4][0-9]|25[0-5])?:?[0-9]+$"
#Validating Host name
if not(re.match(validateHost_regex,args.host) or re.match(validateHostIpWithPort_regex,args.host)):
print ("Terminating... Please enter Host in the format http://google.com or https://google.com or http://10.10.10.10 for internal hosts")
sys.exit()
if args.payload and not re.match(validateHost_regex,args.payload) and not re.match(validateHostIpWithPort_regex,args.payload):
print ("Terminating... Please enter Host in the format http://google.com or http://192.168.1.1:80")
sys.exit()
#Keeps a record of links which are already saved and are present just once in the queue
linksVisited=set()
ssrfVul=set()
#Throw away list just used for ignoring unnecessary crawling and generating noisy output
throwAwayListForRest=set()
throwAwayGetReqs={}
#Ignore links that cannot be crawled
ignoreList=["pdf","mailto","javascript"]
#Regex of keywords to look for in POST param name attributes and in GET parameters
matchList="(url|web|site|uri)"
#Cookies to send along with each request
cookiesDict={}
if args.cookies:
for cook in args.cookies:
cookiesDict[cook[:cook.find("=")]]=cook[cook.find("=")+1:]
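#e.g. (illustrative) passing -c session=abc123 csrftoken=xyz yields cookiesDict={'session': 'abc123', 'csrftoken': 'xyz'}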
#Making an external request to a hostname through the potential vulnerable parameter to validate SSRF
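#e.g. (illustrative) with paramName 'returnUrl', url 'http://host/page?returnUrl=http://a.com&x=1' and payload
#'http://collab.example', the request sent becomes 'http://host/page?returnUrl=http://collab.example/returnUrl&x=1'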
def makingExternalRequests(paramName, url):
    regexToReplace=paramName+"=(.*?)(?:&|$)"
    paramMatch=re.search(regexToReplace,url)
    if not paramMatch:
        return
    parameterValuetoReplace=paramMatch.group(1)
    #Appending the parameter name to the payload/collaborator URL so we can tell which parameter triggered the external request
    #re.escape is needed because parameter values may contain regex metacharacters
    formingPayloadURL=re.sub(re.escape(parameterValuetoReplace),args.payload+"/"+paramName,url)
    print ("\033[91m[+] Making external request with the potential vulnerable url:"+formingPayloadURL)
    requests.get(formingPayloadURL)
#This checks against URL keywords in param NAME
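#e.g. (illustrative) a GET parameter or form field named 'redirect_url' or 'website' matches matchList and is reported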
def matchURLKeywordsInName(getOrForm,paramName,url):
if args.verbose:
temp=url+":paramname:"+paramName
else:
temp=paramName
if temp not in ssrfVul and re.search(matchList,paramName,re.I):
print ("\033[92m[-] Potential vulnerable '{}' parameter {} '{}' at '{}'".format(getOrForm,"Name",paramName,url))
ssrfVul.add(temp)
#Trying to make an external request to validate potential SSRF (Only for GET parameter for now)
if args.payload and getOrForm == "GET":
makingExternalRequests(paramName,url)
#This checks URL pattern in param VALUE and also if an IP is passed somewhere in the values
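#e.g. (illustrative) a parameter whose value is 'https://example.com/home' or '169.254.169.254' matches the patterns below and is reported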
def matchURLPatternInValue(getOrForm, paramName,paramValues,url):
    #Not every field has a paramName, so fall back to the value when paramName is empty
if args.verbose:
temp=url+":paramname:"+paramValues if paramName=="" else url+":paramname:"+paramName
else:
temp=paramValues if paramName=="" else paramName
if temp not in ssrfVul and (re.match("^(http:\/\/www\.|https:\/\/www\.|http:\/\/|https:\/\/)?[a-z0-9]+([\-\.]{1}[a-z0-9]+)*\.[a-z]{2,5}(:[0-9]{1,5})?(\/.*)?$",paramValues) or re.match("((25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)(\.|$)){4}",paramValues)):
print ("\033[92m[-] Potential vulnerable '{}' parameter {} '{}' at '{}'".format(getOrForm, "Value" if paramName=="" else "Name",paramValues if paramName=="" else paramName,url))
ssrfVul.add(temp)
if args.payload and getOrForm == "GET":
makingExternalRequests(paramName,url)
def checkForGetRequest(url):
#print ("Checking for ssrf:"+url)
#Regex to find parameters in a url
checking_params_for_url= re.findall("(\?|\&)([^=]+)\=([^&]+)",url)
    #Checking if there is a parameter in the URL (this filters out REST API paths like /test/1, /test/2)
if not len(checking_params_for_url)==0:
#Getting the param values params[2] and param name params[1] and matching against regex
for params in checking_params_for_url:
matchURLKeywordsInName("GET",params[1],url)
matchURLPatternInValue("GET",params[1],params[2],url)
def checkFormParameters(siteContent,url):
for inputFields in BeautifulSoup(siteContent,'html.parser').find_all('input'):
if inputFields.has_attr('name'):
matchURLKeywordsInName("FORM",inputFields["name"],url)
#Found some cases where input fields didn't have any Value attribute
if inputFields.has_attr('value'):
matchURLPatternInValue("FORM",inputFields["name"] if inputFields.has_attr('name') else "",inputFields["value"],url)
        #Sometimes inputs have placeholders which give URL patterns
if inputFields.has_attr('placeholder'):
matchURLPatternInValue("FORM",inputFields["name"] if inputFields.has_attr('name') else "",inputFields["placeholder"],url)
#This checks against URL keywords in param NAME
def burp_matchURLKeywordsInName(getOrForm,paramName,url):
if re.search(matchList,paramName,re.I):
print ("\033[92m[-] Potential vulnerable '{}' parameter {} '{}' at '{}'".format(getOrForm,"Name",paramName,url))
#Trying to make an external request to validate potential SSRF (Only for GET parameter for now)
if args.payload and getOrForm == "GET":
makingExternalRequests(paramName,url)
#This checks URL pattern in param VALUE and also if an IP is passed somewhere in the values
def burp_matchURLPatternInValue(getOrForm, paramName,paramValues,url):
    #The regex differs from the non-Burp version because form/JSON parameters can carry arrays or other objects as values
if (re.match("(http:\/\/www\.|https:\/\/www\.|http:\/\/|https:\/\/)?[a-z0-9]+([\-\.]{1}[a-z0-9]+)*\.[a-z]{2,5}(:[0-9]{1,5})?(\/.*)?",str(paramValues)) or re.match("((25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.){3}(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)",str(paramValues))):
print ("\033[92m[-] Potential vulnerable '{}' parameter {} '{}' at '{}'".format(getOrForm, "Value" if paramName=="" else "Name",paramValues if paramName=="" else paramName,url))
if args.payload and getOrForm == "GET":
makingExternalRequests(paramName,url)
post_throwAwayListForRest=set()
post_throwAwayGetReqs={}
get_throwAwayListForRest=set()
get_throwAwayGetReqs={}
q_burp = queue.Queue()
q = queue.Queue()
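#q_burp holds items parsed from the Burp sitemap XML; q holds URLs queued for the basic crawler
#burp_siteMap_parse walks each 200-status Burp item, de-duplicates REST-style and repeated GET URLs,
#then checks GET, POST form-encoded and JSON parameters for URL-like names and values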
def burp_siteMap_parse(q_burp):
while True:
try:
item=q_burp.get()
post=False
if item.find('status').text=="200" and item.find('method').text=="POST":
post=True
else:
post=False
linkUrl=item.find('url').text
            #Reducing unnecessary crawling and duplication
            #Some POST requests contain parameters in the URL as well, for example POST /api?returnUrl=
if "?" not in linkUrl:
rest_apis=linkUrl.rsplit('/',1)
if not rest_apis[1]=='' and rest_apis[1].isdigit():
if post:
if rest_apis[0] in post_throwAwayListForRest:
q_burp.task_done()
continue
#Throw away lists for ignoring restapi links, don't want to mess with the original results in linksVisited
else:
post_throwAwayListForRest.add(rest_apis[0])
                    else:
                        if rest_apis[0] in get_throwAwayListForRest:
                            q_burp.task_done()
                            continue
                        #Throw away lists for ignoring restapi links, don't want to mess with the original results in linksVisited
                        else:
                            get_throwAwayListForRest.add(rest_apis[0])
else:
                #Reducing duplication for GET requests with the same parameters. In the example below only one entry is saved,
                #since the 2nd URL contains all parameters of the 1st plus one extra parameter, 'filter':
                #http://www.msn.com/es-mx/deportes/browse/el-universal/vs-BBnqaEA?page=2&sort=sort_1
                #http://www.msn.com/es-mx/deportes/browse/el-universal/vs-BBnqaEA?page=2&filter=duration_0&sort=sort_2
checking_params_for_url= re.findall("(\?|\&)([^=]+)\=([^&]+)",linkUrl)
get_req=linkUrl.rsplit('?',1)
url=get_req[0]
parameters=get_req[1]
if post and url not in post_throwAwayGetReqs:
post_throwAwayGetReqs[url]=parameters
elif not post and url not in get_throwAwayGetReqs:
get_throwAwayGetReqs[url]=parameters
else:
if post:
existingParams=post_throwAwayGetReqs[url]
else:
existingParams=get_throwAwayGetReqs[url]
                    allParameterExists = True
                    for params in checking_params_for_url:
                        #Some param names have special chars we need to escape them and then search
                        formingRegex=re.escape(params[1])
                        #Skip this item only if every parameter already exists for this URL
                        if not re.search(formingRegex,existingParams,re.I):
                            allParameterExists=False
                            break
if allParameterExists:
q_burp.task_done()
continue
else:
if post:
post_throwAwayGetReqs[url]=parameters
else:
get_throwAwayGetReqs[url]=parameters
#Actual Processing of requests starts, just checking for 200 status
if item.find('status').text=="200" and item.find('method').text=="POST":
                #Special case: POST requests can also carry parameters in the URL, so check those too
if "?" in linkUrl:
checking_params_for_url= re.findall("(\?|\&)([^=]+)\=([^&]+)",linkUrl)
                    #Checking if there is a parameter in the URL (this filters out REST API paths like /test/1, /test/2)
if not len(checking_params_for_url)==0:
#Getting the param values params[2] and param name params[1] and matching against regex
for params in checking_params_for_url:
matchURLKeywordsInName("POST",params[1],linkUrl)
matchURLPatternInValue("POST",params[1],params[2],linkUrl)
response=base64.b64decode(item.find('request').text).decode("utf8")
content_type_regex='\\r\\nContent-Type:(.*?)\\r\\n'
if re.search(content_type_regex,response):
content_type = (re.search(content_type_regex,response).group(1))
if "application/x-www-form-urlencoded" in content_type:
form_regex='\\r\\n\\r\\n(.*)'
response=urllib.parse.unquote(response)
if re.search(form_regex,response):
form_req=re.search(form_regex,response).group(1)
checking_params_for_url= re.findall("(\&)?([^=]+)\=([^&]+)",form_req)
                        #Checking if there is a parameter in the request body (filters out bodies with no key=value pairs)
if not len(checking_params_for_url)==0:
#Getting the param values params[2] and param name params[1] and matching against regex
for params in checking_params_for_url:
#print (params[1])
burp_matchURLKeywordsInName("POST",params[1],linkUrl)
burp_matchURLPatternInValue("POST",params[1],params[2],linkUrl)
elif "json" in content_type:
#print (urllib.parse.unquote(response))
json_regex='\\r\\n\\r\\n({(.|\n)*})'
if re.search(json_regex,response):
json_req=urllib.parse.unquote(re.search(json_regex,response).group(1))
#print (json_req)
json_req=json_req.replace('\n', '').replace('\r', '')
for key,value in json.loads(json_req).items():
burp_matchURLKeywordsInName("POST",key,linkUrl)
burp_matchURLPatternInValue("POST",key,value,linkUrl)
#TODO
elif "xml" in content_type:
print ("")
elif item.find('status').text=="200" and item.find('method').text=="GET":
checking_params_for_url= re.findall("(\?|\&)([^=]+)\=([^&]+)",linkUrl)
                #Checking if there is a parameter in the URL (this filters out REST API paths like /test/1, /test/2)
if not len(checking_params_for_url)==0:
#Getting the param values params[2] and param name params[1] and matching against regex
for params in checking_params_for_url:
burp_matchURLKeywordsInName("GET",params[1],linkUrl)
burp_matchURLPatternInValue("GET",params[1],params[2],linkUrl)
#Adding the link found to do basic crawling to get maximum results
q.put(linkUrl)
q_burp.task_done()
except Exception as e:
print(e)
q_burp.task_done()
continue
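#Fetches a page, flags suspicious GET and form parameters, then queues unvisited same-domain links for further crawling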
def basicCrawling(url):
if args.cookies:
r = requests.get(url, cookies=cookiesDict)
else:
r = requests.get(url)
siteContent=r.text
if url not in linksVisited:
checkFormParameters(siteContent,url)
checkForGetRequest(url)
linksVisited.add(url)
for links in BeautifulSoup(siteContent,'html.parser').find_all('a'):
        #Only proceed if the link has an href attribute; many 'a' tags
        #contain images and src attributes instead
        #Ignoring anchor tags that have images inside
if len(links.find_all("img"))>0:
#print ("Images")
continue
#Checking for common file extensions that exists in anchor tags and ignoring
ignoreListMatch=False
for ignore in ignoreList:
if ignore in str(links):
ignoreListMatch=True
break
if ignoreListMatch:
continue
if links.has_attr('href'):
linkUrl=links['href']
            #Skipping links that contain a hash fragment or are just "/" to avoid unnecessary crawling
if "#" in linkUrl or linkUrl=="/":
continue
#For conditions where <a href='index.php?id=21'>
if not linkUrl.startswith('http') and "www" not in linkUrl:
if linkUrl.startswith('/'):
linkUrl=baseURL+linkUrl
else:
linkUrl=baseURL+"/"+linkUrl
#skipping the loop if not of same domain
if not linkUrl.startswith(baseURL):
continue
            #The order of these IF checks matters so we don't miss valid data, hence this condition comes last
            #Handling REST URL duplication (test/1, test/2 or test/), otherwise vulnerable form params get duplicated
            #Also handling cases where the parameter value changes but the request stays the same, for example test?abc=1 and test?abc=2
            #We do not need to crawl those again
if "?" not in linkUrl:
rest_apis=linkUrl.rsplit('/',1)
if not rest_apis[1]=='' and rest_apis[1].isdigit():
if rest_apis[0] in throwAwayListForRest:
continue
#Throw away lists for ignoring restapi links, don't want to mess with the original results in linksVisited
else:
throwAwayListForRest.add(rest_apis[0])
else:
                #Reducing duplication for GET requests with the same parameters. In the example below only one entry is saved,
                #since the 2nd URL contains all parameters of the 1st plus one extra parameter, 'filter':
                #http://www.msn.com/es-mx/deportes/browse/el-universal/vs-BBnqaEA?page=2&sort=sort_1
                #http://www.msn.com/es-mx/deportes/browse/el-universal/vs-BBnqaEA?page=2&filter=duration_0&sort=sort_2
checking_params_for_url= re.findall("(\?|\&)([^=]+)\=([^&]+)",linkUrl)
get_req=linkUrl.rsplit('?',1)
url=get_req[0]
parameters=get_req[1]
if url not in throwAwayGetReqs:
throwAwayGetReqs[url]=parameters
else:
existingParams=throwAwayGetReqs[url]
                    allParameterExists = True
                    for params in checking_params_for_url:
                        #Some param names have special chars we need to escape them and then search
                        formingRegex=re.escape(params[1])
                        #Skip this link only if every parameter already exists for this URL
                        if not re.search(formingRegex,existingParams,re.I):
                            allParameterExists=False
                            break
if allParameterExists:
continue
else:
throwAwayGetReqs[url]=parameters
            #Only queueing links which have not been visited before
if linkUrl not in linksVisited:
q.put(linkUrl)
#linksVisited.add(linkUrl)
#checkForGetRequest(linkUrl)
#checkFormParameters(siteContent,linkUrl)
def do_stuff(q):
while True:
url = q.get()
try:
basicCrawling(url)
q.task_done()
except Exception as e:
print(e)
q.task_done()
continue
parsed=urlparse(args.host)
baseURL=parsed.scheme+"://"+parsed.netloc
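#e.g. (illustrative) -H https://example.com/app/index.php gives baseURL https://example.com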
print ("Target URL - " + baseURL)
if args.burp:
    with open(args.burp, "r") as burp_file:
        burp_xml = xml.etree.ElementTree.fromstring(burp_file.read())
    for item in burp_xml:
        q_burp.put(item)
else:
q.put(baseURL)
print ("")
#Since we do not want to visit the root url again we add it into the visited list
linksVisited.add(baseURL+"/")
if args.threads:
num_threads = int(args.threads)
else:
num_threads=10
#If a Burp file is provided, parse it first and map the results, then pass the discovered URLs to the basic crawler to maximise coverage
if args.burp:
print ("\nProcessing Burp file\n")
for i in range(num_threads):
worker = Thread(target=burp_siteMap_parse, args=(q_burp,))
        worker.daemon = True
worker.start()
q_burp.join()
print ("\nStarting Crawling\n")
for i in range(num_threads):
worker = Thread(target=do_stuff, args=(q,))
    worker.daemon = True
worker.start()
q.join()
print ("\nProcess Completed")