From 0928b5876fba9804198bc96b545dfb8660916183 Mon Sep 17 00:00:00 2001 From: Somdev Sangwan Date: Fri, 5 Apr 2019 13:41:11 +0530 Subject: [PATCH 01/13] Fixes #122 --- photon.py | 20 +++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) diff --git a/photon.py b/photon.py index 4718405..579273c 100644 --- a/photon.py +++ b/photon.py @@ -42,7 +42,7 @@ / %s__%s \/ /_ ____ / /_____ ____ / %s/_/%s / __ \/ %s__%s \/ __/ %s__%s \/ __ \\ / ____/ / / / %s/_/%s / /_/ %s/_/%s / / / / - /_/ /_/ /_/\____/\__/\____/_/ /_/ %sv1.2.1%s\n''' % + /_/ /_/ /_/\____/\__/\____/_/ /_/ %sv1.2.2%s\n''' % (red, white, red, white, red, white, red, white, red, white, red, white, red, white, end)) @@ -180,13 +180,13 @@ supress_regex = False -def intel_extractor(response): +def intel_extractor(url, response): """Extract intel from the response body.""" matches = re.findall(r'([\w\.-]+s[\w\.-]+\.amazonaws\.com)|([\w\.-]+@[\w\.-]+\.[\.\w]+)', response) if matches: for match in matches: verb('Intel', match) - bad_intel.add(match) + bad_intel.add(url + ':' + match) def js_extractor(response): @@ -198,12 +198,18 @@ def js_extractor(response): verb('JS file', match) bad_scripts.add(match) +def remove_file(url): + if url.count('/') > 2: + return url.replace(re.search(r'/[^/]*?$', url).group(), '') + else: + return url + def extractor(url): """Extract details from the response body.""" response = requester(url, main_url, delay, cook, headers, timeout, host, ninja, user_agents, failed, processed) if clone: mirror(url, response) - matches = re.findall(r'<[aA].*(href|HREF)=([^\s>]+)', response) + matches = re.findall(r'<[aA][^>]*?(href|HREF)=([^\s>]+)', response) for link in matches: # Remove everything after a "#" to deal with in-page anchors link = link[1].replace('\'', '').replace('"', '').split('#')[0] @@ -225,13 +231,13 @@ def extractor(url): external.add(link) elif link[:1] == '/': verb('Internal page', link) - internal.add(main_url + link) + internal.add(remove_file(url) + link) else: verb('Internal page', link) - internal.add(main_url + '/' + link) + internal.add(remove_file(url) + '/' + link) if not only_urls: - intel_extractor(response) + intel_extractor(link, response) js_extractor(response) if args.regex and not supress_regex: regxy(args.regex, response, supress_regex, custom) From f697227b357ed91a091b2c5ea417f691f2094a28 Mon Sep 17 00:00:00 2001 From: Somdev Sangwan Date: Fri, 5 Apr 2019 13:41:57 +0530 Subject: [PATCH 02/13] Fixes unicode output and well....fuck --- core/utils.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/core/utils.py b/core/utils.py index d79cf20..d7f0d02 100644 --- a/core/utils.py +++ b/core/utils.py @@ -41,7 +41,8 @@ def is_link(url, processed, files): is_file = url.endswith(BAD_TYPES) if is_file: files.add(url) - return is_file + return False + return True return False @@ -78,7 +79,7 @@ def writer(datasets, dataset_names, output_dir): filepath = output_dir + '/' + dataset_name + '.txt' with open(filepath, 'w+') as out_file: joined = '\n'.join(dataset) - out_file.write(str(joined.encode('utf-8'))) + out_file.write(str(joined.encode('utf-8').decode('utf-8'))) out_file.write('\n') def timer(diff, processed): From a4397917b3dc421ad72d2c4684e64d4ae0d1b820 Mon Sep 17 00:00:00 2001 From: Somdev Sangwan Date: Fri, 5 Apr 2019 13:48:07 +0530 Subject: [PATCH 03/13] Update photon.py --- photon.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/photon.py b/photon.py index 579273c..0c3a380 100644 --- a/photon.py +++ b/photon.py @@ -237,7 +237,7 @@ 
def extractor(url): internal.add(remove_file(url) + '/' + link) if not only_urls: - intel_extractor(link, response) + intel_extractor(url, response) js_extractor(response) if args.regex and not supress_regex: regxy(args.regex, response, supress_regex, custom) From 1d1ed225226c47bb1bd28b279ea90dc99ae34ddc Mon Sep 17 00:00:00 2001 From: Somdev Sangwan Date: Fri, 5 Apr 2019 17:05:43 +0530 Subject: [PATCH 04/13] Update photon.py --- photon.py | 27 +++++++++++++++++---------- 1 file changed, 17 insertions(+), 10 deletions(-) diff --git a/photon.py b/photon.py index 0c3a380..00ba342 100644 --- a/photon.py +++ b/photon.py @@ -135,12 +135,11 @@ # URLs that have get params in them e.g. example.com/page.php?id=2 fuzzable = set() endpoints = set() # URLs found from javascript files -processed = set() # URLs that have been crawled +processed = set(['dummy']) # URLs that have been crawled # URLs that belong to the target i.e. in-scope internal = set(args.seeds) everything = [] -bad_intel = set() # Unclean intel urls bad_scripts = set() # Unclean javascript file urls core.config.verbose = verbose @@ -186,7 +185,7 @@ def intel_extractor(url, response): if matches: for match in matches: verb('Intel', match) - bad_intel.add(url + ':' + match) + intel.add(url + ':' + ''.join(list(match))) def js_extractor(response): @@ -200,7 +199,11 @@ def js_extractor(response): def remove_file(url): if url.count('/') > 2: - return url.replace(re.search(r'/[^/]*?$', url).group(), '') + replacable = re.search(r'/[^/]*?$', url).group() + if replacable != '/': + return url.replace(replacable, '') + else: + return url else: return url @@ -225,7 +228,7 @@ def extractor(url): elif link[:2] == '//': if link.split('/')[2].startswith(host): verb('Internal page', link) - internal.add(schema + link) + internal.add(schema + '://' + link) else: verb('External page', link) external.add(link) @@ -234,7 +237,13 @@ def extractor(url): internal.add(remove_file(url) + link) else: verb('Internal page', link) - internal.add(remove_file(url) + '/' + link) + usable_url = remove_file(url) + if usable_url.endswith('/'): + internal.add(usable_url + link) + elif link.startswith('/'): + internal.add(usable_url + link) + else: + internal.add(usable_url + '/' + link) if not only_urls: intel_extractor(url, response) @@ -307,10 +316,8 @@ def jscanner(url): if '=' in url: fuzzable.add(url) - for match in bad_intel: - for x in match: # Because "match" is a tuple - if x != '': # If the value isn't empty - intel.add(x) + for match in intel: + intel.add(match) for url in external: try: if top_level(url, fix_protocol=True) in INTELS: From 5c61efdb1acef084423b744f56398421bd1239da Mon Sep 17 00:00:00 2001 From: Somdev Sangwan Date: Fri, 5 Apr 2019 17:16:49 +0530 Subject: [PATCH 05/13] Update flash.py --- core/flash.py | 32 +++++++------------------------- 1 file changed, 7 insertions(+), 25 deletions(-) diff --git a/core/flash.py b/core/flash.py index 967dac6..ce06309 100644 --- a/core/flash.py +++ b/core/flash.py @@ -1,14 +1,8 @@ from __future__ import print_function -import sys -import threading +import concurrent.futures from core.colors import info -try: - import concurrent.futures -except ImportError: - pass - def threader(function, *urls): """Start multiple threads for a function.""" @@ -33,23 +27,11 @@ def flash(function, links, thread_count): """Process the URLs and uses a threadpool to execute a function.""" # Convert links (set) to list links = list(links) - if sys.version_info < (3, 2): - for begin in range(0, len(links), thread_count): # Range with 
step - end = begin + thread_count - splitted = links[begin:end] - threader(function, splitted) - progress = end - if progress > len(links): # Fix if overflow - progress = len(links) - print('\r%s Progress: %i/%i' % (info, progress, len(links)), - end='\r') - sys.stdout.flush() - else: - threadpool = concurrent.futures.ThreadPoolExecutor( + threadpool = concurrent.futures.ThreadPoolExecutor( max_workers=thread_count) - futures = (threadpool.submit(function, link) for link in links) - for i, _ in enumerate(concurrent.futures.as_completed(futures)): - if i + 1 == len(links) or (i + 1) % thread_count == 0: - print('%s Progress: %i/%i' % (info, i + 1, len(links)), - end='\r') + futures = (threadpool.submit(function, link) for link in links) + for i, _ in enumerate(concurrent.futures.as_completed(futures)): + if i + 1 == len(links) or (i + 1) % thread_count == 0: + print('%s Progress: %i/%i' % (info, i + 1, len(links)), + end='\r') print('') From 71d45c44008a283df9282789d2b858dff7a9673e Mon Sep 17 00:00:00 2001 From: Somdev Sangwan Date: Fri, 5 Apr 2019 17:17:23 +0530 Subject: [PATCH 06/13] Update photon.py --- photon.py | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/photon.py b/photon.py index 00ba342..02a9975 100644 --- a/photon.py +++ b/photon.py @@ -23,13 +23,7 @@ from core.utils import top_level, extract_headers, verb, is_link, entropy, regxy, remove_regex, timer, writer from core.zap import zap -try: - from urllib.parse import urlparse # For Python 3 - python2, python3 = False, True -except ImportError: - from urlparse import urlparse # For Python 2 - python2, python3 = True, False - +from urllib.parse import urlparse # For Python 3 try: input = raw_input From a1ba6b361cc76ad37eba3dc18cad1ee3bc6eb4b9 Mon Sep 17 00:00:00 2001 From: Somdev Sangwan Date: Fri, 5 Apr 2019 17:17:55 +0530 Subject: [PATCH 07/13] removed python < 3.2 support --- .travis.yml | 1 - 1 file changed, 1 deletion(-) diff --git a/.travis.yml b/.travis.yml index 6fbe1e4..27e7287 100644 --- a/.travis.yml +++ b/.travis.yml @@ -2,7 +2,6 @@ language: python os: - linux python: - - 2.7 - 3.6 install: - pip install -r requirements.txt From 84663427ffa57dfcfb286896787a9c3d75c2aa29 Mon Sep 17 00:00:00 2001 From: Somdev Sangwan Date: Fri, 5 Apr 2019 17:23:01 +0530 Subject: [PATCH 08/13] Update flash.py --- core/flash.py | 20 -------------------- 1 file changed, 20 deletions(-) diff --git a/core/flash.py b/core/flash.py index ce06309..0ee582d 100644 --- a/core/flash.py +++ b/core/flash.py @@ -3,26 +3,6 @@ from core.colors import info - -def threader(function, *urls): - """Start multiple threads for a function.""" - threads = [] - # Because URLs is a tuple - urls = urls[0] - # Iterating over URLs - for url in urls: - task = threading.Thread(target=function, args=(url,)) - threads.append(task) - # Start threads - for thread in threads: - thread.start() - # Wait for all threads to complete their work - for thread in threads: - thread.join() - # Delete threads - del threads[:] - - def flash(function, links, thread_count): """Process the URLs and uses a threadpool to execute a function.""" # Convert links (set) to list From cfb8ce61ddc20137018324872be369c4807dece5 Mon Sep 17 00:00:00 2001 From: Somdev Sangwan Date: Fri, 5 Apr 2019 17:27:18 +0530 Subject: [PATCH 09/13] Update .travis.yml --- .travis.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.travis.yml b/.travis.yml index 27e7287..96eceb0 100644 --- a/.travis.yml +++ b/.travis.yml @@ -13,4 +13,4 @@ before_script: - flake8 . 
--count --exit-zero --max-complexity=10 --max-line-length=127 --statistics script: - python photon.py -u "https://stackoverflow.com" -l 1 -d 1 -t 100 --regex "\d{10}" --dns --output="d3v" - - python photon.py -u "https://stackoverflow.com" -l 1 -t 10 --seeds="https://stackoverflow.com/jobs" --only-urls --export=json --ninja + - python photon.py -u "https://rocket.chat" -l 1 -t 10 --seeds="https://stackoverflow.com/jobs" --only-urls --export=json --wayback From 7db6d0294003fc3e0f1d14443b42716f9c627331 Mon Sep 17 00:00:00 2001 From: Somdev Sangwan Date: Fri, 5 Apr 2019 17:28:21 +0530 Subject: [PATCH 10/13] remove ninja mode --- photon.py | 13 ++----------- 1 file changed, 2 insertions(+), 11 deletions(-) diff --git a/photon.py b/photon.py index 02a9975..eeb20b3 100644 --- a/photon.py +++ b/photon.py @@ -25,12 +25,6 @@ from urllib.parse import urlparse # For Python 3 -try: - input = raw_input -except NameError: - pass - - # Just a fancy ass banner print('''%s ____ __ __ / %s__%s \/ /_ ____ / /_____ ____ @@ -76,8 +70,6 @@ action='store_true') parser.add_argument('--dns', help='enumerate subdomains and DNS data', dest='dns', action='store_true') -parser.add_argument('--ninja', help='ninja mode', dest='ninja', - action='store_true') parser.add_argument('--keys', help='find secret keys', dest='api', action='store_true') parser.add_argument('--update', help='update photon', dest='update', @@ -112,7 +104,6 @@ timeout = args.timeout or 6 # HTTP request timeout cook = args.cook or None # Cookie api = bool(args.api) # Extract high entropy strings i.e. API keys and stuff -ninja = bool(args.ninja) # Ninja mode toggle crawl_level = args.level or 2 # Crawling level thread_count = args.threads or 2 # Number of threads only_urls = bool(args.only_urls) # Only URLs mode is off by default @@ -203,7 +194,7 @@ def remove_file(url): def extractor(url): """Extract details from the response body.""" - response = requester(url, main_url, delay, cook, headers, timeout, host, ninja, user_agents, failed, processed) + response = requester(url, main_url, delay, cook, headers, timeout, host, user_agents, failed, processed) if clone: mirror(url, response) matches = re.findall(r'<[aA][^>]*?(href|HREF)=([^\s>]+)', response) @@ -254,7 +245,7 @@ def extractor(url): def jscanner(url): """Extract endpoints from JavaScript code.""" - response = requester(url, main_url, delay, cook, headers, timeout, host, ninja, user_agents, failed, processed) + response = requester(url, main_url, delay, cook, headers, timeout, host, user_agents, failed, processed) # Extract URLs/endpoints matches = re.findall(r'[\'"](/.*?)[\'"]|[\'"](http.*?)[\'"]', response) # Iterate over the matches, match is a tuple From eeefd3b5ac66ef188d423e05d60d91032d157549 Mon Sep 17 00:00:00 2001 From: Somdev Sangwan Date: Fri, 5 Apr 2019 17:28:58 +0530 Subject: [PATCH 11/13] remove ninja mode --- core/requester.py | 132 ++++++---------------------------------------- 1 file changed, 17 insertions(+), 115 deletions(-) diff --git a/core/requester.py b/core/requester.py index 3986f34..0ee582d 100644 --- a/core/requester.py +++ b/core/requester.py @@ -1,115 +1,17 @@ -import random -import time - -import requests -from requests.exceptions import TooManyRedirects - - -SESSION = requests.Session() -SESSION.max_redirects = 3 - -def requester( - url, - main_url=None, - delay=0, - cook=None, - headers=None, - timeout=10, - host=None, - ninja=False, - user_agents=None, - failed=None, - processed=None - ): - """Handle the requests and return the response body.""" - cook = cook or 
set() - headers = headers or set() - user_agents = user_agents or ['Photon'] - failed = failed or set() - processed = processed or set() - # Mark the URL as crawled - processed.add(url) - # Pause/sleep the program for specified time - time.sleep(delay) - - def normal(url): - """Default request""" - final_headers = headers or { - 'Host': host, - # Selecting a random user-agent - 'User-Agent': random.choice(user_agents), - 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8', - 'Accept-Language': 'en-US,en;q=0.5', - 'Accept-Encoding': 'gzip', - 'DNT': '1', - 'Connection': 'close', - } - try: - response = SESSION.get( - url, - cookies=cook, - headers=final_headers, - verify=False, - timeout=timeout, - stream=True - ) - except TooManyRedirects: - return 'dummy' - if 'text/html' in response.headers['content-type']: - if response.status_code != '404': - return response.text - else: - response.close() - failed.add(url) - return 'dummy' - else: - response.close() - return 'dummy' - - def facebook(url): - """Interact with the developer.facebook.com API.""" - return requests.get( - 'https://developers.facebook.com/tools/debug/echo/?q=' + url, - verify=False - ).text - - def pixlr(url): - """Interact with the pixlr.com API.""" - if url == main_url: - # Because pixlr throws error if http://example.com is used - url = main_url + '/' - return requests.get( - 'https://pixlr.com/proxy/?url=' + url, - headers={'Accept-Encoding': 'gzip'}, - verify=False - ).text - - def code_beautify(url): - """Interact with the codebeautify.org API.""" - headers = { - 'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:61.0) Gecko/20100101 Firefox/61.0', - 'Accept': 'text/plain, */*; q=0.01', - 'Accept-Encoding': 'gzip', - 'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8', - 'Origin': 'https://codebeautify.org', - 'Connection': 'close', - } - return requests.post( - 'https://codebeautify.com/URLService', - headers=headers, - data='path=' + url, - verify=False - ).text - - def photopea(url): - """Interact with the www.photopea.com API.""" - return requests.get( - 'https://www.photopea.com/mirror.php?url=' + url, verify=False).text - - if ninja: # If the ninja mode is enabled - # Select a random request function i.e. 
random API - response = random.choice( - [photopea, normal, facebook, pixlr, code_beautify])(url) - return response or 'dummy' - else: - return normal(url) +from __future__ import print_function +import concurrent.futures + +from core.colors import info + +def flash(function, links, thread_count): + """Process the URLs and uses a threadpool to execute a function.""" + # Convert links (set) to list + links = list(links) + threadpool = concurrent.futures.ThreadPoolExecutor( + max_workers=thread_count) + futures = (threadpool.submit(function, link) for link in links) + for i, _ in enumerate(concurrent.futures.as_completed(futures)): + if i + 1 == len(links) or (i + 1) % thread_count == 0: + print('%s Progress: %i/%i' % (info, i + 1, len(links)), + end='\r') + print('') From 56e184259e7fdf17339935499ff38210cb8a4293 Mon Sep 17 00:00:00 2001 From: Somdev Sangwan Date: Fri, 5 Apr 2019 17:31:41 +0530 Subject: [PATCH 12/13] what the fuck man --- core/requester.py | 81 ++++++++++++++++++++++++++++++++++++++--------- 1 file changed, 66 insertions(+), 15 deletions(-) diff --git a/core/requester.py b/core/requester.py index 0ee582d..028b535 100644 --- a/core/requester.py +++ b/core/requester.py @@ -1,17 +1,68 @@ -from __future__ import print_function -import concurrent.futures +import random +import time -from core.colors import info +import requests +from requests.exceptions import TooManyRedirects -def flash(function, links, thread_count): - """Process the URLs and uses a threadpool to execute a function.""" - # Convert links (set) to list - links = list(links) - threadpool = concurrent.futures.ThreadPoolExecutor( - max_workers=thread_count) - futures = (threadpool.submit(function, link) for link in links) - for i, _ in enumerate(concurrent.futures.as_completed(futures)): - if i + 1 == len(links) or (i + 1) % thread_count == 0: - print('%s Progress: %i/%i' % (info, i + 1, len(links)), - end='\r') - print('') + +SESSION = requests.Session() +SESSION.max_redirects = 3 + +def requester( + url, + main_url=None, + delay=0, + cook=None, + headers=None, + timeout=10, + host=None, + user_agents=None, + failed=None, + processed=None + ): + """Handle the requests and return the response body.""" + cook = cook or set() + headers = headers or set() + user_agents = user_agents or ['Photon'] + failed = failed or set() + processed = processed or set() + # Mark the URL as crawled + processed.add(url) + # Pause/sleep the program for specified time + time.sleep(delay) + + def make_request(url): + """Default request""" + final_headers = headers or { + 'Host': host, + # Selecting a random user-agent + 'User-Agent': random.choice(user_agents), + 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8', + 'Accept-Language': 'en-US,en;q=0.5', + 'Accept-Encoding': 'gzip', + 'DNT': '1', + 'Connection': 'close', + } + try: + response = SESSION.get( + url, + cookies=cook, + headers=final_headers, + verify=False, + timeout=timeout, + stream=True + ) + except TooManyRedirects: + return 'dummy' + if 'text/html' in response.headers['content-type']: + if response.status_code != '404': + return response.text + else: + response.close() + failed.add(url) + return 'dummy' + else: + response.close() + return 'dummy' + + return make_request(url) From 5d468cf4ea06927f93828c816dcdbb78433f9324 Mon Sep 17 00:00:00 2001 From: Somdev Sangwan Date: Fri, 5 Apr 2019 17:41:25 +0530 Subject: [PATCH 13/13] display compatibilty warning --- photon.py | 29 +++++++++++++++++------------ 1 file changed, 17 insertions(+), 12 
deletions(-) diff --git a/photon.py b/photon.py index eeb20b3..8274db1 100644 --- a/photon.py +++ b/photon.py @@ -6,13 +6,28 @@ import argparse import os import re +import requests import sys import time import warnings -import requests - from core.colors import good, info, run, green, red, white, end + +# Just a fancy ass banner +print('''%s ____ __ __ + / %s__%s \/ /_ ____ / /_____ ____ + / %s/_/%s / __ \/ %s__%s \/ __/ %s__%s \/ __ \\ + / ____/ / / / %s/_/%s / /_/ %s/_/%s / / / / + /_/ /_/ /_/\____/\__/\____/_/ /_/ %sv1.2.2%s\n''' % + (red, white, red, white, red, white, red, white, red, white, red, white, + red, white, end)) + +try: + from urllib.parse import urlparse # For Python 3 +except ImportError: + print ('%s Photon runs only on Python 3.2 and above.' % info) + quit() + import core.config from core.config import INTELS from core.flash import flash @@ -23,16 +38,6 @@ from core.utils import top_level, extract_headers, verb, is_link, entropy, regxy, remove_regex, timer, writer from core.zap import zap -from urllib.parse import urlparse # For Python 3 - -# Just a fancy ass banner -print('''%s ____ __ __ - / %s__%s \/ /_ ____ / /_____ ____ - / %s/_/%s / __ \/ %s__%s \/ __/ %s__%s \/ __ \\ - / ____/ / / / %s/_/%s / /_/ %s/_/%s / / / / - /_/ /_/ /_/\____/\__/\____/_/ /_/ %sv1.2.2%s\n''' % - (red, white, red, white, red, white, red, white, red, white, red, white, - red, white, end)) # Disable SSL related warnings warnings.filterwarnings('ignore')
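
Notes on the series:

Patches 01 and 04 rework how extractor() turns a raw href into a crawlable URL: remove_file() strips the trailing filename from the page's own URL, and the '/'-prefix checks decide between host-relative and path-relative links. The same resolution rules are implemented by urljoin, from the same urllib.parse module the series already imports urlparse from. A minimal equivalent sketch; resolve() and the example URLs are illustrative, not from the codebase:

    from urllib.parse import urljoin

    def resolve(url, link):
        # Discard in-page anchors, as extractor() does with split('#')[0]
        link = link.split('#')[0]
        # urljoin resolves 'page.html', '/path', '//host/path' and full URLs
        return urljoin(url, link)

    print(resolve('http://example.com/docs/index.html', 'guide.html'))
    # http://example.com/docs/guide.html
    print(resolve('http://example.com/docs/index.html', '/about'))
    # http://example.com/about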
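Patches 05 and 08 make concurrent.futures the only execution path in flash(), dropping the threading-based fallback that existed for Python < 3.2 (patch 07 removes 2.7 from CI accordingly). The pattern is: submit every URL up front, then count futures as they finish to drive the progress line. A standalone sketch of that pattern, where fetch() and flash_sketch() are illustrative stand-ins for Photon's extractor()/jscanner() and flash():

    import concurrent.futures

    def fetch(url):
        # Stand-in worker; Photon's real workers issue HTTP requests
        return len(url)

    def flash_sketch(function, links, thread_count=2):
        links = list(links)
        with concurrent.futures.ThreadPoolExecutor(max_workers=thread_count) as pool:
            futures = [pool.submit(function, link) for link in links]
            for i, _ in enumerate(concurrent.futures.as_completed(futures)):
                # Refresh the counter every thread_count completions and at the end
                if i + 1 == len(links) or (i + 1) % thread_count == 0:
                    print('Progress: %i/%i' % (i + 1, len(links)), end='\r')
        print('')

    flash_sketch(fetch, {'https://example.com/a', 'https://example.com/b'})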
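Patch 11's diff for core/requester.py accidentally pastes in the contents of flash.py (its post-image blob 0ee582d matches flash.py's); patch 12 restores the real requester(), now without the ninja-mode request mirrors per patches 10 and 11's stated goal, built around a shared requests.Session capped at three redirects. One thing to note when reading the restored code: requests exposes status_code as an int, so the check response.status_code != '404' compares against a string and is always true. A corrected sketch of the content-type/status gate, with body_or_dummy() as an illustrative name:

    import requests

    SESSION = requests.Session()
    SESSION.max_redirects = 3

    def body_or_dummy(url, timeout=10):
        # Return HTML bodies; 'dummy' stands in for anything unusable
        try:
            response = SESSION.get(url, timeout=timeout, stream=True)
        except requests.exceptions.TooManyRedirects:
            return 'dummy'
        if ('text/html' in response.headers.get('content-type', '')
                and response.status_code != 404):  # int comparison, not '404'
            return response.text
        response.close()
        return 'dummy'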