diff --git a/device-pharmer-python3.py b/device-pharmer-python3.py
new file mode 100644
index 0000000..3ad068d
--- /dev/null
+++ b/device-pharmer-python3.py
@@ -0,0 +1,464 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+
+'''
+Concurrently open either Shodan search results, a specified IP range, a
+single IP, or a domain and print the status and title of each page.
+Add the -u and -p options to attempt to login to the page.
+Use -f to look for a certain string in the html response to check if
+authentication succeeded. Will attempt to login first via a login form
+and, should that fail or not exist, login via HTTP Basic Auth.
+
+Requires: Linux + Python 3 + gevent + mechanize + BeautifulSoup + shodan
+
+
+__author__ = Dan McInerney
+             danmcinerney.org
+             Twitter: danhmcinerney
+             Gmail: danhmcinerney
+'''
+
+# This must be one of the first imports or else we get a threading error on completion
+from gevent import monkey
+monkey.patch_all()
+
+# Overzealously prevent mechanize's gzip warning
+import warnings
+warnings.filterwarnings("ignore")
+
+import gevent
+import argparse
+import mechanize
+from bs4 import BeautifulSoup
+import http.cookiejar as cookielib
+from socket import setdefaulttimeout
+import re
+from sys import exit
+
+# Including lxml in case someone wants to use it instead of BeautifulSoup
+#import lxml
+#from lxml.html import fromstring
+
+def parse_args():
+    """Create the arguments"""
+    parser = argparse.ArgumentParser(
+        formatter_class=argparse.RawDescriptionHelpFormatter,
+        epilog='-----------------------------------------------------------------------------------\n'
+               'Examples:\n\n'
+               ' -Search Shodan for "dd-wrt" using the specified API key and print the title of each\n'
+               ' result\'s response page:\n\n'
+               ' python3 device-pharmer-python3.py -s "dd-wrt" -a Wutc4c3T78gRIKeuLZesI8Mx2ddOiP4\n\n'
+               ' -Open a range of IP addresses, print the title of each response page and\n'
+               ' make 100 requests concurrently:\n\n'
+               ' python3 device-pharmer-python3.py -t 192.168.0-5.1-254 -c 100\n\n'
+               ' -Search Shodan for "dd-wrt" and attempt to login with "root" using password "admin",\n'
+               ' then check the response page\'s html for the string ">Advanced Routing<" and, last,\n'
+               ' use a proxy of 123.12.12.123 on port 8080:\n\n'
+               ' python3 device-pharmer-python3.py -s "dd-wrt" -u root -p admin -f ">Advanced Routing<" --proxy 123.12.12.123:8080\n\n'
+               ' -Open www.reddit.com specifically with https:// and attempt to login using "sirsmit418"\n'
+               ' with the password "whoopwhoop":\n\n'
+               ' python3 device-pharmer-python3.py -t www.reddit.com -ssl -u sirsmit418 -p whoopwhoop')
+
+    parser.add_argument("-a", "--apikey", help="Your Shodan API key")
+    parser.add_argument("-c", "--concurrent", default='1000', help="Number of concurrent requests to make; default = 1000")
+    parser.add_argument("-f", "--findstring", help="Search the html response for a string; can be used to determine if a login was successful")
+    parser.add_argument("-n", "--numpages", default='1', help="Number of pages deep to go in Shodan results with 100 results per page; default is 1")
+    parser.add_argument("-p", "--password", help="Enter password after this argument")
+    parser.add_argument("--proxy", help="Enter a proxy to use, e.g. --proxy user:password@123.12.12.123:8080 or just a simple IP:port will work too")
+    parser.add_argument("-s", "--shodansearch", help="Your search terms")
+    parser.add_argument("-ssl", help="Test all the results using https:// rather than the default http://", action="store_true")
+    parser.add_argument("-t", "--targets", help="Enter an IP, a domain, or a range of IPs to fetch (e.g. 192.168.0-5.1-254 will "
+                                                "fetch 192.168.0.1 to 192.168.5.254; if using a domain include the subdomain if it exists: sub.domain.com or domain.com)")
+    parser.add_argument("--timeout", default='15', help="Set the timeout for each URI request in seconds; default is 15")
+    parser.add_argument("-u", "--username", help="Enter username after this argument")
+    parser.add_argument("--ipfile", help="Test IPs from a text file (IPs should be separated by newlines)")
+    return parser.parse_args()
+
+def shodan_search(search, apikey, pages):
+    import shodan
+
+    if apikey:
+        API_KEY = apikey
+    else:
+        API_KEY = 'ENTER YOUR API KEY HERE AND KEEP THE QUOTES'
+
+    api = shodan.Shodan(API_KEY)
+
+    ips_found = []
+
+    # Get IPs from Shodan search results
+    try:
+        results = api.search(search, page=1)
+        total_results = results['total']
+        print('[+] Results: %d' % total_results)
+        print('[*] Page 1...')
+        pages = max_pages(pages, total_results)
+        for r in results['matches']:
+            # Replace the following ports with port 80 since they'll virtually never have a web server running:
+            # ftp, ssh, telnet, smtp x2, netbios x3, smb
+            if r['port'] in [21, 22, 23, 25, 26, 137, 138, 139, 445]:
+                r['port'] = 80
+            ips_found.append('%s:%s' % (r['ip_str'], r['port']))
+
+        # Fetch any remaining pages, applying the same port substitution as page 1
+        for i in range(2, pages + 1):
+            results = api.search(search, page=i)
+            print('[*] Page %d...' % i)
+            for r in results['matches']:
+                if r['port'] in [21, 22, 23, 25, 26, 137, 138, 139, 445]:
+                    r['port'] = 80
+                ips_found.append('%s:%s' % (r['ip_str'], r['port']))
+
+        return ips_found
+
+    except Exception as e:
+        print('[!] Shodan search error:', e)
+
+def get_ips_from_file(ipfile):
+    ''' Read IPs from a file '''
+    ips_found = []
+    try:
+        with open(ipfile) as ips:
+            for line in ips:
+                ip = line.strip()
+                ips_found.append(ip)
+    except IOError:
+        exit('[!] Are you sure the file %s exists in this directory?' % ipfile)
+
+    return ips_found
+
+def max_pages(pages, total_results):
+    ''' Measures the max # of pages in the Shodan results. An alternative to this
+    would be to measure len(results['matches']) and stop when that is zero,
+    but that would mean 1 extra api lookup which would add some pointless
+    seconds to the search '''
+
+    # Ceiling division; (total_results+100)/100 overcounted by a page whenever the
+    # total was an exact multiple of 100, and / returns a float under Python 3
+    total_pages = (total_results + 99) // 100
+    return min(pages, total_pages)
+
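+# Illustration with made-up numbers: 250 total results -> (250 + 99) // 100 = 3 pages,
+# so -n 10 is capped to 3 while -n 2 stays at 2
+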
+def browser_mechanize(proxy, ssl):
+    ''' Start headless browser '''
+    br = mechanize.Browser()
+    # Cookie Jar
+    cj = cookielib.LWPCookieJar()
+    br.set_cookiejar(cj)
+    # Browser options
+    if proxy and ssl:
+        print('[*] HTTPS proxies are unsupported, using the proxy specified anyway: %s' % proxy)
+        br.set_proxies({'http': proxy})
+    elif proxy:
+        print('[*] HTTP proxy being employed: %s' % proxy)
+        br.set_proxies({'http': proxy})
+    br.set_handle_equiv(True)
+    br.set_handle_gzip(True)
+    br.set_handle_redirect(True)
+    br.set_handle_referer(True)
+    br.set_handle_robots(False)
+    # Follow refresh 0 but don't hang on refresh > 0
+    br.set_handle_refresh(mechanize._http.HTTPRefreshProcessor(), max_time=1)
+    br.addheaders = [('User-agent', 'Mozilla/5.0 (Windows NT 6.3; Trident/7.0; rv:11.0) like Gecko')]
+    return br
+
+class Scraper():
+
+    def __init__(self, args):
+        self.user = args.username
+        self.passwd = args.password
+        self.findstring = args.findstring
+        self.search = args.shodansearch
+        self.ipfile = args.ipfile
+        self.ssl = args.ssl
+        if self.ssl:
+            self.uri_prefix = 'https://'
+        else:
+            self.uri_prefix = 'http://'
+        self.proxy = args.proxy
+        self.targets = args.targets
+        self.br = browser_mechanize(self.proxy, self.ssl)
+
+    def run(self, target):
+        target = self.uri_prefix + target
+        try:
+            resp, brtitle = self.req(target)
+            title, match = self.html_parser(resp, brtitle)
+            if match:
+                mark = '+'
+                label = match
+            else:
+                mark = '*'
+                label = 'Title: '
+            sublabel = title
+        except Exception as e:
+            mark = '-'
+            label = 'Exception:'
+            sublabel = str(e)
+
+        self.final_print(mark, target, label, sublabel)
+
+    def req(self, target):
+        ''' Determine what type of auth to use, if any '''
+        if self.user and self.passwd:
+            # Attempts to login via text boxes
+            # Failing that, tries basic auth
+            # Failing that, tries no auth
+            return self.resp_to_textboxes(target)
+        return self.resp_no_auth(target)
+
+    #############################################################################
+    # Get response functions
+    #############################################################################
+    def resp_no_auth(self, target):
+        ''' No username/password argument given '''
+        no_auth_resp = self.br.open(target)
+        try:
+            brtitle = self.br.title()
+        except Exception:
+            brtitle = None
+        return no_auth_resp, brtitle
+
+    def resp_basic_auth(self, target):
+        ''' When there are no login forms on the page but -u and -p are given '''
+        self.br.add_password(target, self.user, self.passwd)
+        basic_auth_resp = self.br.open(target)
+        try:
+            brtitle = self.br.title()
+        except Exception:
+            brtitle = None
+        return basic_auth_resp, brtitle
+
+    def resp_to_textboxes(self, target):
+        ''' Find the first form on the page that has exactly 1 text box and 1 password box.
+        Fill it out with the credentials the user provides. If no form is found, try
+        authenticating with HTTP Basic Auth and if that also fails, try just getting a response.
+        '''
+        brtitle1 = None
+
+        try:
+            resp = self.br.open(target)
+            try:
+                brtitle1 = self.br.title()
+            except Exception:
+                brtitle1 = None
+            forms = self.br.forms()
+            self.br.form = self.find_password_form(forms)
+            resp = self.fill_out_form()
+            try:
+                brtitle = self.br.title()
+            except Exception:
+                brtitle = None
+        except Exception:
+            # If logging in via the form failed, try basic auth
+            try:
+                resp, brtitle = self.resp_basic_auth(target)
+            except Exception:
+                # If basic auth failed as well, try no auth
+                resp, brtitle = self.resp_no_auth(target)
+
+        if brtitle is None and brtitle1:
+            brtitle = brtitle1
+
+        return resp, brtitle
+
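+    # Fallback order when -u and -p are supplied, e.g. against a hypothetical http://10.0.0.1:
+    #   1. resp_to_textboxes: fill out the first form holding exactly one text and one password box
+    #   2. resp_basic_auth: try HTTP Basic Auth if no such form exists or the submit fails
+    #   3. resp_no_auth: fall back to a plain unauthenticated GET if basic auth errors out too
+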
+    def find_password_form(self, forms):
+        ''' Return the first form with exactly one text control and one password control '''
+        for f in forms:
+            pw = 0
+            text = 0
+            for c in f.controls:
+                if c.type == 'text':
+                    text += 1
+                if c.type == 'password':
+                    pw += 1
+            if pw == 1 and text == 1:
+                return f
+
+    def fill_out_form(self):
+        ''' Find the first text and password controls and fill them out '''
+        text_found = 0
+        pw_found = 0
+        for c in self.br.form.controls:
+            if c.type == 'text':
+                # Only fill the first text control box
+                if text_found == 0:
+                    c.value = self.user
+                    text_found = 1
+                continue
+            if c.type == 'password':
+                c.value = self.passwd
+                pw_found = 1
+                break
+
+        form_resp = self.br.submit()
+        return form_resp
+    #############################################################################
+
+    def html_parser(self, resp, brtitle):
+        ''' Parse the html, look for a match with the user's -f string,
+        and find the title. '''
+        # Decode to str so the -f substring search works under Python 3
+        html = resp.read().decode('utf-8', errors='ignore')
+
+        # Find match
+        match = self.find_match(html)
+
+        # Including lxml in case someone has more success with it
+        # My test showed that BeautifulSoup encountered a few less encoding errors (~3% vs 5% from lxml)
+        # root = fromstring(html)
+        # find_title = root.xpath('//title')
+        # try:
+        #     title = find_title[0].text
+        # except Exception as e:
+        #     title = ''
+
+        # Get title
+        soup = BeautifulSoup(html, 'html.parser')
+        title = None
+        try:
+            title = soup.title.string
+        except AttributeError:
+            if brtitle:
+                title = brtitle
+        except Exception as e:
+            title = str(e)
+
+        if brtitle and not title:
+            title = brtitle
+
+        return title, match
+
+    def find_match(self, html):
+        match = None
+        if self.findstring:
+            if self.findstring in html:
+                match = '* MATCH * '
+        return match
+
+    def final_print(self, mark, target, label, sublabel):
+        target = target.ljust(29)
+
+        if self.search:
+            name = self.search
+        elif self.targets:
+            name = self.targets
+        elif self.ipfile:
+            name = self.ipfile
+
+        name = name.replace('/', '')
+
+        try:
+            results = '[%s] %s | %s %s' % (mark, target, label, sublabel)
+            if mark == '*' or mark == '+':
+                with open('%s_results.txt' % name, 'a+') as f:
+                    f.write('[%s] %s | %s %s\n' % (mark, target, label, sublabel))
+            print(results)
+        except Exception as e:
+            results = '[%s] %s | %s %s' % (mark, target, label, str(e))
+            with open('%s_results.txt' % name, 'a+') as f:
+                f.write('%s\n' % results)
+            print(results)
+
+
+#############################################################################
+# IP range target handlers
+# Taken from against.py by pigtails23 with minor modification
+#############################################################################
+def get_targets_from_args(targets):
+    target_type = check_targets(targets)
+    if target_type:
+        if target_type == 'domain' or target_type == 'ip':
+            return [targets]
+        elif target_type == 'ip range':
+            return ip_range(targets)
+
+def check_targets(targets):
+    ''' This could use improvement but works fine; it would be
+    nice to have a good regex just for finding IP ranges '''
+    if re.match('^[A-Za-z]', targets): # starts with a letter
+        return 'domain'
+    elif targets.count('.') == 3 and '-' in targets:
+        return 'ip range'
+    elif re.match(r"(\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3})$", targets):
+        return 'ip'
+    else:
+        return None
+
+def handle_ip_range(iprange):
+    parted = iprange.split('.')
+    # Lists rather than range() objects so the per-octet bounds can be assigned
+    rsa = [0] * 4
+    rsb = [0] * 4
+    for i in range(4):
+        hyphen = parted[i].find('-')
+        if hyphen != -1:
+            rsa[i] = int(parted[i][:hyphen])
+            rsb[i] = int(parted[i][1+hyphen:]) + 1
+        else:
+            rsa[i] = int(parted[i])
+            rsb[i] = int(parted[i]) + 1
+    return rsa, rsb
+
+def ip_range(iprange):
+    rsa, rsb = handle_ip_range(iprange)
+    ips = []
+    for i in range(rsa[0], rsb[0]):
+        for j in range(rsa[1], rsb[1]):
+            for k in range(rsa[2], rsb[2]):
+                for l in range(rsa[3], rsb[3]):
+                    ip = '%d.%d.%d.%d' % (i, j, k, l)
+                    ips.append(ip)
+    return ips
+#############################################################################
+
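+# Illustrative classifications and expansion (made-up inputs):
+#   check_targets('router.example.com') -> 'domain'
+#   check_targets('10.0.0.1')           -> 'ip'
+#   check_targets('192.168.0-1.1-3')    -> 'ip range'
+#   ip_range('192.168.0-1.1-3')         -> ['192.168.0.1', '192.168.0.2', '192.168.0.3',
+#                                           '192.168.1.1', '192.168.1.2', '192.168.1.3']
+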
+def input_check(args):
+    ''' Check for multiple target inputs, or a lack of target inputs '''
+    if not args.targets and not args.shodansearch and not args.ipfile:
+        exit('[!] No targets found. Use the -s option to specify a search term for Shodan, the -t option to specify an IP, IP range, or domain, or use the --ipfile option to read from a list of IPs in a text file')
+
+    inputs = 0
+    if args.targets:
+        inputs += 1
+    if args.shodansearch:
+        inputs += 1
+    if args.ipfile:
+        inputs += 1
+    if inputs > 1:
+        exit('[!] Multiple target inputs specified, choose just one target input option: -t (IP, IP range, or domain), -s (Shodan search results), or --ipfile (IPs from a text file)')
+
+def main(args):
+
+    input_check(args)
+
+    S = Scraper(args)
+
+    if args.targets:
+        targets = get_targets_from_args(args.targets)
+    elif args.shodansearch:
+        targets = shodan_search(args.shodansearch, args.apikey, int(args.numpages))
+    elif args.ipfile:
+        targets = get_ips_from_file(args.ipfile)
+
+    if not targets:
+        exit('[!] No valid targets')
+
+    # Mechanize doesn't respect timeouts when it comes to reading/waiting for SSL info so this is necessary
+    setdefaulttimeout(int(args.timeout))
+
+    con = int(args.concurrent)
+
+    # By default run 1000 requests concurrently at a time
+    target_groups = [targets[x:x+con] for x in range(0, len(targets), con)]
+    for chunk_targets in target_groups:
+        jobs = [gevent.spawn(S.run, target) for target in chunk_targets]
+        gevent.joinall(jobs)
+
+if __name__ == "__main__":
+    main(parse_args())
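+
+# Chunking illustration (made-up numbers): 2500 targets with -c 1000 yields
+# target_groups = [targets[0:1000], targets[1000:2000], targets[2000:2500]],
+# so at most 1000 greenlets are in flight at any one time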