#!/usr/bin/env python
# -*- coding: latin-1 -*- ######################################################
# ____ _ __ #
# ___ __ __/ / /__ ___ ______ ______(_) /___ __ #
# / _ \/ // / / (_-</ -_) __/ // / __/ / __/ // / #
# /_//_/\_,_/_/_/___/\__/\__/\_,_/_/ /_/\__/\_, / #
# /___/ team #
# #
# crawlb0y.py - fast and easy to use url + parameter crawler #
# #
# FILE #
# crawlb0y.py #
# #
# DATE #
# 2013-10-31 #
# #
# DESCRIPTION #
# 'crawlb0y.py' is a very fast url + parameter crawler ... #
# ... Faster, Harder, Craaaawleer!!1!1 #
# #
# AUTHOR #
# pigtail23 aka pgt #
# #
################################################################################
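# example run (example.com is a placeholder, all options are defined below):
#   ./crawlb0y.py -u https://www.example.com/ -d 2 -O -o urls.txt -p params.txt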
import threading
import urllib2
import time
import os
import argparse
from socket import *

# crawlb0y.py version string
VERSION="v0.1.1"
# print our nice banner ;)
def banner():
    print '--==[ crawlb0y.py by pigtail23@nullsecurity.net ]==--'

# print version
def version():
    print '[+] crawlb0y.py %s' % (VERSION)
    exit(0)

# the best code part ;)
def finished():
    print '\n[!] h4ppy 0wn1ng'
# defines the command line parameters and help page
def argspage():
    parser = argparse.ArgumentParser(
        usage='\n ./%(prog)s -u <arg> [options]',
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog=
        'examples:\n\n' \
        ' crawl also links with other domain name\n' \
        ' usage: ./%(prog)s -u https://www.xxx.com/ -s api.xxx.com\n\n' \
        ' show links with parameters only\n' \
        ' usage: ./%(prog)s -u www.xxx.com -O',
        add_help=False
        )

    options = parser.add_argument_group('options', '')
    options.add_argument('-u', default=False, metavar='<url>',
            help='url to test')
    options.add_argument('-s', default='gimmaapibro', metavar='<url>',
            help='grab and crawl other found urls also (e.g. api.xxx.com)')
    options.add_argument('-P', default=False, metavar='<num>',
            type=int, help='port')
    options.add_argument('-d', default=1, metavar='<num>',
            type=int, help='crawl depth level (default: 1)')
    options.add_argument('-e', default='w00tw00tn00b', metavar='<string>',
            help='urls must contain given <string> (e.g. .php)')
    options.add_argument('-N', const=True, action='store_const',
            help='print urls without parameters')
    options.add_argument('-O', const=True, action='store_const',
            help='print urls with parameters only')
    options.add_argument('-A', default=False, metavar='<string>',
            help='http basic auth: username:password')
    options.add_argument('-t', default=4, metavar='<num>',
            type=int, help='threads (default: 4)')
    options.add_argument('-T', default=3, metavar='<sec>',
            type=int, help='timeout in seconds (default: 3)')
    options.add_argument('-o', default=False, metavar='<filename>',
            help='write urls to file')
    options.add_argument('-p', default=False, metavar='<filename>',
            help='write parameters to file')
    options.add_argument('-V', action='store_true',
            help='print version of crawlb0y.py and exit')

    args = parser.parse_args()

    if args.V:
        version()

    if (args.u == False):
        print ''
        parser.print_help()
        exit(0)

    return args
# write urls and parameters to file
def writetofile(args, content, check):
    if (args.o != False) and (check == 0):
        filename = args.o
    elif (args.p != False) and (check == 1):
        filename = args.p
    else:
        return

    content = '%s\n' % (content)

    try:
        outfile = open(filename, 'a')
        outfile.write(content)
        outfile.close()
    except:
        print '[-] cannot write to file'
        os._exit(1)
# print what?
def print_baby(what):
    print '[+] %s' % (what)

# print found urls
def printurls(args, urls):
    if args.N:
        noparams = []
        for url in urls:
            url = url.split('?')
            noparams.append(url[0])
        urls = set(noparams)

    print '\n[*] crawled urls\n================\n'

    for url in urls:
        if (url.find(args.e) == -1) and (args.e != 'w00tw00tn00b'):
            continue
        if args.O == True:
            if (url.find('?') != -1) and (url.find('=') != -1):
                print_baby(url)
                writetofile(args, url, 0)
        else:
            print_baby(url)
            writetofile(args, url, 0)

    print '\n'

# grabbing and printing parameters (will be refactored)
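# e.g. 'http://host/a.php?id=1&name=2' yields the parameter names 'id' and 'name'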
def parameters(args, filtered):
    params = []

    for url in filtered:
        u = 0
        # undo html-encoded ampersands so the '&' separators are found
        # (assumes the original href contained '&amp;')
        url = url.replace('&amp;', '&')
        while u < len(url):
            begin = url[u:].find('?')
            if (begin == -1):
                begin_two = url[u:].find('&')
                if (begin_two != -1):
                    begin_two += u
                    end = url[begin_two+1:].find('=')
                    if end == -1:
                        break
                    u = end + begin_two
                    params.append(url[begin_two+1:u+1])
                else:
                    u = len(url)
            else:
                if not url[begin+1:]:
                    break
                end = url[begin:].find('=')
                if end == -1:
                    break
                u = begin + end
                params.append(url[begin+1:u])
                # mask the handled '?' so the next pass picks up '&' parameters
                url = url.replace('?', '%3f')

    params = set(params)

    print '\n[*] grabbed parameters\n======================\n'

    for param in params:
        if (param.find('http://') == -1) and (param.find('https://') == -1):
            writetofile(args, param, 1)
            print_baby(param)
# blacklist for url content
def checkblack(url):
    blacklist = (['.css', '.ico', '.jpg', 'mailto:', 'javascript:', '.doc',
        '.pdf', '.png'])

    for black in blacklist:
        if url.find(black) != -1:
            return True

# sub / any domain grabbing
def subdomain(subdomain, url):
    begin = url.find(subdomain)
    if begin != -1:
        return True
# filter all found urls
def filterurls(args):
    filtered = []
    start_url = args.u

    for url in URLLIST:
        # undo html-encoded ampersands (assumes the original href contained '&amp;')
        url = url.replace('&amp;', '&')
        if len(url) < 2:
            continue
        elif url == start_url:
            continue
        elif checkblack(url) == True:
            continue
        elif url.find(start_url) != -1:
            filtered.append(url)
        elif (url.find('http://') == -1) and (url.find('https://') == -1):
            if url[0] == '/':
                url = url[1:]
            url = '%s%s' % (start_url, url)
            filtered.append(url)
        elif subdomain(args.s, url) == True:
            filtered.append(url)

    return set(filtered)

# find urls after request
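# (scans the response body for href="..." attributes and appends each quoted
#  link target to the global URLLIST)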
def findurls(response):
    u = 0
    while u < len(response):
        begin = response[u:].find('href="')
        if begin == -1:
            return 1
        begin += 6 + u
        end = response[begin:].find('"') + begin + 1
        URLLIST.append(response[begin-1:end].replace('"', ''))
        u = end
# send http/https request to server
def scan(url, to, i):
    request = urllib2.Request(url)

    try:
        response = urllib2.urlopen(request, timeout = to)
        findurls(response.read())
    except urllib2.HTTPError, e:
        return
    except urllib2.URLError, e:
        reason = '%s' % (e.reason)
        if reason == '[Errno 61] Connection refused':
            print '[-] Connection refused: %s' % (url)
            os._exit(1)
        if reason == 'timed out':
            print '[-] %s timed out' % (url)
            # retry a timed-out url up to 10 times
            if i < 10:
                i += 1
                scan(url, to, i)
        return
    except:
        # retry on any other error up to 10 times
        if i < 10:
            i += 1
            scan(url, to, i)
        return
# w3 <3 7hr34d1ng
def crawlmore(args, urls):
    to = args.T
    threads = args.t

    for url in urls:
        Run = threading.Thread(target=scan, args=(url, to, 0,))
        Run.start()
        # make sure we do not exceed the configured max number of threads
        while threading.activeCount() > threads:
            time.sleep(0.01)
        time.sleep(0.001)

    # waiting for the last running threads
    while threading.activeCount() > 1:
        time.sleep(0.1)

# test for open http/https port (better than urllib2 stuff)
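# returns 0 if port 80 accepts the connection, 1 if port 443 does, None otherwise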
def test_http(target, timeout):
    try:
        s = socket(AF_INET, SOCK_STREAM)
        s.settimeout(timeout)
        result = s.connect_ex((target, 80))
        s.close()
        if result == 0:
            return 0

        s = socket(AF_INET, SOCK_STREAM)
        s.settimeout(timeout)
        result = s.connect_ex((target, 443))
        s.close()
        if result == 0:
            return 1

        return
    except:
        print '[-] cannot resolve ip'
        os._exit(1)

# check url format, make it valid for urllib2
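# e.g. 'www.example.com' becomes 'http://www.example.com/' when port 80 is open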
def checkformat(args):
    url = args.u
    to = args.T

    if (url.find('http://') == -1) and (url.find('https://') == -1):
        if test_http(url, to) == 0:
            url = 'http://%s' % (url)
        elif test_http(url, to) == 1:
            url = 'https://%s' % (url)
        else:
            print '[-] cannot connect'
            os._exit(1)

    if url[len(url) - 1].find('/') == -1:
        url = '%s/' % (url)

    return url
# 57up1d m41n
def main():
    banner()
    args = argspage()
    args.u = checkformat(args)
    urls = [ args.u ]

    print '\n[*] crawling %s' % (args.u)

    for i in range(args.d):
        crawlmore(args, urls)
        urls = filterurls(args)

    printurls(args, urls)
    parameters(args, urls)
    finished()

if __name__ == '__main__':
    URLLIST = []
    try:
        main()
    except KeyboardInterrupt:
        print '\nbye bye!!!'
        time.sleep(0.01)
        os._exit(1)

# EOF