#!/usr/bin/env python
# -*- coding: latin-1 -*- ######################################################
# ____ _ __ #
# ___ __ __/ / /__ ___ ______ ______(_) /___ __ #
# / _ \/ // / / (_-</ -_) __/ // / __/ / __/ // / #
# /_//_/\_,_/_/_/___/\__/\__/\_,_/_/ /_/\__/\_, / #
# /___/ team #
# #
# crawlb0y.py - fast and easy to use url + parameter crawler #
# #
# FILE #
# crawlb0y.py #
# #
# DATE #
# 2013-10-31 #
# #
# DESCRIPTION #
# 'crawlb0y.py' is a very fast url + parameter crawler ... #
# ... Faster, Harder, Craaaawleer!!1!1 #
# #
# AUTHOR #
# pigtail23 aka pgt #
# #
################################################################################
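# example run (example.com is a placeholder, all options are defined below):
#   ./crawlb0y.py -u https://www.example.com/ -d 2 -O -o urls.txt -p params.txt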
import threading
import urllib2
import time
import os
import argparse
from socket import *

# crawlb0y.py version string
VERSION="v0.1.1"
# print our nice banner ;)
def banner():
    print '--==[ crawlb0y.py by pigtail23@nullsecurity.net ]==--'

# print version
def version():
    print '[+] crawlb0y.py %s' % (VERSION)
    exit(0)

# the best code part ;)
def finished():
    print '\n[!] h4ppy 0wn1ng'
# defines the command line parameters and help page
def argspage():
    parser = argparse.ArgumentParser(
        usage='\n ./%(prog)s -u <arg> [options]',
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog=
        'examples:\n\n' \
        ' crawl also links with other domain name\n' \
        ' usage: ./%(prog)s -u https://www.xxx.com/ -s api.xxx.com\n\n' \
        ' show links with parameters only\n' \
        ' usage: ./%(prog)s -u www.xxx.com -O',
        add_help=False
        )

    options = parser.add_argument_group('options', '')
    options.add_argument('-u', default=False, metavar='<url>',
            help='url to test')
    options.add_argument('-s', default='gimmaapibro', metavar='<url>',
            help='grab and crawl other found urls also (e.g. api.xxx.com)')
    options.add_argument('-P', default=False, metavar='<num>',
            type=int, help='port')
    options.add_argument('-d', default=1, metavar='<num>',
            type=int, help='crawl depth level (default: 1)')
    options.add_argument('-e', default='w00tw00tn00b', metavar='<string>',
            help='urls must contain given <string> (e.g. .php)')
    options.add_argument('-N', const=True, action='store_const',
            help='print urls without parameters')
    options.add_argument('-O', const=True, action='store_const',
            help='print urls with parameters only')
    options.add_argument('-A', default=False, metavar='<string>',
            help='http basic auth: username:password')
    options.add_argument('-t', default=4, metavar='<num>',
            type=int, help='threads (default: 4)')
    options.add_argument('-T', default=3, metavar='<sec>',
            type=int, help='timeout in seconds (default: 3)')
    options.add_argument('-o', default=False, metavar='<filename>',
            help='write urls to file')
    options.add_argument('-p', default=False, metavar='<filename>',
            help='write parameters to file')
    options.add_argument('-V', action='store_true',
            help='print version of crawlb0y.py and exit')

    args = parser.parse_args()

    if args.V:
        version()

    if (args.u == False):
        print ''
        parser.print_help()
        exit(0)

    return args
# write urls and parameters to file
def writetofile(args, content, check):
    if (args.o != False) and (check == 0):
        filename = args.o
    elif (args.p != False) and (check == 1):
        filename = args.p
    else:
        return

    content = '%s\n' % (content)

    try:
        outfile = open(filename, 'a')
        outfile.write(content)
        outfile.close()
    except:
        print '[-] cannot write to file'
        os._exit(1)
# print what?
def print_baby(what):
    print '[+] %s' % (what)

# print found urls
def printurls(args, urls):
    if args.N:
        noparams = []
        for url in urls:
            url = url.split('?')
            noparams.append(url[0])
        urls = set(noparams)

    print '\n[*] crawled urls\n================\n'

    for url in urls:
        if (url.find(args.e) == -1) and (args.e != 'w00tw00tn00b'):
            continue
        if args.O == True:
            if (url.find('?') != -1) and (url.find('=') != -1):
                print_baby(url)
                writetofile(args, url, 0)
        else:
            print_baby(url)
            writetofile(args, url, 0)

    print '\n'

# grabbing and printing parameters (will be refactored)
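# e.g. 'http://host/a.php?id=1&name=2' yields the parameter names 'id' and 'name'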
def parameters(args, filtered):
    params = []

    for url in filtered:
        u = 0
        # undo html-encoded ampersands so the '&' separators are found
        # (assumes the original href contained '&amp;')
        url = url.replace('&amp;', '&')
        while u < len(url):
            begin = url[u:].find('?')
            if (begin == -1):
                begin_two = url[u:].find('&')
                if (begin_two != -1):
                    begin_two += u
                    end = url[begin_two+1:].find('=')
                    if end == -1:
                        break
                    u = end + begin_two
                    params.append(url[begin_two+1:u+1])
                else:
                    u = len(url)
            else:
                if not url[begin+1:]:
                    break
                end = url[begin:].find('=')
                if end == -1:
                    break
                u = begin + end
                params.append(url[begin+1:u])
                # mask the handled '?' so the next pass picks up '&' parameters
                url = url.replace('?', '%3f')

    params = set(params)

    print '\n[*] grabbed parameters\n======================\n'

    for param in params:
        if (param.find('http://') == -1) and (param.find('https://') == -1):
            writetofile(args, param, 1)
            print_baby(param)
# blacklist for url content
def checkblack(url):
    blacklist = (['.css', '.ico', '.jpg', 'mailto:', 'javascript:', '.doc',
        '.pdf', '.png'])

    for black in blacklist:
        if url.find(black) != -1:
            return True

# sub / any domain grabbing
def subdomain(subdomain, url):
    begin = url.find(subdomain)
    if begin != -1:
        return True
# filter all found urls
def filterurls(args):
    filtered = []
    start_url = args.u

    for url in URLLIST:
        # undo html-encoded ampersands (assumes the original href contained '&amp;')
        url = url.replace('&amp;', '&')
        if len(url) < 2:
            continue
        elif url == start_url:
            continue
        elif checkblack(url) == True:
            continue
        elif url.find(start_url) != -1:
            filtered.append(url)
        elif (url.find('http://') == -1) and (url.find('https://') == -1):
            if url[0] == '/':
                url = url[1:]
            url = '%s%s' % (start_url, url)
            filtered.append(url)
        elif subdomain(args.s, url) == True:
            filtered.append(url)

    return set(filtered)

# find urls after request
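# (scans the response body for href="..." attributes and appends each quoted
#  link target to the global URLLIST)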
def findurls(response):
    u = 0
    while u < len(response):
        begin = response[u:].find('href="')
        if begin == -1:
            return 1
        begin += 6 + u
        end = response[begin:].find('"') + begin + 1
        URLLIST.append(response[begin-1:end].replace('"', ''))
        u = end
# send http/https request to server
def scan(url, to, i):
    request = urllib2.Request(url)

    try:
        response = urllib2.urlopen(request, timeout = to)
        findurls(response.read())
    except urllib2.HTTPError, e:
        return
    except urllib2.URLError, e:
        reason = '%s' % (e.reason)
        if reason == '[Errno 61] Connection refused':
            print '[-] Connection refused: %s' % (url)
            os._exit(1)
        if reason == 'timed out':
            print '[-] %s timed out' % (url)
            # retry a timed-out url up to 10 times
            if i < 10:
                i += 1
                scan(url, to, i)
        return
    except:
        # retry on any other error up to 10 times
        if i < 10:
            i += 1
            scan(url, to, i)
        return
# w3 <3 7hr34d1ng
def crawlmore(args, urls):
    to = args.T
    threads = args.t

    for url in urls:
        Run = threading.Thread(target=scan, args=(url, to, 0,))
        Run.start()
        # make sure we do not exceed the configured max number of threads
        while threading.activeCount() > threads:
            time.sleep(0.01)
        time.sleep(0.001)

    # waiting for the last running threads
    while threading.activeCount() > 1:
        time.sleep(0.1)

# test for open http/https port (better than urllib2 stuff)
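# returns 0 if port 80 accepts the connection, 1 if port 443 does, None otherwise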
def test_http(target, timeout):
    try:
        s = socket(AF_INET, SOCK_STREAM)
        s.settimeout(timeout)
        result = s.connect_ex((target, 80))
        s.close()
        if result == 0:
            return 0

        s = socket(AF_INET, SOCK_STREAM)
        s.settimeout(timeout)
        result = s.connect_ex((target, 443))
        s.close()
        if result == 0:
            return 1

        return
    except:
        print '[-] cannot resolve ip'
        os._exit(1)

# check url format, make it valid for urllib2
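# e.g. 'www.example.com' becomes 'http://www.example.com/' when port 80 is open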
def checkformat(args):
    url = args.u
    to = args.T

    if (url.find('http://') == -1) and (url.find('https://') == -1):
        if test_http(url, to) == 0:
            url = 'http://%s' % (url)
        elif test_http(url, to) == 1:
            url = 'https://%s' % (url)
        else:
            print '[-] cannot connect'
            os._exit(1)

    if url[len(url) - 1].find('/') == -1:
        url = '%s/' % (url)

    return url
# 57up1d m41n
def main():
    banner()
    args = argspage()
    args.u = checkformat(args)
    urls = [ args.u ]

    print '\n[*] crawling %s' % (args.u)

    for i in range(args.d):
        crawlmore(args, urls)
        urls = filterurls(args)

    printurls(args, urls)
    parameters(args, urls)
    finished()

if __name__ == '__main__':
    URLLIST = []
    try:
        main()
    except KeyboardInterrupt:
        print '\nbye bye!!!'
        time.sleep(0.01)
        os._exit(1)

# EOF