pwnbin.py

import time
#import urllib2
import urllib
import datetime
import sys, getopt
from bs4 import BeautifulSoup
#from StringIO import StringIO
from io import StringIO
import gzip

#https://github.com/kahunalu/pwnbin
#https://github.com/mengyuxin/pwnbin

def main(argv):

    length                                  = 0
    time_out                                = False
    found_keywords                          = []
    paste_list                              = set([])
    root_url                                = 'http://pastebin.com'
    raw_url                                 = 'http://pastebin.com/raw/'
    start_time                              = datetime.datetime.now()
    file_name, keywords, append, run_time, match_total, crawl_total = initialize_options(argv)

    print("\nCrawling %s Press ctrl+c to save file to %s" % (root_url, file_name))

    try:
        # Continually loop until user stops execution
        while True:

            #   Get pastebin home page html
            root_html = BeautifulSoup(fetch_page(root_url), 'html.parser')

            #   For each paste in the public pastes section of home page
            for paste in find_new_pastes(root_html):

                #   look at length of paste_list prior to new element
                length = len(paste_list)
                paste_list.add(paste)

                #   If the length has increased the paste is unique since a set has no duplicate entries
                if len(paste_list) > length:

                    #   Add the pastes url to found_keywords if it contains keywords
                    raw_paste = raw_url+paste
                    found_keywords = find_keywords(raw_paste, found_keywords, keywords)

                else:

                    #   If keywords are not found enter time_out
                    time_out = True

            # Enter the timeout if no new pastes have been found
            if time_out:
                time.sleep(2)

            sys.stdout.write("\rCrawled total of %d Pastes, Keyword matches %d" % (len(paste_list), len(found_keywords)))
            sys.stdout.flush()

            if run_time and (start_time + datetime.timedelta(seconds=run_time)) < datetime.datetime.now():
                sys.stdout.write("\n\nReached time limit, Found %d matches." % len(found_keywords))
                write_out(found_keywords, append, file_name)
                sys.exit()

            # Exit if surpassed specified match timeout
            if match_total and len(found_keywords) >= match_total:
                sys.stdout.write("\n\nReached match limit, Found %d matches." % len(found_keywords))
                write_out(found_keywords, append, file_name)
                sys.exit()

            # Exit if surpassed specified crawl total timeout
            if crawl_total and len(paste_list) >= crawl_total:
                sys.stdout.write("\n\nReached total crawled Pastes limit, Found %d matches." % len(found_keywords))
                write_out(found_keywords, append, file_name)
                sys.exit()

    #   On keyboard interupt
    except KeyboardInterrupt:
        write_out(found_keywords, append, file_name)

    #   If http request returns an error and
    except urllib.request.HTTPError as err:
        if err.code == 404:
            print("\n\nError 404: Pastes not found!")
        elif err.code == 403:
            print("\n\nError 403: Pastebin is mad at you!")
        else:
            print("\n\nYou\'re on your own on this one! Error code ", err.code)
        write_out(found_keywords, append, file_name)

    #   If http request returns an error and
    except urllib.request.URLError as err:
        print("\n\nYou\'re on your own on this one! Error code ", err)
        write_out(found_keywords, append, file_name)


def write_out(found_keywords, append, file_name):
    #   if pastes with keywords have been found
    if len(found_keywords):

        #   Write or Append out urls of keyword pastes to file specified
        if append:
            f = open(file_name, 'a')
        else:
            f = open(file_name, 'w')

        for paste in found_keywords:
            f.write(paste)
        print("\n")
    else:
        print("\n\nNo relevant pastes found, exiting\n\n")

def find_new_pastes(root_html):
    new_pastes = []

    div = root_html.find('div', {'id': 'menu_2'})
    ul = div.find('ul', {'class': 'right_menu'})

    for li in ul.findChildren():
        if li.find('a'):
            new_pastes.append(str(li.find('a').get('href')).replace("/", ""))

    return new_pastes

def find_keywords(raw_url, found_keywords, keywords):
    paste = fetch_page(raw_url)

    #   Todo: Add in functionality to rank hit based on how many of the keywords it contains
    for keyword in keywords:
        if paste.find(keyword) != -1:
            found_keywords.append("found " + keyword + " in " + raw_url + "\n")
            break

    return found_keywords

def fetch_page(page):
    response = urllib.request.urlopen(page)
    if response.info().get('Content-Encoding') == 'gzip':
        response_buffer = StringIO(response.read())
        unzipped_content = gzip.GzipFile(fileobj=response_buffer)
        return unzipped_content.read()
    else:
        return response.read()

def initialize_options(argv):
    keywords            = ['ssh', 'pass', 'key', 'token']
    file_name           = 'log.txt'
    append              = False
    run_time            = 0
    match_total         = None
    crawl_total         = None

    try:
        opts, args = getopt.getopt(argv,"h:k:o:t:n:m:a")
    except getopt.GetoptError:
        print('pwnbin.py -k <keyword1>,<keyword2>,<keyword3>..... -o <outputfile>')
        sys.exit(2)

    for opt, arg in opts:

        if opt == '-h':
            print('pwnbin.py -k <keyword1>,<keyword2>,<keyword3>..... -o <outputfile>')
            sys.exit()
        elif opt == '-a':
            append = True
        elif opt == "-k":
            keywords = set(arg.split(","))
        elif opt == "-o":
            file_name = arg
        elif opt == "-t":
            try:
                run_time = int(arg)
            except ValueError:
                print("Time must be an integer representation of seconds.")
                sys.exit()
        elif opt == '-m':
            try:
                match_total = int(arg)
            except ValueError:
                print("Number of matches must be an integer.")
                sys.exit()

        elif opt == '-n':
            try:
                crawl_total = int(arg)
            except ValueError:
                print("Number of total crawled pastes must be an integer.")
                sys.exit()

    return file_name, keywords, append, run_time, match_total, crawl_total

if __name__ == "__main__":
    main(sys.argv[1:])