pwnbin.py
mengyuxin, Jan 24th, 2018
import time
import datetime
import sys, getopt
import gzip
import urllib.request
import urllib.error
from bs4 import BeautifulSoup

#https://github.com/kahunalu/pwnbin
#https://github.com/mengyuxin/pwnbin

def main(argv):

    length          = 0
    time_out        = False
    found_keywords  = []
    paste_list      = set([])
    root_url        = 'http://pastebin.com'
    raw_url         = 'http://pastebin.com/raw/'
    start_time      = datetime.datetime.now()
    file_name, keywords, append, run_time, match_total, crawl_total = initialize_options(argv)

    print("\nCrawling %s Press ctrl+c to save file to %s" % (root_url, file_name))

    try:
        # Continually loop until the user stops execution
        while True:

            #   Get the Pastebin home page HTML
            root_html = BeautifulSoup(fetch_page(root_url), 'html.parser')

            #   For each paste in the public pastes section of the home page
            for paste in find_new_pastes(root_html):

                #   Record the length of paste_list before adding the new element
                length = len(paste_list)
                paste_list.add(paste)

                #   If the length has increased, the paste is new, since a set holds no duplicates
                if len(paste_list) > length:

                    #   Add the paste's URL to found_keywords if it contains any of the keywords
                    raw_paste = raw_url + paste
                    found_keywords = find_keywords(raw_paste, found_keywords, keywords)

                else:

                    #   The paste was already seen, so enter the timeout
                    time_out = True

            # Sleep briefly if no new pastes were found this pass, then clear the flag
            if time_out:
                time.sleep(2)
                time_out = False

            sys.stdout.write("\rCrawled total of %d Pastes, Keyword matches %d" % (len(paste_list), len(found_keywords)))
            sys.stdout.flush()

            # Exit if the specified run time has elapsed
            if run_time and (start_time + datetime.timedelta(seconds=run_time)) < datetime.datetime.now():
                sys.stdout.write("\n\nReached time limit, Found %d matches." % len(found_keywords))
                write_out(found_keywords, append, file_name)
                sys.exit()

            # Exit if the specified match limit has been reached
            if match_total and len(found_keywords) >= match_total:
                sys.stdout.write("\n\nReached match limit, Found %d matches." % len(found_keywords))
                write_out(found_keywords, append, file_name)
                sys.exit()

            # Exit if the specified crawl limit has been reached
            if crawl_total and len(paste_list) >= crawl_total:
                sys.stdout.write("\n\nReached total crawled Pastes limit, Found %d matches." % len(found_keywords))
                write_out(found_keywords, append, file_name)
                sys.exit()

    #   On keyboard interrupt, save whatever has been found so far
    except KeyboardInterrupt:
        write_out(found_keywords, append, file_name)

    #   If the HTTP request returns an error status code
    except urllib.error.HTTPError as err:
        if err.code == 404:
            print("\n\nError 404: Pastes not found!")
        elif err.code == 403:
            print("\n\nError 403: Pastebin is mad at you!")
        else:
            print("\n\nYou're on your own on this one! Error code ", err.code)
        write_out(found_keywords, append, file_name)

    #   If the request fails before a response is received (e.g. DNS or connection error)
    except urllib.error.URLError as err:
        print("\n\nYou're on your own on this one! Error ", err)
        write_out(found_keywords, append, file_name)


def write_out(found_keywords, append, file_name):
    #   If pastes with keywords have been found
    if len(found_keywords):

        #   Write or append the URLs of matching pastes to the specified file
        mode = 'a' if append else 'w'
        with open(file_name, mode) as f:
            for paste in found_keywords:
                f.write(paste)
        print("\n")
    else:
        print("\n\nNo relevant pastes found, exiting\n\n")

def find_new_pastes(root_html):
    new_pastes = []

    #   The public pastes list lives in the 'menu_2' sidebar of the home page
    div = root_html.find('div', {'id': 'menu_2'})
    ul = div.find('ul', {'class': 'right_menu'})

    for li in ul.findChildren():
        if li.find('a'):
            new_pastes.append(str(li.find('a').get('href')).replace("/", ""))

    return new_pastes

def find_keywords(raw_url, found_keywords, keywords):
    paste = fetch_page(raw_url)

    #   Todo: rank hits based on how many of the keywords each paste contains
    for keyword in keywords:
        if paste.find(keyword) != -1:
            found_keywords.append("found " + keyword + " in " + raw_url + "\n")
            break

    return found_keywords

def fetch_page(page):
    #   Fetch a page and return its body as text, decompressing gzip responses if needed
    response = urllib.request.urlopen(page)
    content = response.read()
    if response.info().get('Content-Encoding') == 'gzip':
        content = gzip.decompress(content)
    return content.decode('utf-8', errors='replace')

def initialize_options(argv):
    keywords            = ['ssh', 'pass', 'key', 'token']
    file_name           = 'log.txt'
    append              = False
    run_time            = 0
    match_total         = None
    crawl_total         = None

    try:
        #   -h and -a are flags; the remaining options each take an argument
        opts, args = getopt.getopt(argv, "hak:o:t:n:m:")
    except getopt.GetoptError:
        print('pwnbin.py -k <keyword1>,<keyword2>,<keyword3>..... -o <outputfile>')
        sys.exit(2)

    for opt, arg in opts:

        if opt == '-h':
            print('pwnbin.py -k <keyword1>,<keyword2>,<keyword3>..... -o <outputfile>')
            sys.exit()
        elif opt == '-a':
            append = True
        elif opt == "-k":
            keywords = set(arg.split(","))
        elif opt == "-o":
            file_name = arg
        elif opt == "-t":
            try:
                run_time = int(arg)
            except ValueError:
                print("Time must be an integer representation of seconds.")
                sys.exit()
        elif opt == '-m':
            try:
                match_total = int(arg)
            except ValueError:
                print("Number of matches must be an integer.")
                sys.exit()

        elif opt == '-n':
            try:
                crawl_total = int(arg)
            except ValueError:
                print("Number of total crawled pastes must be an integer.")
                sys.exit()

    return file_name, keywords, append, run_time, match_total, crawl_total

if __name__ == "__main__":
    main(sys.argv[1:])
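
A minimal usage sketch, assuming the script above is saved as pwnbin.py and run under Python 3 with beautifulsoup4 installed. The keywords, output file, match limit, and run time below are illustrative values, not the script's defaults (those are 'ssh', 'pass', 'key', 'token' and log.txt):

# Crawl until 5 matches are collected, appending hit URLs to hits.txt (example values):
#   python3 pwnbin.py -k password,api_key,token -o hits.txt -m 5 -a
# Or drive it from another Python script; the module name assumes the file is pwnbin.py:
import pwnbin
pwnbin.main(["-k", "password,api_key", "-o", "hits.txt", "-t", "600"])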