Guest User

collect.py

a guest
Jan 29th, 2017
526
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 4.38 KB | None | 0 0
  1. '''
  2. http://pastebin.com/D1JUm1nk
  3. 5902a57cf13f6280665357eaceec1cabe9129f91369475697abe85be52278ea8
  4.  
  5.  
  6. In this step we'll be creating a distributed index. This script is comprised of
  7. a scanner called the Carpenter that will spawn several threads, each sending out
  8. feelers looking for web servers out there that are willing to respond. Responses
  9. are kept in a sqlite database named walrus.db
  10.  
  11. walrus.db will contain a table, Walrus, with the following columns:
  12.    host -- The host IP address of the discovered server
  13.    key  -- A whitelisted header key from the server response, or 'status'
  14.    val  -- The value for that key (the HTTP status code when key is 'status')
  15.  
  16. This will allow us to later create a massively parallel database of all servers
  17. that interest us. But for now, let's just do some collecting.
  18.  
  19. Save this file as collect.py and execute it using
  20. python (https://www.python.org/downloads/) with the following command:
  21.  
  22. python collect.py
  23.  
  24. The longer you run this the more you will collect. You can quit and resume at
  25. any time.
  26.  
  27. Feel free to query this table directly should you be interested in this
  28. intermediate state. But whatever you do, keep your walrus.db file safe because
  29. we'll be using this in our next step, which will be published with the following
  30. key: 38a11b99402ab435eb0bc53419bf278e582cb3c3361da5be1b8f69d3bf422470
  31.  
  32. Thank you for your support. Together we can do this!
  33.  
  34. DISCLAIMER: Your ISP (or local law) may prohibit scanning of this nature. Hey,
  35. let's be careful out there.
  36. '''
  37.  
  38. try:
  39.     from Queue import Queue
  40. except:
  41.     from queue import Queue
  42. from random import randrange as rand
  43. try:
  44.     from requests import get
  45. except:
  46.     print('requests module required:\npip install requests')
  47.     exit()
  48. from sqlite3 import connect
  49. from threading import Thread, active_count
  50. from time import sleep
  51.  
  52. # whitelist of header keys
  53. KEYS = [
  54.     'connection',
  55.     'content-encoding',
  56.     'content-length',
  57.     'content-type',
  58.     'etag',
  59.     'server',
  60.     'x-frame-options',
  61.     'x-powered-by'
  62. ]
  63.  
  64. class Carpenter:
  65.  
  66.     def __init__(self, threadcount=100):
  67.         self.threadcount = threadcount
  68.  
  69.     def run(self):
  70.         '''
  71.        Let's get this party started!
  72.        '''
  73.         queue = Queue()
  74.         threads = []
  75.         for i in range(self.threadcount):
  76.             self.walk(self.worker, queue)
  77.         self.walk(self.tweedle, queue)
  78.         while active_count() > 0:
  79.             sleep(0.25)
  80.  
  81.     def walk(self, fn, queue):
  82.         '''
  83.        Spawn a daemon thread. 2spooky4me.
  84.        '''
  85.         thread = Thread(target=fn, args=(queue,))
  86.         thread.daemon = True
  87.         thread.start()
  88.  
  89.     def tweedle(self, queue):
  90.         '''
  91.        Process the queue in one thread (because sqlite).
  92.        '''
  93.         dee = connect('walrus.db')
  94.         dum = dee.cursor()
  95.         dum.execute('''
  96.            create table if not exists Walrus (host text, key text, val text)
  97.        ''')
  98.         dum.execute('create index if not exists idx_host on Walrus (host)')
  99.         dee.commit()
  100.         while True:
  101.             host, headers = queue.get()
  102.             if headers:
  103.                 print(host)
  104.                 values = []
  105.                 for key, val in headers.items():
  106.                     print('   ', key, val)
  107.                     values.append((host, key, val))
  108.                 dum.executemany('''
  109.                    insert into Walrus (host, key, val) values (?, ?, ?)
  110.                ''', values)
  111.                 dee.commit()
  112.  
  113.     def work(self):
  114.         '''
  115.        Do work on one random host.
  116.        '''
  117.         host = '{}.{}.{}.{}'.format(rand(256), rand(256), rand(256), rand(256))
  118.         req = get('http://' + host, timeout=5, headers={
  119.             'User-Agent': 'Do you admire the view?',
  120.         })
  121.         headers = {}
  122.         for key, val in req.headers.items():
  123.             # Restrict the value length... There are weirdos ones out there...
  124.             if key.lower() in KEYS and len(val) < 256:
  125.                 headers[key] = val
  126.         headers['status'] = req.status_code
  127.         return [host, headers]
  128.  
  129.     def worker(self, queue):
  130.         '''
  131.        Work forever... :|
  132.        Don't worry, computers are into that kind of thing.
  133.        '''
  134.         while True:
  135.             try:
  136.                 result = self.work()
  137.             except:
  138.                 continue
  139.             queue.put(result)
  140.  
  141.  
  142. if __name__ == '__main__':
  143.     Carpenter().run()
Add Comment
Please, Sign In to add comment