k3170makan

HuntingDuck -- Python duckduckgo dorker

Mar 15th, 2012
#!/usr/bin/python
import urllib2,urllib,sys,re,cookielib
from BeautifulSoup import BeautifulSoup as soup
#import httplib
"""
    Author: Keith (k3170) Makan
    Twitter: @k3170makan
    site:k3170makan.blogspot.com

    Descr: A script that allows you to use the power of duckduckgo dorks straight from the comfort of your command line
"""
class HuntingDuck:
    def __init__(self,isProxied,proxy_details):
        """
            THERE ARE NO HACKING TOOLS/TARGETS EXCEPT THE HUMAN MIND -- k3170makan

            The object that handles all the net requests and just returns the Dorker
            the goodies it needs --- encapsulations like a BAUSS!!

            *the following is facilitated but not implemented in the code

            proxy_details ---- should be the address of the proxy host; if one is not supplied it will remain
                                     empty, and urllib2 will continue working ;)
                e.g inet_('http://username:password@host:port_num')
            isProxied     ---- <True|False> specifies whether the dorking is happening from behind a proxy
        """
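        # A minimal sketch of a proxied instantiation, per the docstring above
        # (host, port and credentials are placeholders, not real values):
        #   hd = HuntingDuck(True, 'http://username:password@proxyhost:8080')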
        self.isProxied = isProxied
        self.http_inst = ''
        print "{ Setting up connection objects...."
        if isProxied:
            print "{ Setting up proxy API functions..."
            self.proxy_handler = urllib2.ProxyHandler({'http':proxy_details}) #build the proxy handler obj
            self.cookiejar = cookielib.CookieJar() #not needed now!
            try:
                self.opener = urllib2.build_opener(self.proxy_handler,urllib2.HTTPCookieProcessor(self.cookiejar))
            except Exception:
                print " HALT -- Problem with proxy set up :( }"
                return
            print " OKAY }"
        else:
            self.proxy_handler = urllib2.ProxyHandler({}) #empty handler == direct connection
            self.cookiejar = cookielib.CookieJar() #not needed now!
            try:
                self.opener = urllib2.build_opener(self.proxy_handler,urllib2.HTTPCookieProcessor(self.cookiejar))
            except Exception:
                print " HALT -- Problem with setting up connection objects :( }"
                return
            print " OKAY }"
        print "DONE } ready to start querying!"
        self.headers = []
    def getPage(self,dork):
        print "{ getting page ... }"
        self.headers = []
        self.headers.append(('Host','duckduckgo.com'))
        self.headers.append(('User-agent','Mozilla/5.0 (X11; Linux i686 on x86_64; rv:9.0.1) Gecko/20100101 Firefox/9.0.1'))
        self.headers.append(('Referer','k3170makan.blogspot.com')) #the HTTP header really is spelt "Referer"
        self.opener.addheaders = self.headers
        print "{ Preparing url opener object..."
        try:
            urllib2.install_opener(self.opener) #okay now i can start making requests n shit
        except Exception, e:
            print " HALT -- problem installing opener obj"
            print e
            return ""
        print " OKAY }"
        try:
            print "{ Sending request "
            try:
                #quote the dork so spaces and special characters survive the trip
                request = urllib2.Request('http://www.duckduckgo.com/html?q='+urllib.quote_plus(dork))
            except urllib2.URLError, e:
                print e
                print "}"
                return ""
            print " }"
            print
            print "{ Request sent: \n"
            for header in self.headers:
                print "<<< [%s] : %s " % (header[0],header[1])
            print " }"
            response = urllib2.urlopen(request)
            print "{ Received response:\n "
            resp_headers = response.info()
            for name,value in resp_headers.items(): #items() yields (header,value) pairs
                print ">>> [%s] : %s " % (name,value)
            print " }"
            html = response.read()
            print "Received <%s> bytes of data" % (len(html))
        except urllib2.URLError, e:
            print e
            return ""
        return html
    def getURLs(self,html):
        stripper = Stripper(html)
        links = stripper.strip()
        print "Results:"
        print
        for index,link in enumerate(links):
            print "%d) %s %s" % (index+1,link.URL,link.snippet)
            print
class resultLink:
    def __init__(self,snippet,URL,srv_type,src_type):
        self.snippet = snippet
        self.URL = URL
        self.serverType = srv_type
        self.scriptType = src_type
    def setSnippet(self,snippet):
        self.snippet = snippet
    def setURL(self,URL):
        self.URL = URL
    def setServerType(self,stype):
        self.serverType = stype
    def setScriptType(self,stype):
        self.scriptType = stype
class Stripper:
    """
        An object that strips the links from a page
    """
    def __init__(self,page):
        self.links = []
        self.page = page
    def strip(self):
        soop = soup(self.page) #init the bs object
        links_wrapper = soop.findAll("div",{"class":"results_links results_links_deep web-result"}) #one div per search result
        results_arr = [] #a list of resultLink objects
        for link in links_wrapper:
            results = link.findAll("div",{"class":"links_main links_deep"})
            for res in results:
                a = res.find("a",{"class":"large"})
                if a is None: #no anchor in this result block, skip it
                    continue
                anchor = a["href"].replace("&amp;","&") #pull the href attribute directly instead of string-splitting
                snippet = res.findAll("div",{"class":"snippet"})
                results_arr.append(resultLink(str(snippet),str(anchor),"",""))
        return results_arr
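# A minimal sketch of driving Stripper on its own (assumes `html` already
# holds a fetched duckduckgo /html results page, e.g. from getPage):
#   for r in Stripper(html).strip():
#       print r.URL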

if __name__ == "__main__":
    print "========================="
    print ".::Hunting Duck 1.1::.\n"
    print "========================="
    print "\t\tby k3170makan"
    hd = HuntingDuck(False,'')
    if len(sys.argv) < 2: #argv[0] is the script name, so we need at least one more arg
        print "Usage: ./HuntingDuck [dork]\n"
        print "example: ./HuntingDuck site:.gov.za"
        print "Quote your 'dork' in the shell if it contains spaces,\n proxy support is available to those who are willing to hack it out of my code ;)\n"
    else:
        html = hd.getPage(sys.argv[1])
        hd.getURLs(html)
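
The script as posted targets Python 2 (urllib2, cookielib, BeautifulSoup 3), so it will not run under Python 3 as-is. As a rough orientation only, a getPage() equivalent in Python 3's standard library could look like the sketch below; it assumes the duckduckgo.com/html endpoint still accepts a q= parameter the way it did in 2012, and get_page is a hypothetical name, not part of the paste.

#!/usr/bin/env python3
# Hypothetical Python 3 sketch of getPage(); the endpoint and query
# parameter are assumptions carried over from the 2012 original.
import urllib.parse
import urllib.request

def get_page(dork):
    # quote the dork so spaces and special characters survive the trip
    url = "https://duckduckgo.com/html?q=" + urllib.parse.quote_plus(dork)
    req = urllib.request.Request(url, headers={
        "User-Agent": "Mozilla/5.0",
        "Referer": "k3170makan.blogspot.com",
    })
    with urllib.request.urlopen(req) as resp:
        return resp.read().decode("utf-8", errors="replace")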