k3170makan

HuntingDuck v1.1

Mar 15th, 2012
#!/usr/bin/python
import urllib2,urllib,sys,cookielib
from BeautifulSoup import BeautifulSoup as soup
from urlparse import urlparse
#import httplib
"""
    Author: Keith (k3170) Makan
    Twitter: @k3170makan
    site: k3170makan.blogspot.com

    Descr: A script that allows you to use the power of DuckDuckGo dorks straight from the comfort of your command line
"""
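# Example invocation (illustrative; mirrors the usage text printed below --
# "site:example.com" is a placeholder dork, the trailing 1 asks HuntingDuck
# to inspect each result further):
#   ./HuntingDuck "site:example.com" 1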
class HuntingDuck:
    def __init__(self,isProxied,proxy_details):
        """
            THERE ARE NO HACKING TOOLS/TARGETS EXCEPT THE HUMAN MIND -- k3170makan

            The object that handles all the net requests and just returns the
            goodies Dorker needs --- encapsulation like a BAUSS!!

            *the following is facilitated but not implemented in the code

            proxy_details ---- should be the address of the proxy host; if one is not supplied it will remain
                                     empty, and urllib2 will continue working ;)
                e.g. 'http://username:password@host:port_num'
            isProxied     ---- <True|False> specifies whether the dorking is happening from behind a proxy
        """
        self.isProxied = isProxied
        self.http_inst = ''
        print "{ Setting up connection objects...."
        if isProxied:
            print "{ Setting up proxy API functions..."
            self.proxy_handler = urllib2.ProxyHandler({'http':proxy_details}) #build the proxy handler obj
            self.cookiejar = cookielib.CookieJar() #not needed now!
            try:
                self.opener = urllib2.build_opener(self.proxy_handler,urllib2.HTTPCookieProcessor(self.cookiejar))
            except:
                print " HALT -- Problem with proxy set up :( }"
                return
            print " OKAY }"
        else:
            self.proxy_handler = urllib2.ProxyHandler({})
            self.cookiejar = cookielib.CookieJar() #not needed now!
            try:
                self.opener = urllib2.build_opener(self.proxy_handler,urllib2.HTTPCookieProcessor(self.cookiejar))
            except:
                print " HALT -- Problem with setting up connection objects :( }"
                return
            print " OKAY }"
        print "DONE } ready to start querying!"
        self.headers = []
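    # A minimal sketch of proxied use (an assumption for illustration; the
    # address below is a hypothetical placeholder, not a real host):
    #   hd = HuntingDuck(True, 'http://user:pass@127.0.0.1:8080')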
    def getPage(self,dork):
        print "{ getting page ... }"
        self.headers = []
        self.headers.append(('Host','duckduckgo.com'))
        self.headers.append(('User-agent','Mozilla/5.0 (X11; Linux i686 on x86_64; rv:9.0.1) Gecko/20100101 Firefox/9.0.1'))
        self.headers.append(('Referer','k3170makan.blogspot.com'))
        self.opener.addheaders = self.headers
        print "{ Preparing url opener object..."
        try:
            urllib2.install_opener(self.opener) #okay now i can start making requests n shit
        except Exception, e:
            print " HALT -- problem installing opener obj"
            print e
            return
        print " OKAY }"
        try:
            print "{ Sending request "
            try:
                #quote_plus() URL-encodes spaces and other special chars in the dork
                request = urllib2.Request('http://www.duckduckgo.com/html?q='+urllib.quote_plus(dork))
            except urllib2.URLError, e:
                print e
                print "}"
                return
            print " }"
            print
            print "{ Request sent: \n"
            for header in self.headers:
                print "<<< [%s] : %s " % (header[0],header[1])
            print " }"
            response = urllib2.urlopen(request)
            print "{ Received response:\n "
            resp_headers = response.info()
            for header_name in resp_headers:
                print ">>> [%s] : %s " % (header_name,resp_headers[header_name])
            print " }"
            html = response.read()
            print "Received <%s> bytes of data" % (len(html))
        except urllib2.URLError, e:
            print e
            return ""
        return html
    def getURLs(self,html):
        stripper = Stripper(html)
        links = stripper.strip()
        print "Results:"
        print
        for index,link in enumerate(links):
            print "%d] %s" % (index+1,link)
        return links
    def doxTarget(self,target):
        """
            Get basic info from the target
                *servable pages
                *OSinfo
                    >server type
                    >script type
                        *will add more capability later ;)
        """

        parsed = urlparse(target)
        print "=========================================="
        print "Getting info on target << %s >>" % (target)
        print "=========================================="

        pages = self.getServablePages(parsed.netloc)
        osinfo = self.getOSinfo(target)
        print osinfo
    def getServablePages(self,target):
        #this simply runs a dork with the 'site:' directive for a given target
        dork = "site:"+target
        print "Running dork <%s>" % (dork)
        page = self.getPage(dork)
        links = self.getURLs(page)
        return links
    def getOSinfo(self,target):
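        #stub for now; a minimal sketch of the server-type probe promised in
        #the doxTarget docstring (an assumption for illustration, not the
        #implemented behaviour) would read the Server response header, e.g.:
        #   server = urllib2.urlopen(target).info().getheader('Server')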
        return ''
class Stripper:
    """
        An object that strips the links from a page
    """
    def __init__(self,page):
        self.links = []
        self.page = page
    def strip(self):
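        #NOTE (assumption): the selectors below match DuckDuckGo's HTML-only
        #results markup as it looked circa 2012, roughly:
        #   <div class="results_links results_links_deep web-result">
        #     <div class="links_main links_deep"><a class="large" href="...">
        #if DuckDuckGo changes its markup, strip() simply returns an empty set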
        soop = soup(self.page) #init the bs object
        links_wrapper = soop.findAll("div",{"class":"results_links results_links_deep web-result"}) #go to the node that contains all results
        results_arr = [] #a list of result_link objects
        for link in links_wrapper:
            s = soup(str(link))
            results = s.findAll("div",{"class":"links_main links_deep"})
            for res in results:
                s = soup(str(res))
                anchors = s.findAll("a",{"class":"large"})
                snippet = s.findAll("div",{"class":"snippet"}) #extracted but not used yet
                for a in anchors:
                    #read the href attribute directly instead of string-splitting
                    #str(a), which broke whenever the attribute order changed
                    anchor = a["href"].replace("&amp;","&")
                    results_arr.append(str(anchor))
        return set(results_arr)
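        #design note: returning a set() deduplicates repeated hrefs across
        #result blocks, at the cost of losing DuckDuckGo's ranking order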
class Target:
    def __init__(self):
        #assign placeholders -- the original bare lookups raised AttributeError
        self.URL = ''
        self.domain = ''
if __name__ == "__main__":
    print "========================="
    print ".::Hunting Duck 1.1::.\n"
    print "========================="
    print "\t\tby k3170makan"
    hd = HuntingDuck(False,'')
    if len(sys.argv) < 2:
        print "Usage: ./HuntingDuck [dork] [1|0]\n"
        print "[dork] -- should contain the search query"
        print "[1|0]  -- 1 if you'd like the targets to be inspected further, 0 if not"
        print "example: ./HuntingDuck site:.gov.za"
        print "Please quote any 'dork' that contains spaces,\n proxy support is available to those who are willing to hack it out of my code ;)\n"
    else:
        html = hd.getPage(sys.argv[1])
        targets = hd.getURLs(html)
        #only dox targets when a second argument of 1 is actually supplied
        #(indexing sys.argv[2] unconditionally raised IndexError before)
        if len(sys.argv) > 2 and sys.argv[2].isdigit() and int(sys.argv[2]) == 1:
            for sucker in targets:
                hd.doxTarget(sucker)