Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- #!/usr/bin/python
- import urllib2,urllib,sys,re,cookielib
- from BeautifulSoup import BeautifulSoup as soup
- #import httplib
- """
- Author: Keith (k3170) Makan
- Twitter: @k3170makan
- site:k3170makan.blogspot.com
- Descr: A script that allows you to use the power of duckduckgo dorks straight from the comfort of your command line
- """
- class HuntingDuck:
- def __init__(self,isProxied,proxy_details):
- """
- THERE ARE NO HACKING TOOLS/TARGETS EXCEPT THE HUMAN MIND -- k3170makan
- The object that handles all the net requests and just returns Dorker
- the goodies it needs --- encapsulations like a BAUSS!!
- *the following is facilitated but not implemented in the code
- proxy_details ---- should be the addess to the proxy host, if one is not supplied it will remain
- empty, and urllib2 with continue working ;)
- e.g inet_('http://username:password@host:port_num')
- isProxied ---- <True|False> specifies whether the dorking is happening from behind a proxy
- """
- self.isProxied = isProxied
- self.http_inst = '';
- print "{ Setting up connection objects...."
- if isProxied:
- print "{ Setting up proxy API functions..."
- self.proxy_handler = urllib2.ProxyHandler({'http':proxy_details}) #build the proxy handler obj
- self.cookiejar = cookielib.CookieJar() #not needed now!
- try:
- self.opener = urllib2.build_opener(self.proxy_handler,urllib2.HTTPCookieProcessor(self.cookiejar));
- except:
- print " HALT -- Problem with proxy set up :( }"
- return
- print " OKAY }"
- else:
- self.proxy_handler = urllib2.ProxyHandler({})
- self.cookiejar = cookielib.CookieJar() #not needed now!
- try:
- self.opener = urllib2.build_opener(self.proxy_handler,urllib2.HTTPCookieProcessor(self.cookiejar));
- except:
- print " HALT -- Problem with setting up connection objects :( }"
- return
- print " OKAY }"
- print "DONE } ready to start querying!"
- self.headers = []
- def getPage(self,dork):
- print "{ getting page ... }"
- self.headers = []
- self.headers.append(('Host','duckduckgo.com'))
- self.headers.append(('User-agent','Mozilla/5.0 (X11; Linux i686 on x86_64; rv:9.0.1) Gecko/20100101 Firefox/9.0.1'))
- self.headers.append(('Referrer','k3170makan.blogspot.com'))
- self.opener.addheaders = self.headers
- print "{ Preparing url opener object..."
- try:
- urllib2.install_opener(self.opener) #okay now i can start making requests n shit
- except e:
- print " HALT -- problem installing opener obj"
- print e
- return
- finally:
- print " OKAY }"
- try:
- print "{ Sending request "
- try:
- request = urllib2.Request('http://www.duckduckgo.com/html?q='+dork)
- except urllib2.URLError, e:
- print e
- print "}"
- return
- print " }"
- print
- print "{ Request sent: \n"
- for header in self.headers:
- print "<<< [%s] : %s " % (header[0],header[1])
- print " }"
- response = urllib2.urlopen(request)
- print "{ Recieved response:\n "
- resp_headers = response.info()
- for header_name in resp_headers:
- print ">>> [%s] : %s " % (header_name,resp_headers[header_name])
- print " }"
- html = response.read()
- print "Recieved <%s> bytes of data" % (len(html))
- except urllib2.URLError, e:
- print e
- return ""
- return html
- def getURLs(self,html):
- stripper = Stripper(html)
- links = stripper.strip()
- print "Results:"
- print
- for index,link in enumerate(links):
- print "%d) %s %s" % ( index+1,link.URL,link.snippet)
- print
class resultLink:
    """A single search result: link URL plus snippet, with (currently
    unpopulated) server/script type metadata slots."""

    def __init__(self, snippet, URL, srv_type, src_type):
        self.snippet = snippet      # text snippet shown under the result
        self.URL = URL              # the result's href
        self.serverType = srv_type  # e.g. 'apache' -- facilitated, not filled in yet
        self.scriptType = src_type  # e.g. 'php'    -- facilitated, not filled in yet

    def __repr__(self):
        # added for debuggability; does not affect any existing caller
        return "resultLink(URL=%r, snippet=%r)" % (self.URL, self.snippet)

    def setSnippet(self, snippet):
        self.snippet = snippet

    def setURL(self, URL):
        self.URL = URL

    def setServerType(self, stype):
        self.serverType = stype

    def setScriptType(self, stype):
        self.scriptType = stype
class Stripper:
    """
    An object that strips the result links from a DuckDuckGo HTML page.
    """
    def __init__(self, page):
        self.links = []
        self.page = page

    def strip(self):
        """Return a list of resultLink objects parsed from self.page."""
        soop = soup(self.page)  # init the bs object
        # go to the nodes that contain all organic results
        links_wrapper = soop.findAll(
            "div", {"class": "results_links results_links_deep web-result"})
        results_arr = []  # a list of result_link objects
        for link in links_wrapper:
            s = soup(str(link))
            for res in s.findAll("div", {"class": "links_main links_deep"}):
                r = soup(str(res))
                anchors = r.findAll("a", {"class": "large"})
                if not anchors:
                    # defensive: skip malformed result nodes instead of crashing
                    continue
                # BUG FIX: read the href attribute directly instead of the
                # original brittle str().split(" ")[3]... chain, and unescape
                # HTML-encoded ampersands (the original .replace("&","&")
                # was a no-op -- the intended entity was '&amp;')
                anchor = str(anchors[0].get("href", "")).replace("&amp;", "&")
                snippet = r.findAll("div", {"class": "snippet"})
                results_arr.append(resultLink(str(snippet), anchor, "", ""))
        return results_arr
- if __name__ == "__main__":
- print "========================="
- print ".::Hunting Duck 1.1::.\n"
- print "========================="
- print "\t\tby k3170makan"
- hd = HuntingDuck(False,'')
- if len(sys.argv) < 1:
- print "Usage: ./HuntingDuck [dork]\n"
- print "example: ./HuntingDuck site:.gov.za"
- print "Please ensure that your 'dork' does not contain spaces,\n proxy support is available to those who are willing to hack it out of my code ;)\n"
- else:
- html = hd.getPage(sys.argv[1])
- hd.getURLs(html)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement