k3170makan

HuntingDuck v1.1

Mar 15th, 2012
#!/usr/bin/python
import urllib2,urllib,sys,cookielib
from BeautifulSoup import BeautifulSoup as soup
from urlparse import urlparse
#import httplib
"""
    Author: Keith (k3170) Makan
    Twitter: @k3170makan
    site: k3170makan.blogspot.com

    Descr: A script that allows you to use the power of DuckDuckGo dorks straight from the comfort of your command line
"""
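# Example invocation (illustrative; mirrors the usage text printed below --
# "site:example.com" is a placeholder dork, the trailing 1 asks HuntingDuck
# to inspect each result further):
#   ./HuntingDuck "site:example.com" 1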
class HuntingDuck:
    def __init__(self,isProxied,proxy_details):
        """
            THERE ARE NO HACKING TOOLS/TARGETS EXCEPT THE HUMAN MIND -- k3170makan

            The object that handles all the net requests and just returns the
            goodies Dorker needs --- encapsulation like a BAUSS!!

            *the following is facilitated but not implemented in the code

            proxy_details ---- should be the address of the proxy host; if one is not supplied it will remain
                                     empty, and urllib2 will continue working ;)
                e.g. 'http://username:password@host:port_num'
            isProxied     ---- <True|False> specifies whether the dorking is happening from behind a proxy
        """
        self.isProxied = isProxied
        self.http_inst = ''
        print "{ Setting up connection objects...."
        if isProxied:
            print "{ Setting up proxy API functions..."
            self.proxy_handler = urllib2.ProxyHandler({'http':proxy_details}) #build the proxy handler obj
            self.cookiejar = cookielib.CookieJar() #not needed now!
            try:
                self.opener = urllib2.build_opener(self.proxy_handler,urllib2.HTTPCookieProcessor(self.cookiejar))
            except:
                print " HALT -- Problem with proxy set up :( }"
                return
            print " OKAY }"
        else:
            self.proxy_handler = urllib2.ProxyHandler({})
            self.cookiejar = cookielib.CookieJar() #not needed now!
            try:
                self.opener = urllib2.build_opener(self.proxy_handler,urllib2.HTTPCookieProcessor(self.cookiejar))
            except:
                print " HALT -- Problem with setting up connection objects :( }"
                return
            print " OKAY }"
        print "DONE } ready to start querying!"
        self.headers = []
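    # A minimal sketch of proxied use (an assumption for illustration; the
    # address below is a hypothetical placeholder, not a real host):
    #   hd = HuntingDuck(True, 'http://user:pass@127.0.0.1:8080')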
    def getPage(self,dork):
        print "{ getting page ... }"
        self.headers = []
        self.headers.append(('Host','duckduckgo.com'))
        self.headers.append(('User-agent','Mozilla/5.0 (X11; Linux i686 on x86_64; rv:9.0.1) Gecko/20100101 Firefox/9.0.1'))
        self.headers.append(('Referer','k3170makan.blogspot.com'))
        self.opener.addheaders = self.headers
        print "{ Preparing url opener object..."
        try:
            urllib2.install_opener(self.opener) #okay now i can start making requests n shit
        except Exception, e:
            print " HALT -- problem installing opener obj"
            print e
            return
        print " OKAY }"
        try:
            print "{ Sending request "
            try:
                #quote_plus() URL-encodes spaces and other special chars in the dork
                request = urllib2.Request('http://www.duckduckgo.com/html?q='+urllib.quote_plus(dork))
            except urllib2.URLError, e:
                print e
                print "}"
                return
            print " }"
            print
            print "{ Request sent: \n"
            for header in self.headers:
                print "<<< [%s] : %s " % (header[0],header[1])
            print " }"
            response = urllib2.urlopen(request)
            print "{ Received response:\n "
            resp_headers = response.info()
            for header_name in resp_headers:
                print ">>> [%s] : %s " % (header_name,resp_headers[header_name])
            print " }"
            html = response.read()
            print "Received <%s> bytes of data" % (len(html))
        except urllib2.URLError, e:
            print e
            return ""
        return html
    def getURLs(self,html):
        stripper = Stripper(html)
        links = stripper.strip()
        print "Results:"
        print
        for index,link in enumerate(links):
            print "%d] %s" % (index+1,link)
        return links
    def doxTarget(self,target):
        """
            Get basic info from the target
                *servable pages
                *OSinfo
                    >server type
                    >script type
                        *will add more capability later ;)
        """

        parsed = urlparse(target)
        print "=========================================="
        print "Getting info on target << %s >>" % (target)
        print "=========================================="

        pages = self.getServablePages(parsed.netloc)
        osinfo = self.getOSinfo(target)
        print osinfo
    def getServablePages(self,target):
        #this simply runs a dork with the 'site:' directive for a given target
        dork = "site:"+target
        print "Running dork <%s>" % (dork)
        page = self.getPage(dork)
        links = self.getURLs(page)
        return links
    def getOSinfo(self,target):
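        #stub for now; a minimal sketch of the server-type probe promised in
        #the doxTarget docstring (an assumption for illustration, not the
        #implemented behaviour) would read the Server response header, e.g.:
        #   server = urllib2.urlopen(target).info().getheader('Server')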
        return ''
class Stripper:
    """
        An object that strips the links from a page
    """
    def __init__(self,page):
        self.links = []
        self.page = page
    def strip(self):
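        #NOTE (assumption): the selectors below match DuckDuckGo's HTML-only
        #results markup as it looked circa 2012, roughly:
        #   <div class="results_links results_links_deep web-result">
        #     <div class="links_main links_deep"><a class="large" href="...">
        #if DuckDuckGo changes its markup, strip() simply returns an empty set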
        soop = soup(self.page) #init the bs object
        links_wrapper = soop.findAll("div",{"class":"results_links results_links_deep web-result"}) #go to the node that contains all results
        results_arr = [] #a list of result_link objects
        for link in links_wrapper:
            s = soup(str(link))
            results = s.findAll("div",{"class":"links_main links_deep"})
            for res in results:
                s = soup(str(res))
                anchors = s.findAll("a",{"class":"large"})
                snippet = s.findAll("div",{"class":"snippet"}) #extracted but not used yet
                for a in anchors:
                    #read the href attribute directly instead of string-splitting
                    #str(a), which broke whenever the attribute order changed
                    anchor = a["href"].replace("&amp;","&")
                    results_arr.append(str(anchor))
        return set(results_arr)
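        #design note: returning a set() deduplicates repeated hrefs across
        #result blocks, at the cost of losing DuckDuckGo's ranking order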
class Target:
    def __init__(self):
        #assign placeholders -- the original bare lookups raised AttributeError
        self.URL = ''
        self.domain = ''
if __name__ == "__main__":
    print "========================="
    print ".::Hunting Duck 1.1::.\n"
    print "========================="
    print "\t\tby k3170makan"
    hd = HuntingDuck(False,'')
    if len(sys.argv) < 2:
        print "Usage: ./HuntingDuck [dork] [1|0]\n"
        print "[dork] -- should contain the search query"
        print "[1|0]  -- 1 if you'd like the targets to be inspected further, 0 if not"
        print "example: ./HuntingDuck site:.gov.za"
        print "Please quote any 'dork' that contains spaces,\n proxy support is available to those who are willing to hack it out of my code ;)\n"
    else:
        html = hd.getPage(sys.argv[1])
        targets = hd.getURLs(html)
        #only dox targets when a second argument of 1 is actually supplied
        #(indexing sys.argv[2] unconditionally raised IndexError before)
        if len(sys.argv) > 2 and sys.argv[2].isdigit() and int(sys.argv[2]) == 1:
            for sucker in targets:
                hd.doxTarget(sucker)