Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- #!/usr/bin/python
- from BeautifulSoup import BeautifulSoup as soup
- import httplib
- import sys
- """
- A simple extension to goofile.py
- features in this version:
- *allows you to dork from the command line, and returns critical information
- about the targets like
- >title text of the page
- >a short description
- >the URL to the target
- TODO:
- >server type detection
- >dork presets for specifying targets with
- *SQLi vulnerabilities in pages
- *LFI/RFI vulnerabilities
- *XSS vulnerabilities
- *vulnerable files
- I will implement this in such a way that you can localize a search to a given target
- >CMS type detection
- >I would like in the future to have googledorker learn from the searches you have performed and cache them
- for faster results, and also use a lil machine learning to enhance the responsiveness to certain targets
- Dependencies:
- are available in the second line of the script!
- This was inspired by goofile.py
- Author: Keith Makan
- Twitter: k3170makan
- site:k3170makan.blogspot.com
- """
class resultManager:
    """
    Placeholder container for the metadata of a single search result:
    title, URL, server type and script type.

    Currently an empty stub -- fields are to be added as the
    corresponding detection features (see module TODO list) land.
    """
    def __init__(self):
        # Nothing to initialise yet.
        pass
- class Dorker:
- def __init__(self):
- return
- def get_page(self,dork):
- h = httplib.HTTP('www.google.com')
- h.putrequest('GET',"/search?num=100&q="+dork)
- h.putheader('Host','www.google.com')
- h.putheader('User-agent','Internet Explorer 6.0')
- h.putheader('Referrer','www.g13net.com')
- h.endheaders()
- returncode,returnmsg,headers = h.getreply()
- html=h.getfile().read()
- #print html
- return html
- def stripURLs(self,page):
- soop = soup(page)
- resTag = soop.findAll("div",{"id":"res"}) #get the divider that wraps the results
- if len(resTag) == 0:
- print page
- print
- print "Google is refusing you search query, please wait about 10mins before trying again"
- return []
- results_wrapperTag = soup(str(resTag)).findAll("ol")
- results_list = soup(str(results_wrapperTag)).findAll("li",{"class":"g"})
- result_URLs = []
- for res in results_list: #I'm using beautifull soup here, but a lot of this can be sped up by using regex!
- string_res = str(res)
- result_h3TAG = soup(string_res).findAll("h3",{"class":"r"})
- results_anchorTAG = soup(str(result_h3TAG)).findAll("a")
- URL = str(results_anchorTAG).split("/url?q=")[1].split(";")[0]
- URL = URL[:len(URL)-4] #okay so we have the url
- print "> %s " % (etc),
- results_summaryTAG = soup(string_res).findAll("div",{"class":"s"})
- for etc in results_summaryTAG:
- print "\t>>%s" % (etc)
- print
- return result_URLs
- def dork(self,dork_term):
- """
- print the results for the dork_term supplied
- """
- html = self.get_page(dork_term)
- self.stripURLs(html)
- return
- if __name__ == "__main__":
- dorky = Dorker()
- if len(sys.argv) > 1:
- dorky.dork(sys.argv[1])
- else:
- print ".::Google Dorker::."
- print
- print "Usage: ./googledorker.py [dork_term]"
- print
- print 'example: ./googledorker.py inurl:".php?*=*"'
- print "*Please ensure that you're dork in all in a single line, use %20 for spaces and + to combine search operators"
- print "See: http://k3170makan.blogspot.com/2012/01/science-of-google-dorking.html"
Add Comment
Please, Sign In to add comment