Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- #!/usr/bin/python
- from BeautifulSoup import BeautifulSoup as soup
- import httplib
- import sys
- """
- A simple extension to goofile.py
- features in this version:
- *allows you to dork from the command line, and returns critical information
- about the targets like
- >title text of the page
- >a short description
- >the URL to the target
- TODO:
- >server type detection
- >dork presets for specifying targets with
- *SQLi vulnerabilities in pages
- *LFI/RFI vulnerabilities
- *XSS vulnerabilities
- *vulnerable files
- I will implement this in such a way that you can localize a search to a given target
- >CMS type detection
- >I would like in the future to have googledorker learn from the searches you have performed and cache them
- for faster results, and also use a lil machine learning to enhance the responsiveness to certain targets
- Dependencies:
- are available in the second line of the script!
- This was inspired by goofile.py
- Author: Keith Makan
- Twitter: k3170makan
- site:k3170makan.blogspot.com
- """
class resultManager:
    """
    Placeholder container for the metadata of a single search result:
    title, URL, server type and script type.

    Currently an empty stub -- fields are to be added as the
    corresponding detection features (see module TODO list) land.
    """
    def __init__(self):
        # Nothing to initialise yet.
        pass
- class Dorker:
- def __init__(self):
- return
- def get_page(self,dork):
- h = httplib.HTTP('www.google.com')
- h.putrequest('GET',"/search?num=100&q="+dork)
- h.putheader('Host','www.google.com')
- h.putheader('User-agent','Internet Explorer 6.0')
- h.putheader('Referrer','www.g13net.com')
- h.endheaders()
- returncode,returnmsg,headers = h.getreply()
- html=h.getfile().read()
- #print html
- return html
- def stripURLs(self,page):
- soop = soup(page)
- resTag = soop.findAll("div",{"id":"res"}) #get the divider that wraps the results
- if len(resTag) == 0:
- print page
- print
- print "Google is refusing you search query, please wait about 10mins before trying again"
- return []
- results_wrapperTag = soup(str(resTag)).findAll("ol")
- results_list = soup(str(results_wrapperTag)).findAll("li",{"class":"g"})
- result_URLs = []
- for res in results_list: #I'm using beautifull soup here, but a lot of this can be sped up by using regex!
- string_res = str(res)
- result_h3TAG = soup(string_res).findAll("h3",{"class":"r"})
- results_anchorTAG = soup(str(result_h3TAG)).findAll("a")
- URL = str(results_anchorTAG).split("/url?q=")[1].split(";")[0]
- URL = URL[:len(URL)-4] #okay so we have the url
- print "> %s " % (etc),
- results_summaryTAG = soup(string_res).findAll("div",{"class":"s"})
- for etc in results_summaryTAG:
- print "\t>>%s" % (etc)
- print
- return result_URLs
- def dork(self,dork_term):
- """
- print the results for the dork_term supplied
- """
- html = self.get_page(dork_term)
- self.stripURLs(html)
- return
- if __name__ == "__main__":
- dorky = Dorker()
- if len(sys.argv) > 1:
- dorky.dork(sys.argv[1])
- else:
- print ".::Google Dorker::."
- print
- print "Usage: ./googledorker.py [dork_term]"
- print
- print 'example: ./googledorker.py inurl:".php?*=*"'
- print "*Please ensure that you're dork in all in a single line, use %20 for spaces and + to combine search operators"
- print "See: http://k3170makan.blogspot.com/2012/01/science-of-google-dorking.html"
Add Comment
Please, Sign In to add comment