Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- #!/usr/bin/python
- from BeautifulSoup import BeautifulSoup as soup
- import httplib
- import sys
- """
- **I've fixed some issues and upgraded the output a bit
- A simple extension to goofile.py
- features in this version:
- *allows you to dork from the command line, and returns critical information
- about the targets like
- >title text of the page
- >a short description
- >the URL to the target
- TODO:
- >Duckduckgo search script on the way, im gonna include results from both search engines ;)
- >server type detection
- >dork presets for specifying targets with
- *SQLi vulnerabilities in pages
- *LFI/RFI vulnerabilities
- *XSS vulnerabilities
- *vulnerable files
- I will implement this in such a way that you can localize a search to a given target
- >CMS type detection
- >I would like in the future to have googledorker learn from the searches you have performed and cache them
- for faster results, and also use a lil machine learning to enhance the responsiveness to certain targets
- Dependencies:
- are available in the second line of the script!
- >>>If you wanna get something really awesome going, you could plug these results into an nmap scan, and automate
- penetration testing XD
- This was inspired by goofile.py
- Author: Keith Makan
- Twitter: k3170makan
- site:k3170makan.blogspot.com
- """
class resultManager:
    """Placeholder container for per-result metadata.

    Intended (per the author's notes) to eventually hold, for each hit:
      * title
      * URL
      * server type
      * script_type
    The plan is to request each server and read these details from the
    response headers.
    """

    def __init__(self):
        # Stub: no state is kept yet; the class awaits implementation.
        pass
- class Dorker:
- def __init__(self):
- return
- def get_page(self,dork):
- h = httplib.HTTP('www.google.com')
- h.putrequest('GET',"/search?num=500&q="+dork)
- h.putheader('Host','www.google.com')
- h.putheader('User-agent','Internet Explorer 6.0 ')
- h.putheader('Referrer','k3170makan.blogspot.com')
- h.endheaders()
- returncode,returnmsg,headers = h.getreply()
- html=h.getfile().read()
- #print html
- return html
- def stripURLs(self,page):
- soop = soup(page)
- resTag = soop.findAll("div",{"id":"res"}) #get the divider that wraps the results
- if len(resTag) == 0:
- print page
- print
- print "Google is being naabs by refusing your queries, please wait about 10mins before trying again"
- return []
- results_wrapperTag = soup(str(resTag)).findAll("ol")
- results_list = soup(str(results_wrapperTag)).findAll("li",{"class":"g"})
- result_URLs = []
- for res in results_list: #I'm using beautifull soup here, but a lot of this can be sped up by using regex!
- string_res = str(res)
- result_h3TAG = soup(string_res).findAll("h3",{"class":"r"})
- results_anchorTAG = soup(str(result_h3TAG)).findAll("a")
- if len(results_anchorTAG) == 1:
- URL = str(results_anchorTAG).split("/url?q=")
- if len(URL) >= 1:
- try: #sometimes the search will return links to google images or other crap, if so i stop processing because the anchors have a slightly different form to the results, in which case the follow code will raise an exception.
- URL = URL[1].split(";")[0]
- URL = URL[:len(URL)-4] #okay so we have the url
- result_URLs.append(URL)
- except:
- return result_URLs
- print "target:> %s " % (URL),
- results_summaryTAG = soup(string_res).findAll("div",{"class":"s"})
- if len(results_summaryTAG) == 1:
- for etc in results_summaryTAG:
- print "summary:\n\t>>%s" % (str(etc))
- print
- return result_URLs
- def dork(self,dork_term):
- """
- print the results for the dork_term supplied
- """
- html = self.get_page(dork_term)
- results = self.stripURLs(html)
- print "listing URLS"
- for index,result in enumerate(results): #enumerate is awesome btw!
- print index+1,"]",result
- return
- if __name__ == "__main__":
- dorky = Dorker()
- if len(sys.argv) > 1:
- print "Running dork <%s>" % (sys.argv[1])
- dorky.dork(sys.argv[1])
- else:
- print ".::Google Dorker::."
- print
- print "Usage: ./googledorker.py [dork_term]"
- print
- print 'example: ./googledorker.py filetype:sql'
- print "*Please ensure that you're dork in all in a single line, use %20 for spaces and + to combine search operators"
- print "See: http://k3170makan.blogspot.com/2012/01/science-of-google-dorking.html"
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement