k3170makan

Python Google Dorker alpha

Mar 14th, 2012
#!/usr/bin/python
from BeautifulSoup import BeautifulSoup as soup
import httplib
import sys
  5. """
  6. **I've fixed some issues and upgraded the output a bit
  7.  
  8.     A simple extension to goofile.py
  9.     features in this version:
  10.         *allows you to dork from the command line, and returns critical information
  11.             about the targets like
  12.                 >title text of the page
  13.                 >a short description
  14.                 >the URL to the target
  15.     TODO:
  16.         >Duckduckgo search script on the way, im gonna include results from both search engines ;)
  17.         >server type detection
  18.         >dork presets for specifying targets with
  19.             *SQLi vulnerabilities in pages
  20.             *LFI/RFI vulnerabilities
  21.             *XSS vulnerabilities
  22.             *vulnerable files
  23.         I will implement this in such a way that you can localize a search to a given target
  24.         >CMS type detection
  25.         >I would like in the future to have googledorker learn from the searches you have performed and cache them
  26.             for faster results, and also use a lil machine learning to enhance the responsiveness to certain targets
  27.  
  28.     Depedencies:
  29.         are availble in the second line of the script!
  30.  
  31. >>>If you wanna get something really awesome going, you could plug these results into an nmap scan, and automate
  32. penetration testing XD
  33.  
  34. This was inspired by googile.py
  35.  
  36. Author: Keith Makan
  37. Twitter: k3170makan
  38. site:k3170makan.blogspot.com   
  39. """
class resultManager:
    """
        An object to manage results:
            *title
            *URL
            *server type
            *script_type
        I'll just send a request to each server and swipe the details from the response headers ;)
        (a rough sketch of that is included below)
    """
    def __init__(self):
        return
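
    # Minimal sketch of the header-grabbing idea described in the docstring above; not
    # part of the original script. The method name and the HTTPConnection approach are
    # illustrative assumptions.
    def get_server_headers(self, host):
        """Grab the Server header from a target host (sketch)."""
        conn = httplib.HTTPConnection(host)
        conn.request("GET", "/")
        resp = conn.getresponse()
        # e.g. 'Apache/2.2.3', 'nginx' or 'Microsoft-IIS/7.5'; None if the header isn't sent
        return resp.getheader("Server")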

class Dorker:
    def __init__(self):
        return
    def get_page(self, dork):
        # fetch the raw Google results page for the supplied dork
        h = httplib.HTTP('www.google.com')
        h.putrequest('GET', "/search?num=500&q=" + dork)
        h.putheader('Host', 'www.google.com')
        h.putheader('User-agent', 'Internet Explorer 6.0 ')
        h.putheader('Referer', 'k3170makan.blogspot.com')
        h.endheaders()
        returncode, returnmsg, headers = h.getreply()
        html = h.getfile().read()
        #print html
        return html
    def stripURLs(self, page):
        soop = soup(page)
        resTag = soop.findAll("div", {"id": "res"}) # get the div that wraps the results
        if len(resTag) == 0:
            print page
            print
            print "Google is refusing your queries (probably rate-limiting you), please wait about 10 mins before trying again"
            return []
        results_wrapperTag = soup(str(resTag)).findAll("ol")
        results_list = soup(str(results_wrapperTag)).findAll("li", {"class": "g"})

        result_URLs = []
        for res in results_list: # I'm using BeautifulSoup here, but a lot of this could be sped up with regexes!
            string_res = str(res)
            result_h3TAG = soup(string_res).findAll("h3", {"class": "r"})
            results_anchorTAG = soup(str(result_h3TAG)).findAll("a")
            if len(results_anchorTAG) == 1:
                URL = str(results_anchorTAG).split("/url?q=")
                if len(URL) >= 1:
                    try: # sometimes the search returns links to Google Images or other junk; those anchors have a slightly different form, so the code below raises an exception and we stop processing
                        URL = URL[1].split(";")[0]
                        URL = URL[:len(URL)-4] # okay, so we have the URL
                        result_URLs.append(URL)
                    except IndexError:
                        return result_URLs

                print "target:> %s " % (URL),
            results_summaryTAG = soup(string_res).findAll("div", {"class": "s"})
            if len(results_summaryTAG) == 1:
                for etc in results_summaryTAG:
                    print "summary:\n\t>>%s" % (str(etc))
            print
        return result_URLs
    def dork(self, dork_term):
        """
            print the results for the dork_term supplied
        """
        html = self.get_page(dork_term)
        results = self.stripURLs(html)
        print "listing URLs"
        for index, result in enumerate(results): # enumerate is awesome btw!
            print index+1, "]", result
        return
if __name__ == "__main__":
    dorky = Dorker()
    if len(sys.argv) > 1:
        print "Running dork <%s>" % (sys.argv[1])
        dorky.dork(sys.argv[1])
    else:
        print ".::Google Dorker::."
        print
        print "Usage: ./googledorker.py [dork_term]"
        print
        print 'example: ./googledorker.py filetype:sql'
        print "*Please ensure that your dork is all on a single line; use %20 for spaces and + to combine search operators"
        print "See: http://k3170makan.blogspot.com/2012/01/science-of-google-dorking.html"
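
# Example invocations (illustrative only; apart from filetype:sql above, these dorks are
# made-up samples showing the %20-for-spaces and +-to-combine convention):
#   ./googledorker.py filetype:sql
#   ./googledorker.py filetype:sql+site:example.com
#   ./googledorker.py intitle:%22index%20of%22+passwd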