Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
#!/usr/bin/python
# Covered by GPL V2.0
import getopt
import httplib
import os
import re
import string
import subprocess
import sys
import time
import urllib
from socket import *
# Updated 3/19/2011 by Derv[eightytwo]@gmail.com to work with Google again

# Startup banner, emitted once when the module is loaded.
print(
    "\n*************************************\n"
    "*MetaGooFil Ver. 1.4c (rev by derv) *\n"
    "*Coded by Christian Martorella *\n"
    "*Edge-Security Research *\n"
    "*cmartorella@edge-security.com *\n"
    "*************************************\n\n"
)

# These declarations have no effect at module level; they only list the
# names the script treats as shared state.
global word, w, limit, result, extcommand, dir

# Command line used to run the "extract" metadata tool.  Alternatives that
# were used on other platforms:
#   Win:    extcommand='c:\extractor\\bin\extract.exe -l libextractor_ole2'
#   OSX:    extcommand='/opt/local/bin/extract'
#   Cygwin: extcommand='/cygdrive/c/extractor/bin/extract.exe'
extcommand = '/usr/bin/extract'

# Shared list of results.
result = []

# Placeholder value meaning "no download directory selected"; test()
# compares its target directory against this value.
dir = "none"
def usage():
    """Print the command-line help text and terminate the program.

    Raises:
        SystemExit: always, through ``sys.exit()`` (no exit status given).
    """
    help_lines = (
        "MetaGooFil 1.4\n",
        "usage: metagoofil options \n",
        " -d: domain to search",
        " -f: filetype to download (all,pdf,doc,xls,ppt,odp,ods, etc)",
        # test() initialises the limit to 20, so that is the value shown.
        " -l: limit of results to work with (default 20)",
        " -o: output file, html format.",
        " -t: target directory to download files.\n",
        " Example: metagoofil.py -d microsoft.com -l 20 -f all -o micro.html -t micro-files\n",
    )
    # One print per original line: joining with "\n" yields the same output.
    print("\n".join(help_lines))
    sys.exit()
# Mac address extractor
def get_mac(file, dir):
    """Return a MAC-address-style string found in a downloaded file.

    The file ``dir + "/" + file`` is searched for the first occurrence of a
    dash, twelve alphanumeric characters and a closing brace.  The twelve
    characters are returned as six colon-separated pairs.
    NOTE(review): the pattern looks like the last group of a brace-wrapped
    GUID -- confirm that this is the intended source of the address.

    Args:
        file: name of the file inside ``dir``.
        dir: directory that contains the file.

    Returns:
        The formatted string (e.g. ``"00:11:22:AA:BB:CC"``), or ``''`` when
        the pattern does not occur.

    Raises:
        IOError: if the file cannot be opened.
    """
    filename = dir + "/" + file
    # Binary mode: the documents are not text, and text mode on Windows
    # would stop at the first 0x1A byte.  ``with`` closes the handle.
    with open(filename, 'rb') as handle:
        contents = handle.read()
    if not isinstance(contents, str):
        # Python 3 returns bytes; map them 1:1 to text so the str pattern
        # applies.  On Python 2 the byte string is used unchanged.
        contents = contents.decode('latin-1')

    found = re.search(r'-([0-9a-zA-Z]{12})\}', contents)
    if found is None:
        return ''
    digits = found.group(1)
    return ":".join(digits[pos:pos + 2] for pos in range(0, 12, 2))
def get_pdf_meta(file, dir):
    """Read the document-information dictionary of a PDF through pyPdf.

    Args:
        file: name of the PDF inside ``dir``.
        dir: directory that contains the file.

    Returns:
        A 2-tuple holding the same object twice: the value returned by
        ``PdfFileReader.getDocumentInfo()``, or ``''`` when pyPdf is
        missing or the file cannot be read.
    """
    filename = dir + "/" + file
    a = ''
    try:
        # Imported here so a missing pyPdf is reported instead of raising.
        from pyPdf import PdfFileReader
        # The previous code called ``pyPdf.PdfFileReader`` (``pyPdf`` is not
        # bound by the from-import) and ``file(...)`` (``file`` is the
        # string parameter), so it failed on every call.
        # NOTE(review): the handle is left open in case pyPdf resolves the
        # returned values lazily from the stream -- confirm and close it
        # if that is not needed.
        input1 = PdfFileReader(open(filename, "rb"))
        a = input1.getDocumentInfo()
    except ImportError:
        print("pyPdf is not installed, cannot read pdf metadata")
    except Exception:
        # Best effort: any read or parse failure yields the empty result.
        print("error opening pdf")
    return a, a
def get_info_pdf(file, dir):
    """Scrape author and title markers out of the raw content of a file.

    Lookup order:
      1. If a ``xap:Author=... `` attribute is present, the text up to the
         first space, stripped of the ``xap:Author='`` prefix and of single
         quotes, is returned with an empty title.
      2. Otherwise, if the content does not contain ``xmpmeta``, both
         values are empty.
      3. Otherwise the first ``Author(...)`` match (cut at the first "/")
         and the first ``<rdf:li>...</rdf:li></rdf:Seq>`` value are
         concatenated as the author, and the first ``Title(...)`` match is
         returned as the second value.

    Args:
        file: name of the file inside ``dir``.
        dir: directory that contains the file.

    Returns:
        ``(author, title)`` -- two strings, either of which may be ``''``.

    Raises:
        IOError: if the file cannot be opened.
    """
    filename = dir + "/" + file
    # Binary mode avoids text-mode truncation/translation of binary
    # documents, and ``with`` closes the handle.
    with open(filename, 'rb') as handle:
        res = handle.read()
    if not isinstance(res, str):
        # Python 3 returns bytes; map them 1:1 to text so the str patterns
        # apply.  On Python 2 the byte string is used unchanged.
        res = res.decode('latin-1')

    # 1. xap:Author attribute wins and ends the search.
    getter = re.findall('xap:Author=.* ', res)
    if getter:
        aut3 = getter[0].split(" ")[0]
        aut3 = aut3.replace("xap:Author='", "").replace("'", "")
        return aut3, ""

    # 2. Without an xmpmeta marker there is nothing else to look for.
    if 'xmpmeta' not in res:
        return '', ''

    # 3a. Author(...) entry, cut at the first "/".
    getter = re.findall(r'Author\(.*\)[<|/]', res)
    aut = getter[0].split("/")[0] if getter else ''

    # 3b. Value of an rdf:li element that closes an rdf:Seq.
    getter = re.findall(r'<rdf:li>(.*)</rdf:li></rdf:Seq>', res)
    if getter:
        aut2 = getter[0].replace("<rdf:li>", "").replace("</rdf:li>", "")
        print(aut2)
    else:
        aut2 = ''

    # 3c. Title(...) entry; the full match is returned, the console only
    # shows the part before the first "/".
    getter = re.findall(r'Title\(.*\)[<|/]', res)
    if getter:
        pat = getter[0]
        print(pat.split("/")[0])
    else:
        pat = ''

    return aut + aut2, pat
def howmany(w):
    """Return the number of results Google reports for the current search.

    Sends ``site:<w> filetype:<file>`` to www.google.com, where ``file`` is
    the module-level file type set by ``test()``, and parses the
    "About N results" (or "N results") text out of the response.

    Args:
        w: domain to search.

    Returns:
        The reported count as an int, or 0 when no count can be found in
        the response.
    """
    h = httplib.HTTP('www.google.com')
    h.putrequest('GET', "/search?hl=en&q=site%3A" + w + "+filetype%3A" + file)
    h.putheader('Host', 'www.google.com')
    h.putheader('User-agent', 'Mozilla/5.0 (X11; U; Linux i686; en-US) AppleWebKit/534.13 (KHTML, like Gecko) Chrome/9.0.597.107 Safari/534.13')
    h.endheaders()
    returncode, returnmsg, headers = h.getreply()
    data = h.getfile().read()

    counts = re.findall(r'About ([0123456789,]*) results', data)
    if not counts:
        counts = re.findall(r'([0123456789]*) results', data)

    # Both patterns may capture an empty string (or only commas); skip
    # those so int() is never handed '' and an empty list means 0.
    for candidate in counts:
        digits = candidate.replace(',', '')
        if digits:
            return int(digits)
    return 0
def run(w, i):
    """Fetch one page of Google results and return the matching file links.

    Requests up to 20 results of ``site:<w> filetype:<file>`` starting at
    offset ``i``, where ``file`` is the module-level file type set by
    ``test()``.

    Args:
        w: domain to search.
        i: offset of the first result (sent as the ``start`` parameter).

    Returns:
        List of ``href`` values in the response that end in ``.<file>``.
    """
    h = httplib.HTTP('www.google.com')
    h.putrequest('GET', "/search?num=20&start=" + str(i) + "&hl=en&q=site%3A" + w + "+filetype%3A" + file)
    h.putheader('Host', 'www.google.com')
    h.putheader('User-Agent', 'Mozilla/5.0 (X11; U; Linux i686; en-US) AppleWebKit/534.13 (KHTML, like Gecko) Chrome/9.0.597.107 Safari/534.13')
    h.endheaders()
    returncode, returnmsg, headers = h.getreply()
    data = h.getfile().read()
    # The dot and the file type are escaped so only a literal ".<file>"
    # suffix matches (an unescaped "." accepted any character).
    link_re = re.compile(r'><a href="([^"]+\.' + re.escape(file) + r')"')
    return link_re.findall(data)
# File types searched when "-f all" is requested.
_ALL_FILETYPES = ['pdf', 'doc', 'xls', 'ppt', 'sdw', 'mdb', 'sdc', 'odp',
                  'ods', 'docx', "xlsx", "pptx"]

# Opening <pre> tag of the metadata box, per file type.
_PRE_TAGS = {
    'pdf': '<pre style=\"background:#C11B17;border:1px solid;\" >',
    'doc': '<pre style=\"background:#6698FF;border:1px solid;\">',
    'xls': '<pre style=\"background:#437C2C;border:1px solid;\">',
    'ppt': '<pre style=\"background:#E56717;border:1px solid;\">',
}
_PRE_TAG_DEFAULT = '<pre style=\"background:#827839;border:1px solid;\">'

# Patterns in the extract output whose value follows a "- " separator.
_AUTHOR_PATTERNS = ('Author -.*', 'creator -.*', 'author -.*',
                    'last saved by -.*')


def _add_unique(items, value):
    """Append *value* to *items* unless it is empty or already present."""
    if value != '' and value not in items:
        items.append(value)


def _write_report_header(fil, word):
    """Write the title section of the HTML report for domain *word*."""
    date = time.strftime("%a, %d %b %Y %H:%M:%S +0000", time.gmtime())
    # "text/css": the earlier "text\/css" put a literal backslash in the tag.
    fil.write("<style type=\"text/css\"><!--BODY{font-family:sans-serif;}--></style>")
    fil.write("<center><b>Meta<font color=\"#0000cc\">G</font><font color=\"#ff0000\">o</font><font color=\"#ffff00\">o</font>fil</b> results page for:</center>")
    fil.write("<center><b>" + word + "</b></center>")
    fil.write("<center>" + date + "</center>")
    fil.write("<center><a href=\"http://www.edge-security.com\">By Edge-Security</a></center>")
    fil.write("<hr>")
    fil.write('<a href="#users">Results: Go directly to results.</a>')


def _run_extract(path):
    """Run the configured extract command on *path*; return its output lines."""
    # Argument list instead of a shell string: the file name comes from a
    # remote URL and must not be interpreted by a shell.
    command = extcommand.split(" ") + [path]
    try:
        proc = subprocess.Popen(command, stdout=subprocess.PIPE,
                                stderr=subprocess.STDOUT,
                                universal_newlines=True)
        output = proc.communicate()[0]
    except OSError:
        print("Error executing extract, maybe the binary path is wrong.")
        return []
    return output.splitlines(True)


def _harvest_extract_line(line, authors, pathos):
    """Collect author names and paths found in one line of extract output."""
    for pattern in _AUTHOR_PATTERNS:
        hits = re.findall(pattern, line)
        if hits:
            parts = hits[0].split('- ')
            # A match such as "Author -x" has no "- " separator.
            if len(parts) > 1:
                _add_unique(authors, parts[1])

    hits = re.findall(': Author \'.*\'', line)
    if hits:
        _add_unique(authors, hits[0].split('\'')[1])

    hits = re.findall('worked on .*', line)
    if hits:
        parts = hits[0].split('\'')
        # Only quoted values are recorded; unquoted lines are skipped.
        if len(parts) > 1:
            _add_unique(pathos, parts[1])

    hits = re.findall('template -.*', line)
    if hits:
        _add_unique(pathos, hits[0].split('-')[1])


def _process_result(fil, url, progress, target_dir, filetype, authors, pathos):
    """Download one search hit, run the extractors and append to the report."""
    filename = url.split("/")[-1]
    local_path = target_dir + "/" + filename

    print("\t[ " + progress + " ] " + url)
    if not os.path.exists(local_path):
        try:
            urllib.urlretrieve(url, local_path)
        except IOError:
            print("Can't download")
            print("Can't Download " + url)
            fil.write("<br>Local copy, failed download :(\n")
            fil.write('<hr>')
            return

    fil.write("<br>Local copy " + "<a href=\"" + local_path + '\">Open</a>')
    fil.write("<br><br>Important metadata:")
    extract_lines = _run_extract(local_path)
    fil.write('<br>')

    mac = get_mac(filename, target_dir)
    author, path = get_info_pdf(filename, target_dir)
    _add_unique(authors, author)
    _add_unique(pathos, path)

    fil.write(_PRE_TAGS.get(filetype, _PRE_TAG_DEFAULT))
    if mac != '':
        fil.write('Mac address:' + mac + '\n')
    for line in extract_lines:
        fil.write(line)
        _harvest_extract_line(line, authors, pathos)
    fil.write('</pre>')
    fil.write('<hr>')


def _write_report_summary(fil, authors, pathos):
    """Write (and print) the collected user names and directory paths."""
    fil.write('<a name="users"></a>')
    fil.write('<br>')
    fil.write('<b><h2>Total authors found (potential users):</h2></b>')
    fil.write('<pre style=\"background:#737ca1;border:1px solid;\">')
    print("\n")
    print("Usernames found:")
    print("================")
    if authors:
        for name in authors:
            fil.write(str(name) + '<br>')
            print(str(name))
    else:
        fil.write("0 users found.<br>")
    fil.write('</pre>')

    print("\n")
    print("Paths found:")
    print("============")
    fil.write('<b><h2>Path Disclosure:</h2></b>')
    fil.write('<pre style=\"background:#c8bbbe;border:1px solid;\">')
    if pathos:
        shown = []
        for full_path in pathos:
            # Keep only the backslash-separated components without a dot,
            # i.e. drop anything that looks like a file name.
            temp = ""
            for part in full_path.split('\\'):
                if not part.count('.'):
                    temp = temp + part + "\\"
            if temp not in shown:
                fil.write(str(temp) + '<br>')
                shown.append(temp)
                print(temp)
    else:
        fil.write('0 path found.<br>')
    fil.write('</pre>')


def test(argv):
    """Run a complete search / download / metadata-extraction session.

    Parses the options, queries Google through ``howmany()`` and ``run()``
    for every requested file type, downloads the hits into the target
    directory, runs the external extract command plus ``get_mac()`` and
    ``get_info_pdf()`` on each file, and writes an HTML report.

    Args:
        argv: command-line arguments without the program name.  ``-d``
            (domain), ``-f`` (file type or ``all``) and ``-o`` (report
            file) are required; ``-l`` (result limit, default 20) and
            ``-t`` (download directory, default: the domain) are optional.

    Side effects:
        Sets the module-level ``limit`` and ``file``; ``file`` is read by
        ``howmany()`` and ``run()``.

    Raises:
        SystemExit: on invalid options (via ``usage()``), when the report
            file cannot be opened, when the extract binary is missing, or
            when the user interrupts a download.
    """
    global limit
    global file
    limit = 20
    word = None
    ofile = None
    requested_type = None
    target_dir = 'none'

    try:
        opts, args = getopt.getopt(argv, "l:d:f:o:t:")
    except getopt.GetoptError:
        usage()
    for opt, arg in opts:
        if opt == '-l':
            try:
                limit = int(arg)
            except ValueError:
                usage()
        elif opt == '-d':
            word = str(arg)
        elif opt == '-f':
            requested_type = str(arg)
        elif opt == '-o':
            ofile = str(arg)
        elif opt == '-t':
            target_dir = str(arg)
    # The options without a default must be present.
    if word is None or requested_type is None or ofile is None:
        usage()
    file = requested_type

    if target_dir == 'none':
        target_dir = word
    if requested_type != 'all':
        filetypes = [requested_type]
    else:
        filetypes = list(_ALL_FILETYPES)

    try:
        fil = open(ofile, 'w')
    except IOError:
        # Nothing can be reported without the output file.
        print("Failed")
        sys.exit()

    try:
        if os.path.isfile(extcommand.split(" ")[0]):
            print("[+] Command extract found, proceeding with leeching")
        else:
            print("Command extract not found, please check and change the location")
            sys.exit()

        _write_report_header(fil, word)
        authors = []
        pathos = []
        for fi in filetypes:
            file = fi  # howmany() and run() read the module-level value
            print("[+] Searching in " + word + " for: " + file)
            total = int(howmany(word))
            print("[+] Total results in google: " + str(total))
            if total == 0:
                continue

            fil.write("<hr>")
            fil.write("<strong><u>Searching in " + word + " for: " + file + " files.</u></strong><br><br>")
            # Cap per file type only: shrinking the global limit here would
            # also restrict every file type searched afterwards.
            wanted = min(limit, total)
            print("[+] Limit: " + str(wanted))

            result = []
            cant = 0
            while cant < wanted:
                print("[+] Searching results: " + str(cant) + "\r")
                for link in run(word, cant):
                    if link not in result and 'http' in link:
                        result.append(link)
                cant += 20
            fil.write("<strong>Total available files: " + str(total) + " </strong><br>")

            if os.path.exists(target_dir):
                print("[+] Directory " + target_dir + " already exist, reusing it")
            else:
                os.mkdir(target_dir)

            t = 0
            cantidad_todo = len(result)
            for contador, x in enumerate(result, 1):
                fil.write(x + "<br>")
                try:
                    _process_result(fil, x,
                                    str(contador) + "/" + str(cantidad_todo),
                                    target_dir, file, authors, pathos)
                except KeyboardInterrupt:
                    print("Process Interrupted by user\n")
                    sys.exit()
                t += 1
            fil.write("<strong>Total results for " + fi + ": " + str(t) + "</strong><br>")
            fil.write('<hr>')

        _write_report_summary(fil, authors, pathos)
    finally:
        # Flush and close the report even when the run is aborted.
        fil.close()
    print("[+] Process finished")
# Script entry point: run the tool with the command-line arguments.
if __name__ == "__main__":
    try:
        test(sys.argv[1:])
    except KeyboardInterrupt:
        print("Process interrupted by user..")
    except Exception as err:
        # A bare "except:" used to hide every failure (and any exit
        # status); report the error and signal it to the caller instead.
        print("Unexpected error: " + str(err))
        sys.exit(1)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement