--- metagoofil.py	2012-01-09 14:53:16.000000000 -0500
+++ metagoofil.py	2012-01-09 14:51:49.000000000 -0500
@@ -8,6 +8,7 @@
 import getopt
 import markup
 import warnings
+from multiprocessing import Pool
 warnings.filterwarnings("ignore") # To prevent errors from hachoir deprecated functions, need to fix.
 print "\n*************************************"
@@ -34,13 +35,14 @@
 	sys.exit()
 
-global limit,filelimit,start,password,all,localanalysis,dir
+global limit,filelimit,start,password,all,localanalysis,dir,counter
 limit=100
 filelimit=50
 start=0
 password=""
 all=[]
 dir="test"
+counter=0
 
 def writehtml(users,softs,paths,allinfo,fname,dir):
 	page = markup.page()
@@ -82,6 +84,15 @@
 	file.close
 	return "ok"
 
+def download(url):  # worker for Pool.map: fetch one URL into global 'dir', return the saved file name
+	global dir
+	url = url.strip()
+	save_to = os.path.basename(url)  # NOTE(review): relies on 'import os' and 'import urllib' at file top -- verify both exist
+	urllib.urlretrieve(url, dir+"/"+save_to)
+	filename=str(url.split("/")[-1])
+	print "Downloaded %s" % url
+	return filename
+
 def doprocess(argv):
 	localanalysis= "no"
@@ -106,8 +117,10 @@
 		elif opt == '-h':
 			localanalysis=arg
 		elif opt == '-n':
+			global filelimit
 			filelimit = int(arg)
 		elif opt == '-o':
+			global dir
 			dir = arg
 		elif opt == '-f':
 			outhtml = arg
@@ -117,6 +130,7 @@
 		os.mkdir(dir)
 	if localanalysis == "no":
 		print "[-] Starting online search..."
+		f=open('urls.txt','w')  # collect every result URL first; downloads happen in parallel below
 		for filetype in filetypes:
 			print "\n[-] Searching for "+filetype+ " files, with a limit of " + str(limit)
 			search=googlesearch.search_google(word,limit,start,filetype)
@@ -125,39 +139,46 @@
 			print "Results: " + str(len(files)) + " files found"
 			print "Starting to download "+ str(filelimit) + " of them.."
 			print "----------------------------------------------------\n"
-			counter=0
+			counter=0
+
 			for x in files:
 				if counter <= filelimit:
-					print "["+str(counter)+"/"+str(filelimit)+"] " + x
-					getfile=downloader.downloader(x,dir)
-					getfile.down()
-					filename=getfile.name()
-					if filename !="":
-						if filetype == "pdf":
-							test=metadataPDF.metapdf(dir+"/"+filename,password)
-						elif filetype == "doc" or filetype == "ppt" or filetype == "xls":
-							test=metadataMSOffice.metaMs2k(dir+"/"+filename)
-							if os.name=="posix":
-								testex=metadataExtractor.metaExtractor(dir+"/"+filename)
-						elif filetype == "docx" or filetype == "pptx" or filetype == "xlsx":
-							test=metadataMSOfficeXML.metaInfoMS(dir+"/"+filename)
-						res=test.getData()
-						if res=="ok":
-							raw=test.getRaw()
-							users=test.getUsers()
-							paths=test.getPaths()
-							soft=test.getSoftware()
-							if (filetype == "doc" or filetype == "xls" or filetype == "ppt") and os.name=="posix":
-								testex.runExtract()
-								testex.getData()
-								paths.extend(testex.getPaths())
-							respack=[x,users,paths,soft,raw]
-							all.append(respack)
-						else:
-							print "error" #A error in the parsing process
-				else:
-					print "pass"
-				counter+=1
+					f.write(x+'\n')
+				else:
+					pass
+				counter+=1
+		f.close()
+
+		pool = Pool(processes=4)
+		downloadResults = pool.map(download, open("urls.txt").readlines())
+		os.remove("urls.txt")
+
+		for filename in downloadResults:
+			filetype=str(filename.split(".")[-1])  # filetype now derived per file, since downloads are no longer grouped by search type
+			if filetype == "pdf":
+				test=metadataPDF.metapdf(dir+"/"+filename,password)  # fix: keep passing 'password' as the pre-patch code did
+			elif filetype == "doc" or filetype == "ppt" or filetype == "xls":
+				test=metadataMSOffice.metaMs2k(dir+"/"+filename)
+				if os.name=="posix":
+					testex=metadataExtractor.metaExtractor(dir+"/"+filename)
+			elif filetype == "docx" or filetype == "pptx" or filetype == "xlsx":
+				test=metadataMSOfficeXML.metaInfoMS(dir+"/"+filename)
+			res=test.getData()  # NOTE(review): 'test' is unbound when the extension is unrecognized -- guard before shipping
+			if res=="ok":
+				raw=test.getRaw()
+				users=test.getUsers()
+				paths=test.getPaths()
+				soft=test.getSoftware()
+
+				if (filetype == "doc" or filetype == "xls" or filetype == "ppt") and os.name=="posix":
+					testex.runExtract()
+					testex.getData()
+					paths.extend(testex.getPaths())
+				respack=[filename,users,paths,soft,raw]  # fix: was 'x', the stale last URL from the collection loop above
+				all.append(respack)
+			else:
+				print "error" #A error in the parsing process
+
 	else:
 		print "[-] Starting local analysis in directory " + dir
 		dirList=os.listdir(dir)