Advertisement
milo2012

Speed enhancement for metagoofil

Jan 9th, 2012
238
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 4.27 KB | None | 0 0
  1. --- metagoofil.py 2012-01-09 14:53:16.000000000 -0500
  2. +++ metagoofil.py 2012-01-09 14:51:49.000000000 -0500
  3. @@ -8,6 +8,7 @@
  4. import getopt
  5. import markup
  6. import warnings
  7. +from multiprocessing import Pool
  8. warnings.filterwarnings("ignore") # To prevent errors from hachoir deprecated functions, need to fix.
  9.  
  10. print "\n*************************************"
  11. @@ -34,13 +35,14 @@
  12. sys.exit()
  13.  
  14.  
  15. -global limit,filelimit,start,password,all,localanalysis,dir
  16. +global limit,filelimit,start,password,all,localanalysis,dir,counter
  17. limit=100
  18. filelimit=50
  19. start=0
  20. password=""
  21. all=[]
  22. dir="test"
  23. +counter=0
  24.  
  25. def writehtml(users,softs,paths,allinfo,fname,dir):
  26. page = markup.page()
  27. @@ -82,6 +84,15 @@
  28. file.close
  29. return "ok"
  30.  
  31. +def download(url):
  32. + global dir
  33. + url = url.strip()
  34. + save_to = os.path.basename(url)
  35. + urllib.urlretrieve(url, dir+"/"+save_to)
  36. + filename=str(url.split("/")[-1])
  37. + print "Downloaded %s" % url
  38. + return filename
  39. +
  40.  
  41. def doprocess(argv):
  42. localanalysis= "no"
  43. @@ -106,8 +117,10 @@
  44. elif opt == '-h':
  45. localanalysis=arg
  46. elif opt == '-n':
  47. + global filelimit
  48. filelimit = int(arg)
  49. elif opt == '-o':
  50. + global dir
  51. dir = arg
  52. elif opt == '-f':
  53. outhtml = arg
  54. @@ -117,6 +130,7 @@
  55. os.mkdir(dir)
  56. if localanalysis == "no":
  57. print "[-] Starting online search..."
  58. + f=open('urls.txt','w')
  59. for filetype in filetypes:
  60. print "\n[-] Searching for "+filetype+ " files, with a limit of " + str(limit)
  61. search=googlesearch.search_google(word,limit,start,filetype)
  62. @@ -125,39 +139,46 @@
  63. print "Results: " + str(len(files)) + " files found"
  64. print "Starting to download "+ str(filelimit) + " of them.."
  65. print "----------------------------------------------------\n"
  66. - counter=0
  67. + counter=0
  68. +
  69. for x in files:
  70. if counter <= filelimit:
  71. - print "["+str(counter)+"/"+str(filelimit)+"] " + x
  72. - getfile=downloader.downloader(x,dir)
  73. - getfile.down()
  74. - filename=getfile.name()
  75. - if filename !="":
  76. - if filetype == "pdf":
  77. - test=metadataPDF.metapdf(dir+"/"+filename,password)
  78. - elif filetype == "doc" or filetype == "ppt" or filetype == "xls":
  79. - test=metadataMSOffice.metaMs2k(dir+"/"+filename)
  80. - if os.name=="posix":
  81. - testex=metadataExtractor.metaExtractor(dir+"/"+filename)
  82. - elif filetype == "docx" or filetype == "pptx" or filetype == "xlsx":
  83. - test=metadataMSOfficeXML.metaInfoMS(dir+"/"+filename)
  84. - res=test.getData()
  85. - if res=="ok":
  86. - raw=test.getRaw()
  87. - users=test.getUsers()
  88. - paths=test.getPaths()
  89. - soft=test.getSoftware()
  90. - if (filetype == "doc" or filetype == "xls" or filetype == "ppt") and os.name=="posix":
  91. - testex.runExtract()
  92. - testex.getData()
  93. - paths.extend(testex.getPaths())
  94. - respack=[x,users,paths,soft,raw]
  95. - all.append(respack)
  96. - else:
  97. - print "error" #A error in the parsing process
  98. - else:
  99. - print "pass"
  100. - counter+=1
  101. + f.write(x+'\n')
  102. + else:
  103. + pass
  104. + counter+=1
  105. + f.close()
  106. +
  107. + pool = Pool(processes=4)
  108. + downloadResults = pool.map(download, open("urls.txt").readlines())
  109. + os.remove("urls.txt")
  110. +
  111. + for filename in downloadResults:
  112. + filetype=str(filename.split(".")[-1])
  113. + if filetype == "pdf":
  114. + test=metadataPDF.metapdf(dir+"/"+filename)
  115. + elif filetype == "doc" or filetype == "ppt" or filetype == "xls":
  116. + test=metadataMSOffice.metaMs2k(dir+"/"+filename)
  117. + if os.name=="posix":
  118. + testex=metadataExtractor.metaExtractor(dir+"/"+filename)
  119. + elif filetype == "docx" or filetype == "pptx" or filetype == "xlsx":
  120. + test=metadataMSOfficeXML.metaInfoMS(dir+"/"+filename)
  121. + res=test.getData()
  122. + if res=="ok":
  123. + raw=test.getRaw()
  124. + users=test.getUsers()
  125. + paths=test.getPaths()
  126. + soft=test.getSoftware()
  127. +
  128. + if (filetype == "doc" or filetype == "xls" or filetype == "ppt") and os.name=="posix":
  129. + testex.runExtract()
  130. + testex.getData()
  131. + paths.extend(testex.getPaths())
  132. + respack=[x,users,paths,soft,raw]
  133. + all.append(respack)
  134. + else:
  135. + print "error" #A error in the parsing process
  136. +
  137. else:
  138. print "[-] Starting local analysis in directory " + dir
  139. dirList=os.listdir(dir)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement