milo2012

Updated Metagoofil.py (Speed Improvement)

Jan 9th, 2012
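This revision parallelizes the document downloads with a multiprocessing pool of four workers in place of the original serial downloader, which is where the speed improvement comes from.
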
from discovery import googlesearch
from extractors import *
import urllib
import os
import downloader
import processor
import sys
import getopt
import markup
import warnings
from multiprocessing import Pool
warnings.filterwarnings("ignore")  # Suppress deprecation warnings from hachoir; needs a proper fix.

print "\n*************************************"
print "* Metagoofil Ver 2.0 - Reborn       *"
print "* Christian Martorella              *"
print "* Edge-Security.com                 *"
print "* cmartorella_at_edge-security.com  *"
print "* BACKTRACK 5 Edition!!             *"
print "*************************************"

def usage():
    print "Metagoofil 2.0:\n"
    print "Usage: metagoofil options\n"
    print "       -d: domain to search"
    print "       -t: filetype to download (pdf,doc,xls,ppt,odp,ods,docx,xlsx,pptx)"
    print "       -l: limit of results to search (default 100)"
    print "       -h: work with documents in directory (use \"yes\" for local analysis)"
    print "       -n: limit of files to download"
    print "       -o: working directory"
    print "       -f: output file\n"
    print "Examples:"
    print "  metagoofil.py -d microsoft.com -t doc,pdf -l 200 -n 50 -o microsoftfiles -f results.html"
    print "  metagoofil.py -h yes -o microsoftfiles -f results.html (local dir analysis)\n"
    sys.exit()


global limit, filelimit, start, password, all, localanalysis, dir, counter
limit = 100
filelimit = 50
start = 0
password = ""
all = []
dir = "test"
counter = 0

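# Render the collected results (users, software, paths, raw metadata) as an
# HTML report using the bundled markup module.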
def writehtml(users, softs, paths, allinfo, fname, dir):
    page = markup.page()
    page.init(title="Metagoofil Results", css=('edge.css'), footer="Edge-security 2011")
    page.h2("Metagoofil results")
    page.h3("User names found:")
    page.ul(class_="userslist")
    page.li(users, class_="useritem")
    page.ul.close()
    page.h3("Software versions found:")
    page.ul(class_="softlist")
    page.li(softs, class_="softitem")
    page.ul.close()
    page.h3("Servers and paths found:")
    if paths != []:
        page.ul(class_="pathslist")
        page.li(paths, class_="pathitem")
        page.ul.close()
    page.h3("Files analyzed:")
    page.ul(class_="files")
    for x in allinfo:
        page.li(x[0], class_="file")
    page.ul.close()
    page.h2("Files and metadata found:")
    for x in allinfo:
        page.h3(x[0])
        page.a("Local copy", class_="link", href=dir + "/" + x[0])
        page.pre(style="background:#C11B17;border:1px solid;")
        page.pre(x[1])
        page.pre(x[3])
        page.pre.close()
    file = open(fname, 'w')
    for x in page.content:
        try:
            file.write(x)
        except:
            # print "Exception" + x  # send to logs
            pass
    file.close()
    return "ok"

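# Worker used by Pool.map() below. multiprocessing pickles the callable it
# maps, so this must stay a module-level function.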
def download(url):
    global dir
    url = url.strip()
    save_to = os.path.basename(url)
    urllib.urlretrieve(url, dir + "/" + save_to)
    filename = str(url.split("/")[-1])
    print "Downloaded %s" % url
    return filename


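# Parse the command line, collect documents (via Google search or from a
# local directory), extract their metadata, and write the report.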
def doprocess(argv):
    localanalysis = "no"
    if len(sys.argv) < 3:
        usage()
    try:
        opts, args = getopt.getopt(argv, "l:d:f:h:n:t:o:")
    except getopt.GetoptError:
        usage()
    for opt, arg in opts:
        if opt == '-d':
            word = arg
        elif opt == '-t':
            filetypes = []
            if arg.count(",") != 0:
                filetypes = arg.split(",")
            else:
                filetypes.append(arg)
            print filetypes
        elif opt == '-l':
            global limit
            limit = int(arg)
        elif opt == '-h':
            localanalysis = arg
        elif opt == '-n':
            global filelimit
            filelimit = int(arg)
        elif opt == '-o':
            global dir
            dir = arg
        elif opt == '-f':
            outhtml = arg
    if os.path.exists(dir):
        pass
    else:
        os.mkdir(dir)
    if localanalysis == "no":
        print "[-] Starting online search..."
        f = open('urls.txt', 'w')
        for filetype in filetypes:
            print "\n[-] Searching for " + filetype + " files, with a limit of " + str(limit)
            search = googlesearch.search_google(word, limit, start, filetype)
            search.process_files()
            files = search.get_files()
            print "Results: " + str(len(files)) + " files found"
            print "Starting to download " + str(filelimit) + " of them.."
            print "----------------------------------------------------\n"
            counter = 0
            for x in files:
                if counter < filelimit:
                    f.write(x + '\n')
                counter += 1
        f.close()

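        # Speed improvement: download the queued URLs in parallel with a pool
        # of four worker processes instead of fetching them serially.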
        pool = Pool(processes=4)
        downloadResults = pool.map(download, open("urls.txt").readlines())
        os.remove("urls.txt")

        for filename in downloadResults:
            filetype = str(filename.split(".")[-1])
            if filetype == "pdf":
                test = metadataPDF.metapdf(dir + "/" + filename)
            elif filetype == "doc" or filetype == "ppt" or filetype == "xls":
                test = metadataMSOffice.metaMs2k(dir + "/" + filename)
                if os.name == "posix":
                    testex = metadataExtractor.metaExtractor(dir + "/" + filename)
            elif filetype == "docx" or filetype == "pptx" or filetype == "xlsx":
                test = metadataMSOfficeXML.metaInfoMS(dir + "/" + filename)
            else:
                continue  # no parser for this extension; skip the file
            res = test.getData()
            if res == "ok":
                raw = test.getRaw()
                users = test.getUsers()
                paths = test.getPaths()
                soft = test.getSoftware()
                if (filetype == "doc" or filetype == "xls" or filetype == "ppt") and os.name == "posix":
                    testex.runExtract()
                    testex.getData()
                    paths.extend(testex.getPaths())
                respack = [filename, users, paths, soft, raw]
                all.append(respack)
            else:
                print "error"  # an error in the parsing process

    else:
        print "[-] Starting local analysis in directory " + dir
        dirList = os.listdir(dir)
        for filename in dirList:
            if filename != "":
                filetype = str(filename.split(".")[-1])
                if filetype == "pdf":
                    test = metadataPDF.metapdf(dir + "/" + filename, password)
                elif filetype == "doc" or filetype == "ppt" or filetype == "xls":
                    test = metadataMSOffice.metaMs2k(dir + "/" + filename)
                    if os.name == "posix":
                        testex = metadataExtractor.metaExtractor(dir + "/" + filename)
                elif filetype == "docx" or filetype == "pptx" or filetype == "xlsx":
                    test = metadataMSOfficeXML.metaInfoMS(dir + "/" + filename)
                else:
                    continue  # no parser for this extension; skip the file
                res = test.getData()
                if res == "ok":
                    raw = test.getRaw()
                    users = test.getUsers()
                    paths = test.getPaths()
                    soft = test.getSoftware()
                    if (filetype == "doc" or filetype == "xls" or filetype == "ppt") and os.name == "posix":
                        valid = testex.runExtract()
                        if valid == "ok":
                            testex.getData()
                            paths.extend(testex.getPaths())
                        soft = test.getSoftware()
                        raw = test.getRaw()
                    respack = [filename, users, paths, soft, raw]
                    all.append(respack)
                else:
                    pass  # an error in the parsing process

    proc = processor.processor(all)
    userlist = proc.sort_users()
    softlist = proc.sort_software()
    pathlist = proc.sort_paths()
    try:
        save = writehtml(userlist, softlist, pathlist, all, outhtml, dir)
    except:
        print "Error creating the file"
    print "\n[+] List of users found:"
    print "--------------------"
    for x in userlist:
        print x
    print "\n[+] List of software found:"
    print "-----------------------"
    for x in softlist:
        print x
    print "\n[+] List of paths and servers found:"
    print "--------------------------------"
    for x in pathlist:
        print x

if __name__ == "__main__":
    try:
        doprocess(sys.argv[1:])
    except KeyboardInterrupt:
        print "Process interrupted by user."
    except:
        sys.exit()