Advertisement
Guest User

MD5, VT Detect, Sections, EP, Size, NACO into MongoDB

a guest
Jan 19th, 2018
317
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 3.18 KB | None | 0 0
  1. import requests
  2. import time
  3. import json
  4. import os
  5. import hashlib
  6. from pymongo import MongoClient
  7. from pymongo.errors import DuplicateKeyError
  8. import pefile
  9.  
  10.  
  11. client = MongoClient('localhost', 27017)
  12. db = client.mw_folder
  13. collection = db.malwarka
  14. path = raw_input("Input path to file: ")
  15. filename = os.listdir(path)
  16. post = {}
  17.  
  18.  
  19. def getvtscan(md5hash):
  20. trusted_av = ['Microsoft', 'Malwarebytes', 'Kaspersky', 'ESET-NOD32']
  21. params = {'apikey': '5a287b7dc4b2cf6ecb5cbe8ae060aa3e64198c17c39312c0cf12cc316c78d1fb', 'resource': md5hash}
  22. headers = {
  23. "Accept-Encoding": "gzip, deflate",
  24. "User-Agent" : "gzip, My Python requests library example client or username"
  25. }
  26. response = requests.get('https://www.virustotal.com/vtapi/v2/file/report',
  27. params=params, headers=headers)
  28. json_response = response.json()
  29.  
  30. for av in trusted_av:
  31. result = json_response['scans'][av]['result']
  32. if result:
  33. return result
  34. else:
  35. return 'Clean'
  36.  
  37.  
  38. def getfilesize(path):
  39. st = os.stat(path)
  40. return st.st_size
  41.  
  42.  
  43. def md5Checksum(path):
  44. with open(path, 'rb') as fh:
  45. m = hashlib.md5()
  46. while True:
  47. data = fh.read(128000)
  48. if not data:
  49. break
  50. m.update(data)
  51. return m.hexdigest()
  52.  
  53.  
  54. def getentrypoint(path):
  55. try:
  56. pe = pefile.PE(path)
  57. epoint = pe.OPTIONAL_HEADER.AddressOfEntryPoint
  58. return epoint
  59. except pefile.PEFormatError:
  60. return 'Error'
  61.  
  62.  
  63. def getnaco(path):
  64. try:
  65. pe = pefile.PE(path)
  66. try:
  67. NACO = hashlib.md5((pe.FileInfo[0].StringTable[0].entries['ProductName']) + (pe.FileInfo[0].StringTable[0].entries['CompanyName'])).hexdigest()
  68. return NACO
  69. except:
  70. return 'Have no NaCo'
  71. except pefile.PEFormatError:
  72. print 'Error'
  73.  
  74.  
  75. def getfilesect(path):
  76. sect_arr = []
  77. try:
  78. pe = pefile.PE(path)
  79. for section in pe.sections:
  80. SectName = str(section.Name)
  81. SectRawSize = int(section.SizeOfRawData)
  82. SectMD5 = section.get_hash_md5()
  83. sect_arr.append({'Sect Name': SectName.rstrip('\x00\x00\x00\x00'),
  84. 'MD5 Sect': SectMD5,
  85. 'SectRawSize': str(SectRawSize)})
  86. return sect_arr
  87. except pefile.PEFormatError:
  88. print 'Error'
  89.  
  90.  
  91. count = 0
  92. while count <= len(filename)-1:
  93. v = str(path) + str(filename[count])
  94. md5h = md5Checksum(v)
  95. getnaco(v)
  96. post[count] = {'_id': md5Checksum(v),
  97. "Filename": filename[count],
  98. "Size": getfilesize(v),
  99. "AV Detect": getvtscan(md5h),
  100. "Entry Point": getentrypoint(v),
  101. "Sections": getfilesect(v),
  102. 'NACO': getnaco(v)}
  103. files = db.files
  104. try:
  105. post_id = files.insert_one(post[count]).inserted_id
  106. except DuplicateKeyError:
  107. print 'Duplicate' + post[count]['_id']
  108. except Exception, why:
  109. print str(why), filename[count]
  110. count+=1
  111. print 'We are parsing', count , 'files'
  112. time.sleep(15)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement