#!/usr/bin/env python2
'''
Carve PE files from binary data.
Write them into the current directory named after their hash.

Example::

    $ python carvepe.py unallocated.bin
    INFO:__main__:found pe at 0x0, length: 0xd8000
    INFO:__main__:writing pe file to 273ed32b617fd79ed1b88ebd4521a441.bin

    $ ls
    595f44fec1e92a71d3e9e77456ba80d1.bin
    71f920fa275127a7b60fa4d4d41432a3.bin
    43c191bf6d6c3f263a8cd0efd4a058ab.bin

author: Willi Ballenthin
'''
import sys
import mmap
import struct
import hashlib
import logging
import argparse
import contextlib
from collections import namedtuple

import pefile


logger = logging.getLogger(__name__)


Match = namedtuple('Match', ['offset', 'size'])

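
# a minimal optional pre-check (a sketch; not called by carve() below): before
# handing a candidate 'MZ' hit to pefile, peek at e_lfanew (the dword at file
# offset 0x3c) and confirm the 'PE\x00\x00' signature sits where it points.
# this cheaply filters most false positives from the raw 'MZ' scan.
# `looks_like_pe` is an illustrative name, not part of the original script.
def looks_like_pe(payload):
    # a full DOS header is 0x40 bytes; anything shorter can't hold e_lfanew.
    if len(payload) < 0x40:
        return False
    e_lfanew = struct.unpack_from('<I', payload, 0x3c)[0]
    # the 4-byte NT signature must land inside the data we have.
    if e_lfanew + 4 > len(payload):
        return False
    return payload[e_lfanew:e_lfanew + 4] == 'PE\x00\x00'
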

def carve(data):
    """
    find things that look like PE files from arbitrary binary data.

    Args:
      data (str): arbitrary byte string

    Yields:
      Match: one Match instance per identified PE file.
    """
    offset = 0

    while True:
        offset = data.find('MZ', offset)
        if offset == -1:
            break

        logger.debug('found MZ: 0x%x', offset)

        # grab a bunch of data that should include the entire binary.
        # assume less than 10mb.
        max_offset = min(len(data), offset + 10 * 1024 * 1024)
        payload = data[offset:max_offset]

        try:
            pe = pefile.PE(data=payload)
        except pefile.PEFormatError:
            logger.debug('not actually a PE, sorry.')
        else:
            logger.debug('yup, this looks ok.')

            # try to compute the size of the PE file.
            # we'll enumerate each section, and find the end of the last section.
            # this should work for most binaries, unless there is an overlay.
            # the PE file format does not have a true "file length" field, unfortunately.
            max_addr = 0
            for section in sorted(pe.sections, key=lambda s: s.PointerToRawData):
                section_max_addr = section.PointerToRawData + section.SizeOfRawData
                if section_max_addr > max_addr:
                    max_addr = section_max_addr

            if pe.OPTIONAL_HEADER.CheckSum == pe.generate_checksum():
                logger.debug('checksum verified')

            yield Match(offset, max_addr)

        offset += 1

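
# an illustrative convenience wrapper (a sketch; nothing below depends on it):
# carve() only needs find() and slicing from `data`, so it works on a plain
# in-memory byte string just as well as on the mmap used in main().
# `carve_from_bytes` is a hypothetical name, not part of the original script.
def carve_from_bytes(buf):
    for match in carve(buf):
        yield buf[match.offset:match.offset + match.size]
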

def main(argv=None):
    if argv is None:
        argv = sys.argv[1:]

    parser = argparse.ArgumentParser(description="Carve PE files from binary data.")
    parser.add_argument("input", type=str,
                        help="Path to input file")
    parser.add_argument("-v", "--verbose", action="store_true",
                        help="Enable debug logging")
    parser.add_argument("-q", "--quiet", action="store_true",
                        help="Disable all output but errors")
    args = parser.parse_args(args=argv)

    if args.verbose:
        logging.basicConfig(level=logging.DEBUG)
    elif args.quiet:
        logging.basicConfig(level=logging.ERROR)
    else:
        logging.basicConfig(level=logging.INFO)

    with open(args.input, 'rb') as f:
        # we're using a memory map here.
        # it lets us read from a large file as if it were entirely in memory.
        # (but it's not, actually)
        with contextlib.closing(mmap.mmap(f.fileno(), 0, access=mmap.ACCESS_READ)) as m:
            for match in carve(m):
                pe = m[match.offset:match.offset + match.size]
                logger.info('found pe at 0x%x, length: 0x%x', match.offset, match.size)

                # name the hash object `h`, not `m`, so it doesn't shadow
                # the memory map we slice from on the next iteration.
                h = hashlib.md5()
                h.update(pe)
                logger.debug('md5sum: %s', h.hexdigest())

                outpath = h.hexdigest() + '.bin'
                logger.info('writing pe file to %s', outpath)
                with open(outpath, 'wb') as g:
                    g.write(pe)


if __name__ == "__main__":
    sys.exit(main())