Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- #!/usr/bin/env python2
- '''
- Carve PE files from binary data.
- Write them into the current directory, named after their hash.
- Example::
- $ python carvepe.py unallocated.bin
- INFO:__main__:found pe at 0x0, length: 0xd8000
- INFO:__main__:writing pe file to 273ed32b617fd79ed1b88ebd4521a441.bin
- $ ls
- 595f44fec1e92a71d3e9e77456ba80d1.bin
- 71f920fa275127a7b60fa4d4d41432a3.bin
- 43c191bf6d6c3f263a8cd0efd4a058ab.bin
- author: Willi Ballenthin
- '''
- import sys
- import mmap
- import hashlib
- import logging
- import contextlib
- from collections import namedtuple
- import pefile
- import argparse
# module-level logger, named after this module.
logger = logging.getLogger(__name__)

# one carved PE file: where it starts in the input, and how many bytes it spans.
Match = namedtuple('Match', ('offset', 'size'))
def _pe_size(payload):
    """
    parse `payload` as a PE file and estimate how many bytes of it belong to the file.

    Args:
      payload (bytes): data beginning at a candidate `MZ` magic.

    Returns:
      int: the end offset of the section whose raw data reaches furthest into
        the file (0 when no section has raw data), or None if pefile rejects
        the payload with PEFormatError.
    """
    try:
        pe = pefile.PE(data=payload)
    except pefile.PEFormatError:
        logger.debug('not actually a PE, sorry.')
        return None

    logger.debug('yup, this looks ok.')

    # try to compute the size of the PE file.
    # the PE file format does not have a true "file length" field, unfortunately,
    #  so take the end of whichever section extends furthest into the file.
    # this should work for most binaries, unless there is an overlay.
    max_addr = 0
    for section in pe.sections:
        max_addr = max(max_addr, section.PointerToRawData + section.SizeOfRawData)

    # the checksum only feeds this debug message, so don't bother computing it
    #  unless debug logging is actually enabled.
    if logger.isEnabledFor(logging.DEBUG):
        if pe.OPTIONAL_HEADER.CheckSum == pe.generate_checksum():
            logger.debug('checksum verified')

    return max_addr


def carve(data, max_size=10 * 1024 * 1024):
    """
    find things that look like PE files from arbitrary binary data.

    every occurrence of the two-byte `MZ` magic is treated as a candidate and
     handed to pefile; candidates that parse are reported with a size estimated
     from their section table.

    Args:
      data (bytes): arbitrary byte string, or any object supporting `find()`,
        `len()` and slicing the same way (main() passes an mmap).
      max_size (int): upper bound, in bytes, on the window of data handed to
        pefile for each candidate. defaults to 10mb.

    Yields:
      Match: one Match instance per identified PE file. candidates whose
        computed size is zero (no section has raw data) are skipped.
    """
    offset = 0
    while True:
        # search with a bytes needle: on python 3, bytes and mmap objects reject
        #  a text needle with TypeError. on python 2 this is identical to 'MZ'.
        offset = data.find(b'MZ', offset)
        if offset == -1:
            break

        logger.debug('found MZ: 0x%x', offset)

        # grab a bunch of data that should include the entire binary.
        # assume it is no larger than `max_size`.
        max_offset = min(len(data), offset + max_size)
        size = _pe_size(data[offset:max_offset])

        if size:
            yield Match(offset, size)
        elif size == 0:
            # a zero-length match carries no data; reporting it would only make
            #  the caller write out an empty file.
            logger.debug('PE has no raw section data, skipping zero-length match.')

        # resume one byte further on, so that PE files embedded inside
        #  other PE files are found as well.
        offset += 1
def main(argv=None):
    """
    command line entry point: carve PE files from the input file and write
     each one into the current directory as `<md5 of its content>.bin`.

    Args:
      argv (list of str): command line arguments, without the program name.
        defaults to `sys.argv[1:]`.
    """
    if argv is None:
        argv = sys.argv[1:]

    parser = argparse.ArgumentParser(description="Carve PE files from binary data.")
    parser.add_argument("input", type=str,
                        help="Path to input file")
    parser.add_argument("-v", "--verbose", action="store_true",
                        help="Enable debug logging")
    parser.add_argument("-q", "--quiet", action="store_true",
                        help="Disable all output but errors")
    # parse the arguments we were handed, rather than silently falling
    #  back to whatever is in sys.argv.
    args = parser.parse_args(argv)

    # --verbose takes precedence when both flags are given.
    if args.verbose:
        logging.basicConfig(level=logging.DEBUG)
    elif args.quiet:
        logging.basicConfig(level=logging.ERROR)
    else:
        logging.basicConfig(level=logging.INFO)

    with open(args.input, 'rb') as f:
        # we're using a memory map here.
        # it lets us read from a large file as if it were entirely in memory.
        # (but it's not, actually)
        with contextlib.closing(mmap.mmap(f.fileno(), 0, access=mmap.ACCESS_READ)) as m:
            for match in carve(m):
                pe = m[match.offset:match.offset + match.size]
                logger.info('found pe at 0x%x, length: 0x%x', match.offset, match.size)

                # keep the digest under its own name: rebinding `m` here would
                #  replace the memory map, and slicing it for the next match would fail.
                digest = hashlib.md5(pe).hexdigest()
                logger.debug('md5sum: %s', digest)

                outpath = digest + '.bin'
                logger.info('writing pe file to %s', outpath)
                with open(outpath, 'wb') as g:
                    g.write(pe)
if __name__ == "__main__":
    # when run as a script, hand main()'s result to the interpreter as the exit status.
    exit_status = main()
    sys.exit(exit_status)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement