Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- # This script scans for images that have data appended to them
- # and copies the images and the appended data to the folder appended-file-scanner-discoveries
- # with log at appended-file-scanner-log.txt
- #
- # Download from http://pastebin.com/raw.php?i=L3V9HxFh
- # and save as scanner.py
- #
- # You can simply save it to the folder you want to scan and run it.
- #
- # To scan specific files/folders, drag them to the script, or type
- # scanner.py files-or-folders-to-scan
- # at the command line.
- #
- # You need to have Python installed for this to work -- available from http://python.org/
- # Either Python 2.3+ or Python 3.* should work.
- import sys, os, shutil, struct, traceback
# Python 2 compatibility: the rest of the script uses the Python 3 names
# `input` and `ascii`, so alias them to their Python 2 equivalents.
if sys.version_info < (3,):
    input = raw_input
    ascii = repr
def color_table_length(c):
    """Return the size in bytes of the color table described by flags byte c.

    The top bit (0x80) of the flags byte signals that a color table is
    present; the low three bits give its size as 2 ** (bits + 1) entries of
    three bytes each.  Returns 0 when the top bit is clear.
    """
    table_present = c & 0x80
    if not table_present:
        return 0
    size_exponent = (c & 0x07) + 1
    return 3 * (1 << size_exponent)
def end_of_blocks(data, start):
    """Return the index just past a chain of GIF data sub-blocks.

    data is an indexable sequence of integer byte values and start is the
    index of the first sub-block's size byte.  Each sub-block is a size byte
    followed by that many payload bytes; a size byte of 0 terminates the
    chain.  If the data runs out before a terminator is seen, len(data) is
    returned.
    """
    total = len(data)
    pos = start
    while pos < total:
        block_size = data[pos]
        if block_size == 0:
            # Terminator block: the chain ends right after it.
            return pos + 1
        # Hop over the size byte and its payload.
        pos += block_size + 1
    return total
def gif_end(data):
    """Return the offset at which the GIF image in data ends.

    data is the raw file content (Python 3 bytes, or a Python 2 str).  The
    walk starts after the 13-byte header/screen descriptor plus the global
    color table and steps over image descriptors (0x2c) and extension
    blocks (0x21) until the trailer byte (0x3b) or an unrecognised byte is
    found.  Truncated data yields len(data).
    """
    # Work on integer byte values regardless of the input string type.
    if type(data) is str:
        octets = [ord(ch) for ch in data]
    else:
        octets = data

    size = len(data)
    if size <= 10:
        # Too short to even hold the flags byte at offset 10.
        return size

    pos = 13 + color_table_length(octets[10])
    while pos < size:
        marker = octets[pos]
        if marker == 0x3b:
            # Trailer: the image ends right after it.
            return pos + 1
        if marker == 0x21:
            # Extension: skip introducer and label, then the sub-blocks.
            pos = end_of_blocks(octets, pos + 2)
            continue
        if marker != 0x2c:
            # Unknown byte: treat everything from here on as appended data.
            return pos
        # Image descriptor: advance to its flags byte.
        pos += 9
        if pos >= size:
            return size
        # Skip the flags byte, local color table and LZW code-size byte,
        # then the image data sub-blocks.
        pos += color_table_length(octets[pos]) + 2
        pos = end_of_blocks(octets, pos)
    return min(pos, size)
def jpg_end(data):
    """Find the end of JPEG data in a file.

    data is the raw file content (Python 3 bytes, or a Python 2 str).
    Scanning starts after the two-byte SOI marker and returns:

    * the offset just past the first EOI marker (0xFF 0xD9);
    * the offset of a second SOI marker (0xFF 0xD8), treating it as the
      start of appended data;
    * len(data) when the data is truncated or no EOI is found.
    """
    # Work on integer byte values regardless of the input string type.
    if isinstance(data, str):
        data2 = [ord(c) for c in data]
    else:
        data2 = data
    size = len(data2)
    i = 2
    while i + 2 <= size:
        if data2[i] != 0xff or data2[i+1] < 0xc0:
            # Ordinary byte, or 0xFF followed by a stuffed/unknown byte.
            i += 1
            continue
        marker = data2[i+1]
        if marker == 0xff:
            # 0xFF followed by 0xFF is a fill byte, not a marker with a
            # length field.  Step over one byte so the real marker that
            # follows the padding is still recognised.
            i += 1
        elif marker == 0xd9:
            # EOI: the image ends right after the marker.
            return i + 2
        elif marker == 0xd8:
            # A second SOI: everything from here is appended data.
            return i
        elif 0xd0 <= marker <= 0xd7:
            # RSTn markers stand alone, with no length field.
            i += 2
        else:
            if i + 4 > size:
                # Truncated before the segment length could be read.
                return size
            # Big-endian segment length (it counts its own two bytes),
            # plus the two marker bytes.
            i += ((data2[i+2] << 8) | data2[i+3]) + 2
    return min(i, size)
def png_end(data):
    """Return the offset at which the PNG image in data ends.

    Starting after the 8-byte signature, each chunk header (4-byte
    big-endian length followed by a 4-byte type) is read and the chunk is
    skipped, until the IEND chunk is found.  The result never exceeds
    len(data).
    """
    iend_tag = 0x49454E44  # the bytes 'IEND' read as a big-endian integer
    total = len(data)
    offset = 8
    while offset + 8 < total:
        header = data[offset:offset+8]
        chunk_length, chunk_tag = struct.unpack('>LL', header)
        if chunk_tag == iend_tag:
            # Length, type and CRC fields of the final chunk: 12 bytes.
            return min(offset + 12, total)
        # Length, type and CRC fields (12 bytes) plus the payload.
        offset += 12 + chunk_length
    return min(offset, total)
def image_end(data):
    """Return the offset at which the image stored in data ends.

    The format is recognised from the leading signature bytes (PNG, JPEG
    or GIF) and the matching scanner is used.  Data that is not one of
    those formats is reported as having no appended content, i.e.
    len(data) is returned.
    """
    signature = data[:8]
    if type(signature) is not str:
        # bytes -> str with a one-to-one byte/character mapping
        signature = signature.decode('latin_1')

    if signature.startswith('\x89PNG\x0D\x0A\x1A\x0A'):
        return png_end(data)
    if signature.startswith('\xFF\xD8'):
        return jpg_end(data)
    if signature.startswith('GIF87a') or signature.startswith('GIF89a'):
        return gif_end(data)
    # Report no appended data for non-images
    return len(data)
def scan(fullname, outpath, logwrite):
    """
    Check the file named fullname for content appended after the image.

    When appended content is found:
        the file is copied into outpath,
        the appended bytes are written next to it with a '.data' suffix,
        details are reported through the logwrite function,
        and True is returned.
    Otherwise False is returned and nothing is written.
    """
    source = open(fullname, 'rb')
    try:
        contents = source.read()
    finally:
        source.close()

    image_length = image_end(contents)
    if image_length == len(contents):
        return False

    logwrite(fullname)
    logwrite('found %d bytes starting with:' % (len(contents) - image_length))
    logwrite(ascii(contents[image_length:image_length+20]))

    original_name = os.path.basename(fullname)
    copy_name = original_name
    copy_path = os.path.join(outpath, copy_name)
    payload_path = os.path.join(outpath, copy_name + '.data')

    # Pick an unused name, unless the scanned file already lives in outpath
    # under that very name (then it is reused rather than duplicated).
    if os.path.realpath(copy_path) != fullname:
        attempt = 1
        while os.path.exists(copy_path) or os.path.exists(payload_path):
            attempt += 1
            copy_name = 'File %d named ' % attempt + original_name
            copy_path = os.path.join(outpath, copy_name)
            payload_path = os.path.join(outpath, copy_name + '.data')

    if not os.path.exists(copy_path):
        shutil.copy(fullname, copy_path)
        logwrite('copied to ' + copy_name)

    if not os.path.exists(payload_path):
        payload = open(payload_path, 'wb')
        try:
            payload.write(contents[image_length:])
        finally:
            payload.close()

    logwrite('')
    return True
# Script driver: set up the output folder and log next to this script,
# collect the files to scan, scan each one, then wait for the user so a
# double-clicked console window stays open.
logfile = None
try:
    # Create output directory, log
    LOGNAME = 'appended-file-scanner-log.txt'
    OUTDIRNAME = 'appended-file-scanner-discoveries'
    scriptdir = os.path.dirname(os.path.realpath(__file__))
    logfullname = os.path.join(scriptdir, LOGNAME)
    outpath = os.path.join(scriptdir, OUTDIRNAME)
    try:
        os.mkdir(outpath)
    except OSError:
        # Best effort: typically the directory already exists from an
        # earlier run.
        pass
    logfile = open(logfullname, 'a+')

    def logwrite(line):
        """Print line and append it to the log, keeping only printable
        ASCII characters, tabs and newlines."""
        line = ''.join([c for c in line if ' ' <= c <= '~' or c in '\t\n'])
        print(line)
        logfile.write(line + '\n')

    # Enumerate files to scan
    targets = [os.path.realpath(name) for name in sys.argv[1:]]
    if not targets:
        # No arguments: scan the folder this script lives in.
        targets = [scriptdir]
    logwrite('scanning')
    logwrite('\n'.join(targets))
    logwrite('')
    scanlist = []
    for target in targets:
        if os.path.isdir(target):
            # Folders are walked recursively; only image extensions count.
            for root, dirs, files in os.walk(target):
                for filename in files:
                    if filename.lower().split('.')[-1] in ('gif', 'jpg', 'jpeg', 'png'):
                        scanlist.append(os.path.join(root, filename))
        else:
            # Explicitly named files are scanned whatever their extension.
            scanlist.append(target)

    # Scan files
    anyfound = False
    for fullname in scanlist:
        try:
            datafound = scan(fullname, outpath, logwrite)
            if datafound:
                anyfound = True
        except Exception:
            # One unreadable or malformed file must not stop the scan.
            # The log file has to be passed as the `file` keyword: the
            # first positional parameter of print_exc is `limit`.
            traceback.print_exc()
            traceback.print_exc(file=logfile)
    if not anyfound:
        logwrite('no files found\n')
    logfile.close()
except:
    # Deliberately catch everything so the error stays visible until the
    # user presses return below.
    traceback.print_exc()
    if logfile is not None:
        logfile.close()
print('[press return]')
input()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement