Advertisement
cornarx

appended file scanner

Feb 26th, 2013
1,725
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 6.42 KB | None | 0 0
  1. # This script scans for images that have data appended to them
  2. # and copies the images and the appended data to the folder appended-file-scanner-discoveries
  3. # with log at appended-file-scanner-log.txt
  4. #
  5. # Download from http://pastebin.com/raw.php?i=L3V9HxFh
  6. # and save as scanner.py
  7. #
  8. # You can simply save it to the folder you want to scan and run it.
  9. #
  10. # To scan specific files/folders, drag them to the script, or type
  11. #   scanner.py files-or-folders-to-scan
  12. # at the command line.
  13. #
  14. # You need to have Python installed for this to work -- available from http://python.org/
  15. # Either Python 2.3+ or Python 3.* should work.
  16.  
  17. import sys, os, shutil, struct, traceback
  18. if sys.version_info[0] < 3:
  19.     ascii = repr
  20.     input = raw_input
  21.  
  22. def color_table_length(c):
  23.     """Read the flags in a GIF file to get the length of the color table."""
  24.     if c & 0x80:
  25.         return 3 << ((c & 0x07) + 1)
  26.     else:
  27.         return 0
  28.  
  29. def end_of_blocks(data, start):
  30.     """Find the end of a sequence of GIF data sub-blocks."""
  31.     i = start
  32.     while i < len(data):
  33.         if data[i] == 0:
  34.             return i + 1
  35.         else:
  36.             i += data[i] + 1
  37.     return len(data)
  38.  
  39. def gif_end(data):
  40.     """Find the end of GIF data in a file."""
  41.     if type(data) == str:
  42.         data2 = [ord(c) for c in data]
  43.     else:
  44.         data2 = data
  45.     if len(data) <= 10:
  46.         return len(data)
  47.     i = color_table_length(data2[10]) + 13
  48.     while i < len(data):
  49.         if data2[i] == 0x2c:
  50.             i += 9
  51.             if i >= len(data):
  52.                 return len(data)
  53.             i += color_table_length(data2[i]) + 2
  54.             i = end_of_blocks(data2, i)
  55.         elif data2[i] == 0x21:
  56.             i = end_of_blocks(data2, i+2)
  57.         elif data2[i] == 0x3b:
  58.             return i + 1
  59.         else:
  60.             return i
  61.     return min(i, len(data))
  62.  
  63. def jpg_end(data):
  64.     """Find the end of JPEG data in a file."""
  65.     if type(data) == str:
  66.         data2 = [ord(c) for c in data]
  67.     else:
  68.         data2 = data
  69.     i = 2
  70.     while i + 2 <= len(data):
  71.         if data2[i] == 0xff and data2[i+1] >= 0xc0:
  72.             if data2[i+1] == 0xd9:
  73.                 return i + 2
  74.             elif data2[i+1] == 0xd8:
  75.                 return i
  76.             elif 0xd0 <= data2[i+1] <= 0xd7:
  77.                 i += 2
  78.             else:
  79.                 if i + 4 > len(data):
  80.                     return len(data)
  81.                 i += struct.unpack('>H', data[i+2:i+4])[0] + 2
  82.         else:
  83.             i += 1
  84.     return min(i, len(data))
  85.  
  86. def png_end(data):
  87.     """Find the end of PNG data in a file."""
  88.     i = 8
  89.     while i + 8 < len(data):
  90.         length, chunk_type = struct.unpack('>LL', data[i:i+8])
  91.         if chunk_type == 1229278788: # IEND
  92.             return min(i + 12, len(data))
  93.         i += length + 12
  94.     return min(i, len(data))
  95.  
  96. def image_end(data):
  97.     """Find the end of the image data in a file."""
  98.     magic = data[:8]
  99.     if type(magic) != str:
  100.         magic = magic.decode('latin_1')
  101.     if magic.startswith('GIF87a') or magic.startswith('GIF89a'):
  102.         return gif_end(data)
  103.     elif magic.startswith('\xFF\xD8'):
  104.         return jpg_end(data)
  105.     elif magic.startswith('\x89PNG\x0D\x0A\x1A\x0A'):
  106.         return png_end(data)
  107.     else:
  108.         return len(data) # Report no appended data for non-images
  109.  
  110. def scan(fullname, outpath, logwrite):
  111.     """
  112.    Scan file named fullname for appended content.  If found:
  113.      copy file and appended content to outpath
  114.      report details to logwrite function
  115.      return True
  116.    """
  117.     f = open(fullname, 'rb')
  118.     try:
  119.         data = f.read()
  120.     finally:
  121.         f.close()
  122.     eoi = image_end(data)
  123.     if eoi == len(data):
  124.         return False
  125.     logwrite(fullname)
  126.     logwrite('found %d bytes starting with:' % (len(data) - eoi))
  127.     logwrite(ascii(data[eoi:eoi+20]))
  128.     outputbasename = os.path.basename(fullname)
  129.     outputname = os.path.join(outpath, outputbasename)
  130.     extractname = os.path.join(outpath, outputbasename + '.data')
  131.     if os.path.realpath(outputname) != fullname:
  132.         nrename = 1
  133.         while os.path.exists(outputname) or os.path.exists(extractname):
  134.             nrename += 1
  135.             outputbasename = 'File %d named ' % nrename + os.path.basename(fullname)
  136.             outputname = os.path.join(outpath, outputbasename)
  137.             extractname = os.path.join(outpath, outputbasename + '.data')
  138.     if not os.path.exists(outputname):
  139.         shutil.copy(fullname, outputname)
  140.         logwrite('copied to ' + outputbasename)
  141.     if not os.path.exists(extractname):
  142.         extractf = open(extractname, 'wb')
  143.         try:
  144.             extractf.write(data[eoi:])
  145.         finally:
  146.             extractf.close()
  147.     logwrite('')
  148.     return True
  149.  
  150. logfile = None
  151. try:
  152.     # Create output directory, log
  153.     LOGNAME = 'appended-file-scanner-log.txt'
  154.     OUTDIRNAME = 'appended-file-scanner-discoveries'
  155.     scriptdir = os.path.dirname(os.path.realpath(__file__))
  156.     logfullname = os.path.join(scriptdir, LOGNAME)
  157.     outpath = os.path.join(scriptdir, OUTDIRNAME)
  158.     try:
  159.         os.mkdir(outpath)
  160.     except OSError:
  161.         pass
  162.     logfile = open(logfullname, 'a+')
  163.     def logwrite(line):
  164.         line = ''.join([c for c in line if ' ' <= c <= '~' or c in '\t\n'])
  165.         print(line)
  166.         logfile.write(line + '\n')
  167.  
  168.     # Enumerate files to scan
  169.     targets = [os.path.realpath(name) for name in sys.argv[1:]]
  170.     if len(targets) == 0:
  171.         targets = [scriptdir]
  172.     logwrite('scanning')
  173.     logwrite('\n'.join(targets))
  174.     logwrite('')
  175.     scanlist = []
  176.     for target in targets:
  177.         if os.path.isdir(target):
  178.             for root, dirs, files in os.walk(target):
  179.                 for file in files:
  180.                     if file.lower().split('.')[-1] in ('gif', 'jpg', 'jpeg', 'png'):
  181.                         scanlist.append(os.path.join(root, file))
  182.         else:
  183.             scanlist.append(target)
  184.  
  185.     # Scan files
  186.     anyfound = False
  187.     for fullname in scanlist:
  188.         try:
  189.             datafound = scan(fullname, outpath, logwrite)
  190.             if datafound:
  191.                 anyfound = True
  192.         except:
  193.             traceback.print_exc()
  194.             traceback.print_exc(logfile)
  195.     if not anyfound:
  196.         logwrite('no files found\n')
  197.  
  198.     logfile.close()
  199. except:
  200.     traceback.print_exc()
  201.     if logfile != None:
  202.         logfile.close()
  203.  
  204. print('[press return]')
  205. input()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement