Advertisement
opexxx

entropy_scan.py

May 15th, 2014
255
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 4.59 KB | None | 0 0
""" Entropy scan
    H() and entropy_scan() originally by Ero Carrera (blog.dkbza.org)

    Modified May 2007 by cyphunk (deadhacker.com)
    Modified Dec 2009 by cyphunk

    USAGE:
    python entropy_scan.py [target_path]

    If target_path is omitted, a file-selection dialog is shown instead.
    """
  10.  
  11. # FLAGS:
  12. SHOWPROGRESS = 1       # Show console progress bar?
  13. PRINTONTHRESHOLD = 6.8 # When block is > than threshold
  14.                        # print first 16 bytes in both
  15.                        # hex and ascii.  Set to 0 to turn
  16.                        # off.
  17. ONLYFIRSTBLOCK = 0     # Set to 1 it will only print the first
  18.                        # block that goes over threshold and not
  19.                        # blocks > threshold that are only offset
  20.                        # by 1.  By setting to zero block windows
  21.                        # that match will be printed.
  22. BLOCKSIZE = 256        # size of blocks scanned.
  23.  
  24. import math
  25. import random
  26. from pylab import *
  27. from matplotlib.ticker import MultipleLocator, FormatStrFormatter
  28. import tkFileDialog
  29. from Tkinter import *
  30. from progressBar import *
  31. from binascii import hexlify
  32. import string
  33. import os
  34. import cPickle # cache results
  35.  
  36. def H(data):
  37.   if not data:
  38.     return 0
  39.   entropy = 0
  40.   for x in range(256):
  41.     p_x = float(data.count(chr(x)))/len(data)
  42.     if p_x > 0:
  43.       entropy += - p_x*math.log(p_x, 2)
  44.   return entropy
  45.  
  46. def entropy_scan (data, block_size) :
  47.   if SHOWPROGRESS:
  48.       progress = progressBar(0, len(data) - block_size, 77)
  49.   # creates blocks of block_size for all possible offsets ('x'):
  50.   blocks = (data[x : block_size + x] for x in range (len (data) - block_size))
  51.   i = 0
  52.   for block in (blocks) :
  53.     i += 1
  54.     if SHOWPROGRESS:
  55.         progress(i)
  56.     yield H (block)
  57.  
  58. # performance improvement if you have psyco
  59. try:
  60.   import psyco
  61.   psyco.full()
  62.   print "got psyco"
  63. except ImportError:
  64.   pass
  65.  
  66. # get target file as argument var or from dialog:
  67. filename = ""
  68. if sys.argv[1:]:
  69.     filename = sys.argv[1]
  70. else:
  71.     root = Tk()
  72.     root.withdraw()
  73.     filename = tkFileDialog.askopenfilename(title="Target binary",
  74.                                         filetypes=[("All files", "*")])
  75.  
  76. # run, print graph:
  77.  
  78. if filename:
  79.     # Open file and scan for entropy:
  80.     if os.path.splitext(filename)[1] == ".entropy":
  81.         print "File is a cached '.entropy' from previous scan"
  82.         results = cPickle.load(open(filename, 'rb'))
  83.         filename = os.path.splitext(filename)[0]
  84.         print filenamea
  85.         raw = open(filename, 'rb').read()
  86.     else:
  87.         raw = open(filename, 'rb').read()
  88.         # debug with test data:
  89.         """
  90.        import random
  91.        raw = ''.join (
  92.        [chr (random.randint (0, 64)) for x in xrange (1024)] +
  93.        [chr (random.randint (0, 255)) for x in xrange (1024)] +
  94.        [chr (random.randint (0, 64)) for x in xrange (1024)] )
  95.        """
  96.         results = list( entropy_scan(raw,BLOCKSIZE) )
  97.         print "saving cache of entropy scan data to %s" % filename+".entropy"
  98.         cPickle.dump(results, open(filename+".entropy", 'wb'))
  99.  
  100.     # Print blocks that are above a defined threshold of entropy:
  101.     if PRINTONTHRESHOLD > 0:
  102.         print
  103.         found = 0
  104.         for i in range(len(results)):
  105.             if results[i] > PRINTONTHRESHOLD:
  106.                 if found == 0:
  107.                     table = string.maketrans("rnt", '   ') # don't like newlines
  108.                     #blockstr = string.translate(str(raw[i : i+16]), table) # translate to string value
  109.                     print "0x%8x %.2f: %s %s" % (i, results[i], hexlify(raw[i : i+8]),
  110.                                                      hexlify(raw[i+8 : i+16]))
  111.                     #%.3f - %016X / %s" % (i, results[i], raw[i : i + 16], raw[i : i + 16])
  112.                     found = ONLYFIRSTBLOCK
  113.             else:
  114.                 found = 0
  115.  
  116.     # Plot
  117.     filesize = os.path.getsize(filename)
  118.     imgdpi = 100
  119.     imgwidth = filesize / imgdpi
  120.  
  121.     if imgwidth > 327:
  122.       imgwidth = 327
  123.  
  124.     majorLocator   = MultipleLocator(0x400)   # mark every 1024 bytes
  125.     majorFormatter = FormatStrFormatter('%X') # change to %d to see decimal offsets
  126.  
  127.     ax = subplot(111)
  128.     plot(results, linewidth=2.0, antialiased=False)
  129.     subplots_adjust(left=0.02, right=0.99, bottom=0.2)
  130.  
  131.     ax.axis([0,filesize,0,8])
  132.     ax.xaxis.set_major_locator(majorLocator)
  133.     ax.xaxis.set_major_formatter(majorFormatter)
  134.     xticks(rotation=315)
  135.  
  136.     xlabel('block (byte offset)')
  137.     ylabel('entropy')
  138.     title('Entropy levels')
  139.  
  140.     grid(True)
  141.  
  142.     img = gcf()
  143.     img.set_size_inches(imgwidth, 6)
  144.     img.savefig(filename+".png", dpi=imgdpi)
  145.  
  146.     draw()
  147.     show()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement