Advertisement
opexxx

Auto XOR decryptor.py

Nov 12th, 2014
366
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 11.60 KB | None | 0 0
  1. #!/usr/bin/env python
  2.  
  3. from __future__ import print_function
  4. from __future__ import unicode_literals
  5. from itertools import groupby
  6. from operator import itemgetter
  7. import binascii
  8. import re
  9. import struct
  10. import argparse
  11. import os
  12.  
  13. '''
  14. ##     ## ########   ######      ######## ######## ######## #### ########    ###     ######  
  15. ###   ### ##     ## ##    ##     ##       ##       ##        ##     ##      ## ##   ##    ##
  16. #### #### ##     ## ##           ##       ##       ##        ##     ##     ##   ##  ##      
  17. ## ### ## ########  ##   ####    ######   ######   ######    ##     ##    ##     ##  ######  
  18. ##     ## ##   ##   ##    ##     ##       ##       ##        ##     ##    #########       ##
  19. ##     ## ##    ##  ##    ##     ##       ##       ##        ##     ##    ##     ## ##    ##
  20. ##     ## ##     ##  ######      ######## ##       ##       ####    ##    ##     ##  ######
  21. '''
  22.  
  23. '''
  24.    This program is free software: you can redistribute it and/or modify
  25.    it under the terms of the GNU General Public License as published by
  26.    the Free Software Foundation, either version 3 of the License, or
  27.    (at your option) any later version.
  28.  
  29.    This program is distributed in the hope that it will be useful,
  30.    but WITHOUT ANY WARRANTY; without even the implied warranty of
  31.    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  32.    GNU General Public License for more details.
  33.  
  34.    You should have received a copy of the GNU General Public License
  35.    along with this program.  If not, see <http://www.gnu.org/licenses/>.
  36. '''
  37.  
  38. print ("Auto XOR decryptor by MRG Effitas. Developed and tested on Python 3.3!")
  39. print ("")
  40. print ("This tool can automagically find short XOR keys in a XOR encrypted binary file, and use ")
  41. print ("that to decrypt the XOR encrypted binary. Most parameters are good on default ")
  42. print ("but if it is not working for you, you might try to fine-tune those.")
  43.  
  44. parser = argparse.ArgumentParser()
  45. parser.add_argument("--input", action="store", dest="input", required=False,
  46.                     default="encrypted", help="input file name, default is encrypted")
  47.  
  48. parser.add_argument("--output", action="store", dest="output", required=False,
  49.                     default="decrypted", help="output file name, default is decrypted")
  50.  
  51. parser.add_argument("--xorkeyhex", action="store", dest="xor_key_hex",
  52.                     required=False, help="xor key in hex, e.g. 31323334")
  53.  
  54. parser.add_argument("--xorkeyascii", action="store", dest="xor_key_ascii",
  55.                     required=False, help="xor key in ascii, e.g. ijkl")
  56.  
  57. parser.add_argument("--offset", action="store", dest="offset", required=False,
  58.                     help="offset to rotate the xor key")
  59.  
  60. parser.add_argument("--keyminlen", action="store", dest="keyminlen", required=False,
  61.                     default="2", help="minimum key length (measured in hex string), default 2")
  62.  
  63. parser.add_argument("--patternmaxsearch", action="store", dest="patternmaxsearch", required=False,
  64.                     default="500", help="max length to search for pattern in the result, default 500")
  65.  
  66. parser.add_argument("--xorkeymaxsearch", action="store", dest="xorkeymaxsearch", required=False,
  67.                     default="500", help="max distance to search for XOR key in the encrypted file, default 500")
  68.  
  69.  
  70. parser.add_argument("--pattern", action="store", dest="pattern", required=False
  71.                     , default=b"program", help="pattern which is found in valid"
  72.                     "file. e.g. \"This program cannot be run in DOS mode\"."
  73.                     " Default is \"program\" . ")
  74.  
  75. # Parse the arguments
  76. args = parser.parse_args()
  77. filename = args.input
  78. output = args.output
  79. if args.xor_key_hex:
  80.     xor_key = args.xor_key_hex
  81. if args.xor_key_ascii:
  82.     xor_key_ascii = args.xor_key_ascii
  83. if args.offset:
  84.     offset = int(args.offset)
  85.  
  86. # the longest_common_substring and suffix_array methods are from http://stackoverflow.com/a/13693834/2716262
  87. def longest_common_substring(text):
  88.     """Get the longest common substrings and their positions.
  89.    >>> longest_common_substring('banana')
  90.    {'ana': [1, 3]}
  91.    >>> text = "not so Agamemnon, who spoke fiercely to "
  92.    >>> sorted(longest_common_substring(text).items())
  93.    [(' s', [3, 21]), ('no', [0, 13]), ('o ', [5, 20, 38])]
  94.  
  95.    This function can be easy modified for any criteria, e.g. for searching ten
  96.    longest non overlapping repeated substrings.
  97.    """
  98.     sa, rsa, lcp = suffix_array(text)
  99.     maxlen = max(lcp)
  100.     result = {}
  101.     for i in range(1, len(text)):
  102.         if lcp[i] == maxlen:
  103.             j1, j2, h = sa[i - 1], sa[i], lcp[i]
  104.             assert text[j1:j1 + h] == text[j2:j2 + h]
  105.             substring = text[j1:j1 + h]
  106.             if not substring in result:
  107.                 result[substring] = [j1]
  108.             # result[substring].append(j2)
  109.     # return dict((k, sorted(v)) for k, v in result.items())
  110.     return substring
  111.  
def suffix_array(text, _step=16):
    """Build the suffix array, its inverse and the LCP array of *text*.

    Suffixes are first pre-sorted by their leading ``_step`` elements.  Groups
    of suffixes that are still tied are then repeatedly re-sorted, so that the
    guaranteed number of compared elements is doubled in every iteration,
    until all suffixes are sorted exactly.

    Arguments:
        text:  The text to be analyzed.
        _step: Is only for optimization and testing. It is the length of the
               prefixes used for the initial pre-sorting. A bigger value is
               faster if there is enough memory. Memory requirements are
               approximately (estimate for 32 bit Python 3.3):
                   len(text) * (29 + (_size + 20 if _size > 2 else 0)) + 1MB
               (NOTE(review): `_size` presumably means `_step` -- confirm.)

    Return value:      (tuple)
      (sa, rsa, lcp)
        sa:  Suffix array. For i in range(1, size):
               assert text[sa[i-1]:] < text[sa[i]:]
        rsa: Reverse suffix array. For i in range(size):
               assert rsa[sa[i]] == i
        lcp: Longest common prefix of neighbouring suffixes.
             For i in range(1, size):
               assert text[sa[i-1]:sa[i-1]+lcp[i]] == text[sa[i]:sa[i]+lcp[i]]
               if sa[i-1] + lcp[i] < len(text):
                   assert text[sa[i-1] + lcp[i]] < text[sa[i] + lcp[i]]
             lcp[0] is always 0.

    >>> suffix_array(text='banana')
    ([5, 3, 1, 0, 4, 2], [3, 2, 5, 1, 4, 0], [0, 1, 3, 0, 0, 2])

    Explanation: 'a' < 'ana' < 'anana' < 'banana' < 'na' < 'nana'
    The Longest Common String is 'ana': lcp[2] == 3 == len('ana')
    It is between  tx[sa[1]:] == 'ana' < 'anana' == tx[sa[2]:]
    """
    tx = text
    size = len(tx)
    # Clamp the pre-sort width to the range 1..len(text).
    step = min(max(_step, 1), len(tx))
    # Initial order: suffix start positions sorted by their first `step` elements.
    sa = list(range(len(tx)))
    sa.sort(key=lambda i: tx[i:i + step])
    # grpstart[i] is True where a group of still-tied suffixes (more than one
    # member) starts at index i of sa.  It lets the refinement loop skip the
    # parts that are already resolved.  The extra last True is a sentinel, so
    # grpstart.index(True) always finds something.
    grpstart = size * [False] + [True]
    rsa = size * [None]
    stgrp, igrp = '', 0
    # Walk the pre-sorted suffixes and give every run of equal prefixes one
    # group index (the index in sa of the first member of the run).
    for i, pos in enumerate(sa):
        st = tx[pos:pos + step]
        if st != stgrp:
            # A new run begins: the run just finished is unresolved only if
            # it had more than one member.
            grpstart[igrp] = (igrp < i - 1)
            stgrp = st
            igrp = i
        rsa[pos] = igrp
        sa[i] = pos
    # Close the last run the same way.
    grpstart[igrp] = (igrp < size - 1 or size == 0)
    # Refine until the only True left is the sentinel at index `size`.
    while grpstart.index(True) < size:
        # assert step <= size
        nextgr = grpstart.index(True)
        while nextgr < size:
            igrp = nextgr
            nextgr = grpstart.index(True, igrp + 1)
            glist = []
            # Collect the members of the tied group that starts at igrp; the
            # first suffix carrying a different group index ends the group.
            for ig in range(igrp, nextgr):
                pos = sa[ig]
                if rsa[pos] != igrp:
                    break
                # Secondary key: the group of the suffix `step` elements
                # further on, or -1 when that position is past the end.
                newgr = rsa[pos + step] if pos + step < size else -1
                glist.append((newgr, pos))
            glist.sort()
            # Members with an equal secondary key stay tied and form a new,
            # smaller group; write the refined order back into sa and rsa.
            for ig, g in groupby(glist, key=itemgetter(0)):
                g = [x[1] for x in g]
                sa[igrp:igrp + len(g)] = g
                grpstart[igrp] = (len(g) > 1)
                for pos in g:
                    rsa[pos] = igrp
                igrp += len(g)
        # Every group is now ordered on twice as many elements as before.
        step *= 2
    del grpstart
    # create LCP array
    lcp = size * [None]
    # h is the number of matching elements carried over from the previous
    # text position (reduced by one below), so matching never restarts at 0.
    h = 0
    for i in range(size):
        if rsa[i] > 0:
            # j starts the suffix that directly precedes suffix i in sa.
            j = sa[rsa[i] - 1]
            while i != size - h and j != size - h and tx[i + h] == tx[j + h]:
                h += 1
            lcp[rsa[i]] = h
            if h > 0:
                h -= 1
    # The first suffix in sa has no predecessor to share a prefix with.
    if size > 0:
        lcp[0] = 0
    return sa, rsa, lcp
  200.  
  201. # XOR stream cipher
  202. def cipher(infile, outfile, padfile):
  203.     block_size = len(padfile)
  204.     while 1:
  205.         # read data in block of the XOR key size
  206.         data = infile.read(block_size)
  207.         if not data:
  208.             break
  209.         # Python 2.7 BUG: bytes and strings are a bit messed up here
  210.         # when you are working with older Python versions.
  211.         # Might be fixed in future versions
  212.         encoded = [ a ^ b for a, b in zip(data, padfile) ]
  213.         for item in encoded:
  214.             # write the result to the output file
  215.             outfile.write (struct.pack("B", item))
  216.  
  217. filename = args.input
  218. # only guess the XOR key based on the first 500 (or whatever you defined) bytes
  219. with open(filename, 'rb') as f:
  220.     content = f.read(int(args.xorkeymaxsearch))
  221.     #print (args.xorkeymaxsearch)
  222. f.close()
  223.  
  224. try:
  225.     xor_key
  226. # if we don't have the final xor_key from the user
  227. except NameError:  
  228.     try:
  229.         xor_key_ascii
  230.         # but the user defined the ascii xor key
  231.         xor_key = binascii.hexlify(xor_key_ascii.encode('ascii'))
  232.         # xor_key = ''.join(long_xor_key[i:i + 2]
  233.         #                for i in range(0, len(long_xor_key), 2))
  234.         #print (xor_key)                
  235.     except NameError:
  236.         # when the user have not provided any guess about the XOR key  
  237.         long_xor_key = str(longest_common_substring(binascii.hexlify(content)))
  238.         #print ("longxorkey: " +  long_xor_key)
  239.         formatted_hex = ''.join(long_xor_key[i:i + 2] for i in range(0,
  240.                         len(long_xor_key), 2))
  241.         print ("XOR key: " + formatted_hex)
  242.         r = re.compile("(.{" + args.keyminlen + r",}?)\1+")            
  243.         xor_key_list = r.findall(formatted_hex)
  244.         xor_key = xor_key_list[0]
  245. #print (xor_key)    
  246. # binary XOR key to be used in decryption
  247. xor_key_bin = binascii.unhexlify(xor_key)
  248. print ("XOR key ascii: " + str(xor_key_bin))
  249. print ("XOR key hex: " + str(binascii.hexlify(xor_key_bin)))
  250. data = open (filename, 'rb+')
  251. edit = data.read ()
  252. data.close ()
  253.  
  254. try:
  255.     offset
  256. except NameError:
  257.     # when the user have not provided any offset, try to guess it
  258.     offset = len(xor_key_bin) - (edit.find (xor_key_bin) % len(xor_key_bin))
  259. print ("Offset: " + str(offset))
  260.  
  261. def rotate(str1, n):
  262.     rotated = str1[n:] + str1[:n]
  263.     return rotated
  264.  
  265. # generate the final XOR key, rotate the xor_key with the offset
  266. final_xor_key = binascii.unhexlify(rotate(xor_key, offset * 2))
  267. print("Final XOR key: " + str(final_xor_key))
  268. with open(output, 'wb') as d:
  269.     with open(filename, 'rb') as f:
  270.         # call the XOR decryptor with input file, output file and XOR key
  271.         cipher (f, d, final_xor_key)
  272. f.close()
  273. d.close()
  274.  
  275. # Test whether decryption was successful, only check first 500 bytes.
  276. filehandle = open (output, 'rb+')
  277. data = filehandle.read (int(args.patternmaxsearch))
  278. filehandle.close ()
  279. pattern = args.pattern
  280. rx = re.compile(pattern, re.IGNORECASE | re.MULTILINE | re.DOTALL)
  281. result = rx.findall(data)
  282. if not result:
  283.     print ("Decrypt failed, or check your pattern. Output destination file has been deleted.")
  284.     os.remove(output)
  285. else:
  286.     print ("Great success! input read from : " + str(filename) +
  287.            ", output written to : " + str(output))
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement