Data hosted with ♥ by Pastebin.com - Download Raw - See Original
  1. #!/usr/bin/python
  2.  
  3. class Caesar(str):
  4.  
  5.     """An implementation of the Caesar cipher."""
  6.  
  7.     def encipher(self, shift):
  8.         """Encipher input (plaintext) using the Caesar cipher and return it
  9.           (ciphertext)."""
  10.         ciphertext = []
  11.         for p in self:
  12.             if p.isalpha():
  13.                 ciphertext.append(chr((ord(p) - ord('Aa'[int(p.islower())]) +
  14.                 shift) % 26 + ord('Aa'[int(p.islower())])))
  15.             else:
  16.                 ciphertext.append(p)
  17.         return Caesar(''.join(ciphertext))
  18.  
  19.     def decipher(self, shift):
  20.         """Decipher input (ciphertext) using the Caesar cipher and return it
  21.           (plaintext)."""
  22.         return self.encipher(-shift)
  23.  
  24. class InputError(Exception):
  25.  
  26.     """This class is only used for throwing exceptions if the user supplies
  27.       invalid input (e.g. ciphertext is an empty string)."""
  28.  
  29.     pass
  30.  
  31. class Vigenere(str):
  32.  
  33.     """An implementation of the Vigenere cipher."""
  34.  
  35.     def encipher(self, key):
  36.         """Encipher input (plaintext) using the Vigenere cipher and return
  37.           it (ciphertext)."""
  38.         ciphertext = []
  39.         k = 0
  40.         n = len(key)
  41.         for i in range(len(self)):
  42.             p = self[i]
  43.             if p.isalpha():
  44.                 ciphertext.append(chr((ord(p) + ord(
  45.                 (key[k % n].upper(), key[k % n].lower())[int(p.islower())]
  46.                 ) - 2*ord('Aa'[int(p.islower())])) % 26 +
  47.                 ord('Aa'[int(p.islower())])))
  48.                 k += 1
  49.             else:
  50.                 ciphertext.append(p)
  51.         return Vigenere(''.join(ciphertext))
  52.  
  53.     def decipher(self, key):
  54.         """Decipher input (ciphertext) using the Vigenere cipher and return
  55.           it (plaintext)."""
  56.         plaintext = []
  57.         k = 0
  58.         n = len(key)
  59.         for i in range(len(self)):
  60.             c = self[i]
  61.             if c.isalpha():
  62.                 plaintext.append(chr((ord(c) - ord(
  63.                 (key[k % n].upper(), key[k % n].lower())[int(c.islower())]
  64.                 )) % 26 + ord('Aa'[int(c.islower())])))
  65.                 k += 1
  66.             else:
  67.                 plaintext.append(c)
  68.         return Vigenere(''.join(plaintext))
  69.  
  70. class VigCrack(Vigenere):
  71.  
  72.     """
  73.    VigCrack objects have methods to break Vigenere-encoded texts when the
  74.    original key is unknown.
  75.  
  76.    The technique used is based on the one described in:
  77.  
  78.    http://www.stonehill.edu/compsci/Shai_papers/RSA.pdf
  79.    (pages 9-10)
  80.  
  81.    Character frequencies taken from:
  82.    http://www.csm.astate.edu/~rossa/datasec/frequency.html (English)
  83.    http://www.characterfrequency.com/ (French, Italian, Portuguese, Spanish)
  84.    http://www.santacruzpl.org/readyref/files/g-l/ltfrqger.shtml (German)
  85.    """
  86.  
  87.     # Unless otherwise specified, test for codewords between (and including)
  88.     # these two lengths:
  89.     __default_min_codeword_length = 5
  90.     __default_max_codeword_length = 9
  91.  
  92.     # The following are language-specific data on character frequencies.
  93.     # Kappa is the "index of coincidence" described in the cryptography paper
  94.     # (link above).
  95.     __english_data = {
  96.                       'A':8.167, 'B':1.492, 'C':2.782, 'D':4.253, 'E':12.702,
  97.                       'F':2.228, 'G':2.015, 'H':6.094, 'I':6.996, 'J':0.153,
  98.                       'K':0.772, 'L':4.025, 'M':2.406, 'N':6.749, 'O':7.507,
  99.                       'P':1.929, 'Q':0.095, 'R':5.987, 'S':6.327, 'T':9.056,
  100.                       'U':2.758, 'V':0.978, 'W':2.360, 'X':0.150, 'Y':1.974,
  101.                       'Z':0.074, 'max_val':12.702, 'kappa':0.0667
  102.                      }
  103.  
  104.     __french_data = {
  105.                      'A':8.11, 'B':0.903, 'C':3.49, 'D':4.27, 'E':17.22,
  106.                      'F':1.14, 'G':1.09, 'H':0.769, 'I':7.44, 'J':0.339,
  107.                      'K':0.097, 'L':5.53, 'M':2.89, 'N':7.46, 'O':5.38,
  108.                      'P':3.02, 'Q':0.999, 'R':7.05, 'S':8.04, 'T':6.99,
  109.                      'U':5.65, 'V':1.30, 'W':0.039, 'X':0.435, 'Y':0.271,
  110.                      'Z':0.098, 'max_val':17.22, 'kappa':0.0746
  111.                     }
  112.  
  113.     __german_data = {
  114.                      'A':6.506, 'B':2.566, 'C':2.837, 'D':5.414, 'E':16.693,
  115.                      'F':2.044, 'G':3.647, 'H':4.064, 'I':7.812, 'J':0.191,
  116.                      'K':1.879, 'L':2.825, 'M':3.005, 'N':9.905, 'O':2.285,
  117.                      'P':0.944, 'Q':0.055, 'R':6.539, 'S':6.765, 'T':6.742,
  118.                      'U':3.703, 'V':1.069, 'W':1.396, 'X':0.022, 'Y':0.032,
  119.                      'Z':1.002, 'max_val':16.693, 'kappa':0.0767
  120.                     }
  121.  
  122.     __italian_data = {
  123.                       'A':11.30, 'B':0.975, 'C':4.35, 'D':3.80, 'E':11.24,
  124.                       'F':1.09, 'G':1.73, 'H':1.02, 'I':11.57, 'J':0.035,
  125.                       'K':0.078, 'L':6.40, 'M':2.66, 'N':7.29, 'O':9.11,
  126.                       'P':2.89, 'Q':0.391, 'R':6.68, 'S':5.11, 'T':6.76,
  127.                       'U':3.18, 'V':1.52, 'W':0.00, 'X':0.024, 'Y':0.048,
  128.                       'Z':0.958, 'max_val':11.57, 'kappa':0.0733
  129.                      }
  130.  
  131.     __portuguese_data = {
  132.                          'A':13.89, 'B':0.980, 'C':4.18, 'D':5.24, 'E':12.72,
  133.                          'F':1.01, 'G':1.17, 'H':0.905, 'I':6.70, 'J':0.317,
  134.                          'K':0.0174, 'L':2.76, 'M':4.54, 'N':5.37, 'O':10.90,
  135.                          'P':2.74, 'Q':1.06, 'R':6.67, 'S':7.90, 'T':4.63,
  136.                          'U':4.05, 'V':1.55, 'W':0.0104, 'X':0.272, 'Y':0.0165,
  137.                          'Z':0.400, 'max_val':13.89, 'kappa':0.0745
  138.                         }
  139.  
  140.     __spanish_data = {
  141.                       'A':12.09, 'B':1.21, 'C':4.20, 'D':4.65, 'E':13.89,
  142.                       'F':0.642, 'G':1.11, 'H':1.13, 'I':6.38, 'J':0.461,
  143.                       'K':0.038, 'L':5.19, 'M':2.86, 'N':7.23, 'O':9.58,
  144.                       'P':2.74, 'Q':1.37, 'R':6.14, 'S':7.43, 'T':4.49,
  145.                       'U':4.53, 'V':1.05, 'W':0.011, 'X':0.124, 'Y':1.14,
  146.                       'Z':0.324, 'max_val':13.89, 'kappa':0.0766
  147.                      }
  148.  
  149.     # The default language is set to English.
  150.     __lang = 'EN'
  151.     __lang_data = __english_data
  152.  
  153.     # This method sets the lang (__lang) attribute of a VigCrack object.
  154.     def set_language(self, language):
  155.         self.__lang = language.upper()
  156.         if self.__lang == 'DE':
  157.             self.__lang_data = self.__german_data
  158.         elif self.__lang == 'ES':
  159.             self.__lang_data = self.__spanish_data
  160.         elif self.__lang == 'FR':
  161.             self.__lang_data = self.__french_data
  162.         elif self.__lang == 'IT':
  163.             self.__lang_data = self.__italian_data
  164.         elif self.__lang == 'PT':
  165.             self.__lang_data = self.__portuguese_data
  166.         else:
  167.             self.__lang = 'EN'
  168.         return self
  169.  
  170.     # Rotate text n places to the right, wrapping around at the end.
  171.     def __rotate_right(self, n):
  172.         cutting_point = len(self) - (n % len(self))
  173.         return self[cutting_point:] + self[:cutting_point]
  174.  
  175.     # Get every nth char from a piece of text, from a given starting position.
  176.     def __get_every_nth_char(self, start, n):
  177.         accumulator = []
  178.         for i in range(len(self)):
  179.             if (i % n) == start:
  180.                 accumulator.append(self[i])
  181.         return VigCrack(''.join(accumulator)).set_language(self.__lang)
  182.  
  183.     # Build a dictionary containing the number of occurrences of each char.
  184.     def __count_char_freqs(self):
  185.         dictionary = {}
  186.         self = self.upper()
  187.         for char in self:
  188.             if char.isalpha():
  189.                 dictionary[char] = dictionary.get(char, 0) + 1
  190.         return dictionary
  191.  
  192.     # Scale the dictionary so that it can be compared with __lang_data.
  193.     def __scale(self, dictionary):
  194.         v = dictionary.values()
  195.         v.sort()
  196.         max_val = v[-1]
  197.         scaling_factor = self.__lang_data['max_val']/max_val
  198.         for (k, v) in dictionary.items():
  199.             dictionary[k] = v*scaling_factor
  200.         return dictionary
  201.  
  202.     # The residual error is the difference between a char's frequency in
  203.     # __lang_data and its frequency in the scaled dictionary from above.
  204.     # The error is then squared to remove a possible negative value.
  205.     def __sum_residuals_squared(self, dictionary):
  206.         sum = 0
  207.         for (k, v) in dictionary.items():
  208.             sum += (v - self.__lang_data[k])**2
  209.         return sum
  210.  
  211.     # Find the Caesar shift that brings the ciphertext closest to the
  212.     # character distribution of the plaintext's language.
  213.     def __find_best_caesar_shift(self):
  214.         best = 0
  215.         smallest_sum = -1
  216.         # Find the residual sum for each shift.
  217.         for shift in range(26):
  218.             encoded_text = Caesar(self).encipher(shift)
  219.             vigcrack_obj = VigCrack(encoded_text).set_language(self.__lang)
  220.             char_freqs = vigcrack_obj.__count_char_freqs()
  221.             scaled = vigcrack_obj.__scale(char_freqs)
  222.             current_sum = vigcrack_obj.__sum_residuals_squared(scaled)
  223.             # Keep track of the shift with the lowest residual sum.
  224.             # If there's a tie, the smallest shift wins.
  225.             if smallest_sum == -1:
  226.                 smallest_sum = current_sum
  227.             if current_sum < smallest_sum:
  228.                 best = shift
  229.                 smallest_sum = current_sum
  230.         return best
  231.  
  232.     def __find_codeword_length(self, min_length, max_length):
  233.         codeword_length = min_length
  234.         kappas = []
  235.         # Put the kappa value for each codeword length tested into an array.
  236.         for i in range(min_length, max_length + 1):
  237.             temp = self.__rotate_right(i)
  238.             coincidences = 0
  239.             for j in range(len(self)):
  240.                 if temp[j] == self[j]:
  241.                     coincidences += 1
  242.             kappas.append(float(coincidences)/len(self))
  243.         # Find out which value of kappa is closest to the kappa of the
  244.         # plaintext's language.  If there's a tie, the shortest codeword wins.
  245.         smallest_squared_diff = -1
  246.         for i in range((max_length + 1) - min_length):
  247.             current_squared_diff = (self.__lang_data['kappa'] - kappas[i])**2
  248.             if smallest_squared_diff == -1:
  249.                 smallest_squared_diff = current_squared_diff
  250.             if current_squared_diff < smallest_squared_diff:
  251.                 codeword_length = min_length + i
  252.                 smallest_squared_diff = current_squared_diff
  253.         return codeword_length
  254.  
  255.     def __find_codeword(self, min_length, max_length):
  256.         # Strip away invalid chars.
  257.         accumulator = []
  258.         for char in self:
  259.             if char.isalpha():
  260.                 accumulator.append(char)
  261.         alpha_only = VigCrack(''.join(accumulator)).set_language(self.__lang)
  262.         codeword_length = alpha_only.__find_codeword_length(min_length,
  263.                                                             max_length)
  264.         # Build the codeword by finding one character at a time.
  265.         codeword = []
  266.         for i in range(codeword_length):
  267.             temp = alpha_only.__get_every_nth_char(i, codeword_length)
  268.             shift = temp.__find_best_caesar_shift()
  269.             if shift == 0:
  270.                 codeword.append('A')
  271.             else:
  272.                 codeword.append(chr(ord('A') + (26 - shift)))
  273.         return VigCrack(''.join(codeword)).set_language(self.__lang)
  274.  
  275.     def __parse_args(self, *arg_list):
  276.         if len(arg_list) == 0:    # Use default values for codeword length.
  277.             min_length = self.__default_min_codeword_length
  278.             max_length = self.__default_max_codeword_length
  279.         elif len(arg_list) == 1:    # Exact codeword length specified by user.
  280.             min_length = max_length = int(arg_list[0])
  281.         else:    # min_length and max_length given by user.
  282.             min_length = int(arg_list[0])
  283.             max_length = int(arg_list[1])
  284.         # Check for input errors.
  285.         if min_length == max_length:
  286.             if min_length < 1:
  287.                 raise InputError('Codeword length is too small')
  288.         else:
  289.             if min_length < 1:
  290.                 raise InputError('min_length is too small')
  291.             if max_length < 1:
  292.                 raise InputError('max_length is too small')
  293.         if max_length < min_length:
  294.             raise InputError('max_length cannot be shorter than min_length')
  295.         if len(self) == 0:
  296.             raise InputError('Ciphertext is empty')
  297.         if len(self) < max_length:
  298.             raise InputError('Ciphertext is too short')
  299.         # Check that the ciphertext contains at least one valid character.
  300.         has_valid_char = False
  301.         for char in self:
  302.             if char.isalpha():
  303.                 has_valid_char = True
  304.                 break
  305.         if not has_valid_char:
  306.             raise InputError('No valid characters in ciphertext')
  307.         # If everything's all right, return the min_length and max_length.
  308.         return [min_length, max_length]
  309.  
  310.     def crack_codeword(self, *arg_list):
  311.         """
  312.        Try to find the codeword that encrypted the ciphertext object.
  313.        If no arguments are supplied, codewords between the default minimum
  314.        length and the default maximum length are tried.
  315.        If one integer argument is supplied, only codewords with that length
  316.        will be tried.
  317.        If two integer arguments are given then the first argument is treated
  318.        as a minimum codeword length, and the second argument is treated as a
  319.        maximum codeword length, to try.
  320.        """
  321.         array = self.__parse_args(*arg_list)
  322.         return self.__find_codeword(array[0], array[1])
  323.  
  324.     def crack_message(self, *arg_list):
  325.         """
  326.        Try to decode the ciphertext object.
  327.        This method accepts arguments in the same way as the crack_codeword()
  328.        method.
  329.        """
  330.         codeword = self.crack_codeword(*arg_list)
  331.         return self.decipher(codeword)
  332.  
  333. a = open('/percorso/file/lorem.txt').read()
  334.  
  335. key=VigCrack(a).crack_codeword(8,18)
  336. print key
  337. print Vigenere(a).decipher(key)