Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- #!/usr/bin/python
- class Caesar(str):
- """An implementation of the Caesar cipher."""
- def encipher(self, shift):
- """Encipher input (plaintext) using the Caesar cipher and return it
- (ciphertext)."""
- ciphertext = []
- for p in self:
- if p.isalpha():
- ciphertext.append(chr((ord(p) - ord('Aa'[int(p.islower())]) +
- shift) % 26 + ord('Aa'[int(p.islower())])))
- else:
- ciphertext.append(p)
- return Caesar(''.join(ciphertext))
- def decipher(self, shift):
- """Decipher input (ciphertext) using the Caesar cipher and return it
- (plaintext)."""
- return self.encipher(-shift)
- class InputError(Exception):
- """This class is only used for throwing exceptions if the user supplies
- invalid input (e.g. ciphertext is an empty string)."""
- pass
- class Vigenere(str):
- """An implementation of the Vigenere cipher."""
- def encipher(self, key):
- """Encipher input (plaintext) using the Vigenere cipher and return
- it (ciphertext)."""
- ciphertext = []
- k = 0
- n = len(key)
- for i in range(len(self)):
- p = self[i]
- if p.isalpha():
- ciphertext.append(chr((ord(p) + ord(
- (key[k % n].upper(), key[k % n].lower())[int(p.islower())]
- ) - 2*ord('Aa'[int(p.islower())])) % 26 +
- ord('Aa'[int(p.islower())])))
- k += 1
- else:
- ciphertext.append(p)
- return Vigenere(''.join(ciphertext))
- def decipher(self, key):
- """Decipher input (ciphertext) using the Vigenere cipher and return
- it (plaintext)."""
- plaintext = []
- k = 0
- n = len(key)
- for i in range(len(self)):
- c = self[i]
- if c.isalpha():
- plaintext.append(chr((ord(c) - ord(
- (key[k % n].upper(), key[k % n].lower())[int(c.islower())]
- )) % 26 + ord('Aa'[int(c.islower())])))
- k += 1
- else:
- plaintext.append(c)
- return Vigenere(''.join(plaintext))
- class VigCrack(Vigenere):
- """
- VigCrack objects have methods to break Vigenere-encoded texts when the
- original key is unknown.
- The technique used is based on the one described in:
- http://www.stonehill.edu/compsci/Shai_papers/RSA.pdf
- (pages 9-10)
- Character frequencies taken from:
- http://www.csm.astate.edu/~rossa/datasec/frequency.html (English)
- http://www.characterfrequency.com/ (French, Italian, Portuguese, Spanish)
- http://www.santacruzpl.org/readyref/files/g-l/ltfrqger.shtml (German)
- """
- # Unless otherwise specified, test for codewords between (and including)
- # these two lengths:
- __default_min_codeword_length = 5
- __default_max_codeword_length = 9
- # The following are language-specific data on character frequencies.
- # Kappa is the "index of coincidence" described in the cryptography paper
- # (link above).
- __english_data = {
- 'A':8.167, 'B':1.492, 'C':2.782, 'D':4.253, 'E':12.702,
- 'F':2.228, 'G':2.015, 'H':6.094, 'I':6.996, 'J':0.153,
- 'K':0.772, 'L':4.025, 'M':2.406, 'N':6.749, 'O':7.507,
- 'P':1.929, 'Q':0.095, 'R':5.987, 'S':6.327, 'T':9.056,
- 'U':2.758, 'V':0.978, 'W':2.360, 'X':0.150, 'Y':1.974,
- 'Z':0.074, 'max_val':12.702, 'kappa':0.0667
- }
- __french_data = {
- 'A':8.11, 'B':0.903, 'C':3.49, 'D':4.27, 'E':17.22,
- 'F':1.14, 'G':1.09, 'H':0.769, 'I':7.44, 'J':0.339,
- 'K':0.097, 'L':5.53, 'M':2.89, 'N':7.46, 'O':5.38,
- 'P':3.02, 'Q':0.999, 'R':7.05, 'S':8.04, 'T':6.99,
- 'U':5.65, 'V':1.30, 'W':0.039, 'X':0.435, 'Y':0.271,
- 'Z':0.098, 'max_val':17.22, 'kappa':0.0746
- }
- __german_data = {
- 'A':6.506, 'B':2.566, 'C':2.837, 'D':5.414, 'E':16.693,
- 'F':2.044, 'G':3.647, 'H':4.064, 'I':7.812, 'J':0.191,
- 'K':1.879, 'L':2.825, 'M':3.005, 'N':9.905, 'O':2.285,
- 'P':0.944, 'Q':0.055, 'R':6.539, 'S':6.765, 'T':6.742,
- 'U':3.703, 'V':1.069, 'W':1.396, 'X':0.022, 'Y':0.032,
- 'Z':1.002, 'max_val':16.693, 'kappa':0.0767
- }
- __italian_data = {
- 'A':11.30, 'B':0.975, 'C':4.35, 'D':3.80, 'E':11.24,
- 'F':1.09, 'G':1.73, 'H':1.02, 'I':11.57, 'J':0.035,
- 'K':0.078, 'L':6.40, 'M':2.66, 'N':7.29, 'O':9.11,
- 'P':2.89, 'Q':0.391, 'R':6.68, 'S':5.11, 'T':6.76,
- 'U':3.18, 'V':1.52, 'W':0.00, 'X':0.024, 'Y':0.048,
- 'Z':0.958, 'max_val':11.57, 'kappa':0.0733
- }
- __portuguese_data = {
- 'A':13.89, 'B':0.980, 'C':4.18, 'D':5.24, 'E':12.72,
- 'F':1.01, 'G':1.17, 'H':0.905, 'I':6.70, 'J':0.317,
- 'K':0.0174, 'L':2.76, 'M':4.54, 'N':5.37, 'O':10.90,
- 'P':2.74, 'Q':1.06, 'R':6.67, 'S':7.90, 'T':4.63,
- 'U':4.05, 'V':1.55, 'W':0.0104, 'X':0.272, 'Y':0.0165,
- 'Z':0.400, 'max_val':13.89, 'kappa':0.0745
- }
- __spanish_data = {
- 'A':12.09, 'B':1.21, 'C':4.20, 'D':4.65, 'E':13.89,
- 'F':0.642, 'G':1.11, 'H':1.13, 'I':6.38, 'J':0.461,
- 'K':0.038, 'L':5.19, 'M':2.86, 'N':7.23, 'O':9.58,
- 'P':2.74, 'Q':1.37, 'R':6.14, 'S':7.43, 'T':4.49,
- 'U':4.53, 'V':1.05, 'W':0.011, 'X':0.124, 'Y':1.14,
- 'Z':0.324, 'max_val':13.89, 'kappa':0.0766
- }
- # The default language is set to English.
- __lang = 'EN'
- __lang_data = __english_data
- # This method sets the lang (__lang) attribute of a VigCrack object.
- def set_language(self, language):
- self.__lang = language.upper()
- if self.__lang == 'DE':
- self.__lang_data = self.__german_data
- elif self.__lang == 'ES':
- self.__lang_data = self.__spanish_data
- elif self.__lang == 'FR':
- self.__lang_data = self.__french_data
- elif self.__lang == 'IT':
- self.__lang_data = self.__italian_data
- elif self.__lang == 'PT':
- self.__lang_data = self.__portuguese_data
- else:
- self.__lang = 'EN'
- return self
- # Rotate text n places to the right, wrapping around at the end.
- def __rotate_right(self, n):
- cutting_point = len(self) - (n % len(self))
- return self[cutting_point:] + self[:cutting_point]
- # Get every nth char from a piece of text, from a given starting position.
- def __get_every_nth_char(self, start, n):
- accumulator = []
- for i in range(len(self)):
- if (i % n) == start:
- accumulator.append(self[i])
- return VigCrack(''.join(accumulator)).set_language(self.__lang)
- # Build a dictionary containing the number of occurrences of each char.
- def __count_char_freqs(self):
- dictionary = {}
- self = self.upper()
- for char in self:
- if char.isalpha():
- dictionary[char] = dictionary.get(char, 0) + 1
- return dictionary
- # Scale the dictionary so that it can be compared with __lang_data.
- def __scale(self, dictionary):
- v = dictionary.values()
- v.sort()
- max_val = v[-1]
- scaling_factor = self.__lang_data['max_val']/max_val
- for (k, v) in dictionary.items():
- dictionary[k] = v*scaling_factor
- return dictionary
- # The residual error is the difference between a char's frequency in
- # __lang_data and its frequency in the scaled dictionary from above.
- # The error is then squared to remove a possible negative value.
- def __sum_residuals_squared(self, dictionary):
- sum = 0
- for (k, v) in dictionary.items():
- sum += (v - self.__lang_data[k])**2
- return sum
- # Find the Caesar shift that brings the ciphertext closest to the
- # character distribution of the plaintext's language.
- def __find_best_caesar_shift(self):
- best = 0
- smallest_sum = -1
- # Find the residual sum for each shift.
- for shift in range(26):
- encoded_text = Caesar(self).encipher(shift)
- vigcrack_obj = VigCrack(encoded_text).set_language(self.__lang)
- char_freqs = vigcrack_obj.__count_char_freqs()
- scaled = vigcrack_obj.__scale(char_freqs)
- current_sum = vigcrack_obj.__sum_residuals_squared(scaled)
- # Keep track of the shift with the lowest residual sum.
- # If there's a tie, the smallest shift wins.
- if smallest_sum == -1:
- smallest_sum = current_sum
- if current_sum < smallest_sum:
- best = shift
- smallest_sum = current_sum
- return best
- def __find_codeword_length(self, min_length, max_length):
- codeword_length = min_length
- kappas = []
- # Put the kappa value for each codeword length tested into an array.
- for i in range(min_length, max_length + 1):
- temp = self.__rotate_right(i)
- coincidences = 0
- for j in range(len(self)):
- if temp[j] == self[j]:
- coincidences += 1
- kappas.append(float(coincidences)/len(self))
- # Find out which value of kappa is closest to the kappa of the
- # plaintext's language. If there's a tie, the shortest codeword wins.
- smallest_squared_diff = -1
- for i in range((max_length + 1) - min_length):
- current_squared_diff = (self.__lang_data['kappa'] - kappas[i])**2
- if smallest_squared_diff == -1:
- smallest_squared_diff = current_squared_diff
- if current_squared_diff < smallest_squared_diff:
- codeword_length = min_length + i
- smallest_squared_diff = current_squared_diff
- return codeword_length
- def __find_codeword(self, min_length, max_length):
- # Strip away invalid chars.
- accumulator = []
- for char in self:
- if char.isalpha():
- accumulator.append(char)
- alpha_only = VigCrack(''.join(accumulator)).set_language(self.__lang)
- codeword_length = alpha_only.__find_codeword_length(min_length,
- max_length)
- # Build the codeword by finding one character at a time.
- codeword = []
- for i in range(codeword_length):
- temp = alpha_only.__get_every_nth_char(i, codeword_length)
- shift = temp.__find_best_caesar_shift()
- if shift == 0:
- codeword.append('A')
- else:
- codeword.append(chr(ord('A') + (26 - shift)))
- return VigCrack(''.join(codeword)).set_language(self.__lang)
- def __parse_args(self, *arg_list):
- if len(arg_list) == 0: # Use default values for codeword length.
- min_length = self.__default_min_codeword_length
- max_length = self.__default_max_codeword_length
- elif len(arg_list) == 1: # Exact codeword length specified by user.
- min_length = max_length = int(arg_list[0])
- else: # min_length and max_length given by user.
- min_length = int(arg_list[0])
- max_length = int(arg_list[1])
- # Check for input errors.
- if min_length == max_length:
- if min_length < 1:
- raise InputError('Codeword length is too small')
- else:
- if min_length < 1:
- raise InputError('min_length is too small')
- if max_length < 1:
- raise InputError('max_length is too small')
- if max_length < min_length:
- raise InputError('max_length cannot be shorter than min_length')
- if len(self) == 0:
- raise InputError('Ciphertext is empty')
- if len(self) < max_length:
- raise InputError('Ciphertext is too short')
- # Check that the ciphertext contains at least one valid character.
- has_valid_char = False
- for char in self:
- if char.isalpha():
- has_valid_char = True
- break
- if not has_valid_char:
- raise InputError('No valid characters in ciphertext')
- # If everything's all right, return the min_length and max_length.
- return [min_length, max_length]
- def crack_codeword(self, *arg_list):
- """
- Try to find the codeword that encrypted the ciphertext object.
- If no arguments are supplied, codewords between the default minimum
- length and the default maximum length are tried.
- If one integer argument is supplied, only codewords with that length
- will be tried.
- If two integer arguments are given then the first argument is treated
- as a minimum codeword length, and the second argument is treated as a
- maximum codeword length, to try.
- """
- array = self.__parse_args(*arg_list)
- return self.__find_codeword(array[0], array[1])
- def crack_message(self, *arg_list):
- """
- Try to decode the ciphertext object.
- This method accepts arguments in the same way as the crack_codeword()
- method.
- """
- codeword = self.crack_codeword(*arg_list)
- return self.decipher(codeword)
- a = "OMQEMDUEQMEK"
- for key in range(1,27):
- print Caesar(a).decipher(key)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement