Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- import operator
- import math
def _ngram_counts(text, n):
    """Count every overlapping substring of length ``n`` in ``text``.

    Returns a dict mapping each n-gram to its number of occurrences, with
    keys in order of first appearance. A text shorter than ``n`` yields an
    empty dict.
    """
    counts = {}
    for start in range(len(text) - n + 1):
        gram = text[start:start + n]
        counts[gram] = counts.get(gram, 0) + 1
    return counts


def _print_banner(title, rule):
    """Print a boxed heading: ``rule``, the title row, then ``rule`` again."""
    print("\n" + rule + "\n| |\n| " + title + " |\n| |\n" + rule)


def frequencyAnalysis(ciphertext):
    """Print letter, digram and trigram frequencies of ``ciphertext``.

    Each table is printed as a list of ``(ngram, count)`` tuples sorted by
    ascending count (ties keep first-appearance order), framed by a heading
    banner and an "END OF ..." banner.

    Args:
        ciphertext: The string to analyse. Every character is counted,
            not only letters.

    Returns:
        None. All results are written to standard output.
    """
    rule = "-----------------------------------------------------------------------"
    # The closing trigram banner uses a slightly longer rule; kept so the
    # printed output does not change.
    long_rule = "------------------------------------------------------------------------"

    by_count = operator.itemgetter(1)
    sortedDict = sorted(_ngram_counts(ciphertext, 1).items(), key=by_count)
    sortedDigramDict = sorted(_ngram_counts(ciphertext, 2).items(), key=by_count)
    sortedTrigramDict = sorted(_ngram_counts(ciphertext, 3).items(), key=by_count)

    _print_banner("Letter frequencies", rule)
    print(sortedDict)
    _print_banner("END OF LETTER FREQUENCIES", rule)

    _print_banner("Digram Frequencies", rule)
    print(sortedDigramDict)
    _print_banner("END OF DIGRAM FREQUENCIES", rule)

    _print_banner("Trigram Frequencies", rule)
    print(sortedTrigramDict)
    _print_banner("END OF TRIGRAM FREQUENCIES", long_rule)
def shiftBreaker(ciphertext):
    """Print ``ciphertext`` decoded with each of the 26 Caesar shifts.

    For shift ``i`` every letter is moved ``i`` positions back in the
    alphabet (wrapping around), so the line labelled ``-0`` is the input
    itself. Letters are lower-cased first; any other character is passed
    through unchanged.

    Args:
        ciphertext: The text to brute-force.

    Returns:
        None. One line per shift is written to standard output.
    """
    base = ord('a')
    lowered = ciphertext.lower()
    for i in range(0, 26):
        # Subtract the alphabet base before the modulo so that the result
        # is a 0-25 alphabet index rather than a raw code point residue.
        decoder = ''.join(
            chr(base + (ord(c) - base - i) % 26) if 'a' <= c <= 'z' else c
            for c in lowered
        )
        print('-' + str(i) + ':\t' + decoder + '\n')
def rotate(array):
    """Return ``array`` rotated one position to the right.

    The last element moves to the front and everything else shifts back by
    one. The input is not modified; a new sequence built by slicing and
    concatenation is returned.
    """
    last = array[-1:]
    leading = array[:-1]
    return last + leading
def count_letters(text):
    """Count occurrences of each letter a-z in ``text``, ignoring case.

    Args:
        text: The string to scan.

    Returns:
        A list of 26 ints; index 0 is the count of 'a', index 25 of 'z'.
        Characters outside a-z (digits, punctuation, accented letters)
        are ignored.
    """
    # One slot per letter of the alphabet
    freq_list = [0] * 26
    base = ord('a')
    for c in text.lower():
        # str.isalpha() is also true for letters such as 'é', which would
        # index past the 26 slots, so test the a-z range explicitly.
        if 'a' <= c <= 'z':
            freq_list[ord(c) - base] += 1
    return freq_list
def index_of_coincidence(freq_list):
    """Compute the index of coincidence from a list of letter counts.

    The value is ``sum(f * (f - 1)) / (N * (N - 1))`` where ``f`` runs over
    the counts and ``N`` is their total.

    Args:
        freq_list: Iterable of non-negative counts, one per letter.

    Returns:
        The index of coincidence as a float. With fewer than two letters
        in total the quantity is undefined, and 0.0 is returned instead of
        dividing by zero.
    """
    # Total number of letters
    N = sum(freq_list)
    if N < 2:
        return 0.0
    f_sum = sum(f * (f - 1) for f in freq_list)
    return f_sum / (N * (N - 1))
def approximate_key_length(ic_eng, ic_rand, ic_cipher, N):
    """Estimate a polyalphabetic key length from indices of coincidence.

    Uses the Friedman-style estimate

        (ic_eng - ic_rand) * N / ((ic_eng - ic_cipher) + N * (ic_cipher - ic_rand))

    Args:
        ic_eng: Index of coincidence of the plaintext language.
        ic_rand: Index of coincidence of uniformly random text.
        ic_cipher: Index of coincidence measured on the ciphertext.
        N: Number of letters in the ciphertext.

    Returns:
        The estimated key length as a float (not rounded).

    Raises:
        ZeroDivisionError: If the denominator evaluates to zero.
    """
    # The numerator is derived from the arguments rather than the literal
    # 0.0265 (= 0.065 - 0.0385), so it stays consistent with the
    # denominator when ic_eng is measured from a sample text.
    t = (ic_eng - ic_rand) * N
    n = (ic_eng - ic_cipher) + N * (ic_cipher - ic_rand)
    return t / n
def shift_string(steps, text):
    """Shift every letter of ``text`` back by ``steps`` alphabet positions.

    Alphabetic characters are lower-cased, moved ``steps`` places towards
    'a' and wrapped so the result always lies in a-z. Characters for which
    ``str.isalpha()`` is false are dropped from the output.

    Args:
        steps: Number of positions to subtract (may be negative or > 26).
        text: The string to transform.

    Returns:
        The shifted string, containing only lower-case a-z.
    """
    base = ord('a')
    shifted = []
    for ch in text:
        if not ch.isalpha():
            continue
        # Modular arithmetic performs the same wrap-around as repeatedly
        # adding or subtracting 26 until the value is back inside a-z.
        offset = ord(ch.lower()) - base - steps
        shifted.append(chr(base + offset % 26))
    return ''.join(shifted)
def goodness_of_fit(text, actual_freqs):
    """Score how closely the letter distribution of ``text`` matches a reference.

    Both the observed counts (from ``count_letters(text)``) and
    ``actual_freqs`` are normalised to probabilities, then the
    chi-squared-style sum ``sum((F_i - A_i)^2 / A_i)`` is returned.
    Lower values mean a better fit.

    Args:
        text: The candidate text.
        actual_freqs: Reference letter counts or weights, one per letter
            a-z in order.

    Returns:
        The fit score as a float. ``float('inf')`` is returned when the
        score is undefined or the fit is impossible: ``text`` has no
        letters, the reference sums to zero, or ``text`` contains a letter
        whose reference frequency is zero.
    """
    observed = count_letters(text)
    n_observed = sum(observed)
    n_expected = sum(actual_freqs)
    if n_observed == 0 or n_expected == 0:
        # Nothing to normalise by; report the worst possible fit instead
        # of dividing by zero.
        return float('inf')

    res = 0
    for seen, expected in zip(observed, actual_freqs):
        f = seen / n_observed      # observed probability of this letter
        a = expected / n_expected  # reference probability of this letter
        if a == 0:
            if f:
                # A letter the reference says never occurs was observed.
                return float('inf')
            continue  # 0 observed vs 0 expected contributes nothing
        res += ((f - a) * (f - a)) / a
    return res
def find_key(key_length, text, actual_freqs):
    """Recover the most likely key of a given length by per-column fitting.

    The letters of ``text`` are dealt round-robin into ``key_length``
    columns (non-letters are skipped and do not advance the column). For
    each column all 26 shifts are tried and the one with the lowest
    ``goodness_of_fit`` against ``actual_freqs`` is taken as that key letter.

    Args:
        key_length: Number of letters in the key to look for.
        text: The ciphertext.
        actual_freqs: Reference letter counts for the plaintext language.

    Returns:
        A tuple ``(key, average_fit)`` where ``key`` is a lower-case string
        of ``key_length`` letters and ``average_fit`` is the mean of the
        best fit score of each column (lower is better). For
        ``key_length < 1`` no key exists, and ``('', float('inf'))`` is
        returned so that callers comparing fits never prefer it.
    """
    if key_length < 1:
        return ('', float('inf'))

    # Letter number n belongs to column n % key_length
    letters = [c for c in text if c.isalpha()]
    columns = [''.join(letters[i::key_length]) for i in range(key_length)]

    key = ''
    fit_list = []
    for col in columns:
        fit = 1000000000  # Start from a large value so any real score beats it
        i_val = 0         # Shift that produced the best score so far
        for i in range(0, 26):  # 26 possible shifts
            cur_fit = goodness_of_fit(shift_string(i, col), actual_freqs)
            # Strictly better only, so the earliest shift wins ties
            if cur_fit < fit:
                fit = cur_fit
                i_val = i
        # The best shift is the key letter for this column
        key += chr(ord('a') + i_val)
        fit_list.append(fit)
    # Return the key together with the average of the per-column scores
    return (key, sum(fit_list) / float(len(fit_list)))
def get_plain_text(text, key):
    """Decrypt ``text`` with a repeating-key (Vigenere-style) ``key``.

    Each alphabetic character is shifted back, via ``shift_string``, by the
    alphabet position of the current key letter ('a' = 0), and the key
    position then advances, wrapping at the end of the key. Non-alphabetic
    characters are copied through unchanged and do not consume a key
    letter.

    Args:
        text: The ciphertext.
        key: The key. It is lower-cased before use so 'KEY' and 'key'
            behave the same. An empty key is treated as 'a' (no shift)
            instead of raising IndexError.

    Returns:
        The decrypted text; letters come out lower-case.
    """
    # Pre-compute the shift for every key letter; fall back to a single
    # zero shift when the key is empty.
    shifts = [ord(k) - ord('a') for k in key.lower()] or [0]
    pieces = []
    current_key_letter = 0
    for c in text:
        if c.isalpha():
            pieces.append(shift_string(shifts[current_key_letter], c))
            current_key_letter = (current_key_letter + 1) % len(shifts)
        else:
            pieces.append(c)
    return ''.join(pieces)
def chiSquaredTest(ciphertext, keyLength):
    """Print a chi-squared score for every shift of every key column.

    The letters a-z of ``ciphertext`` (case-insensitive; all other
    characters are skipped) are dealt round-robin into ``keyLength``
    columns. For each column the observed letter counts are printed,
    followed by one chi-squared value per candidate shift 0-25 measured
    against the English letter frequencies below. The lowest value marks
    the most likely shift for that column.

    Args:
        ciphertext: The text to analyse.
        keyLength: Number of columns (assumed key length); must be >= 1.

    Returns:
        None. Results are written to standard output.

    Raises:
        ValueError: If ``keyLength`` is less than 1.
    """
    if keyLength < 1:
        raise ValueError('keyLength must be a positive integer')

    # Relative frequency of a..z in English text
    frequencies = [0.08167, 0.01492, 0.02782, 0.04253, 0.12702, 0.02228, 0.02015, 0.06094, 0.06966, 0.00153,
                   0.00772, 0.04025, 0.02406, 0.06749, 0.07507, 0.01929, 0.00095, 0.05987, 0.06327, 0.09056,
                   0.02758, 0.00978, 0.02360, 0.00150, 0.01974, 0.00074]
    alphabet = 'abcdefghijklmnopqrstuvwxyz'

    # Keep only a-z so that counting cannot hit an unknown key, and so the
    # column split matches how find_key assigns letters to columns.
    letters = [c for c in ciphertext.lower() if 'a' <= c <= 'z']

    for i in range(keyLength):
        column = letters[i::keyLength]
        obs = {letter: 0 for letter in alphabet}
        for c in column:
            obs[c] += 1
        N = len(column)
        print(obs)
        if N == 0:
            # Every expected count would be zero; the statistic is undefined.
            print('Column ' + str(i) + ' contains no letters, skipping.')
            continue

        chiSquareValue = []
        for j in range(26):
            total = 0
            for k, letter in enumerate(alphabet):
                # Under shift j, cipher letter k stands for plain letter
                # k - j. This is the same value the frequency table holds
                # at index k after j right-rotations.
                expected = frequencies[(k - j) % 26] * N
                total = total + float(math.pow(float(obs[letter]) - expected, 2) / expected)
            chiSquareValue.append(total)

        print("Goodnes of fit:\n")
        for counter, value in enumerate(chiSquareValue):
            print('Shift ' + str(counter) + ':\t' + str(value))
def main():
    """Interactive entry point: read a ciphertext, then run the chosen tool.

    The ciphertext is read from standard input until end-of-file, spaces
    are removed and the text is lower-cased. A menu then offers the shift
    breaker, frequency analysis, chi-squared test or Vigenere decoder.

    Options 1-3 end the program after running once. Option 4 does not set
    the exit flag, so the menu is shown again afterwards.

    The sample text 'example_text.txt' is read only when option 4 is first
    chosen, so options 1-3 work without that file.
    """
    print("please enter the ciphertext. Hit enter and then Crtl + D to save it :)")
    userInput = []
    while True:
        try:
            line = input()
        except EOFError:
            break
        userInput.append(line)

    # Join all lines, drop spaces and normalise case
    cipher = ''.join(userInput).replace(' ', '').lower()

    # Letter counts of the example English text, loaded on first use
    ex_freq_list = None

    done = False
    while not done:
        selection = input("""What would you like to do?
1- Decode a shift cipher
2- perform a frequency analysis
3- Perform a chi-squared test
4- Decode a viginére cipher
""")
        if selection == '1':
            done = True
            shiftBreaker(cipher)
        elif selection == '2':
            done = True
            frequencyAnalysis(cipher)
        elif selection == '3':
            done = True
            keyLength = int(input('Enter key length:\n'))
            chiSquaredTest(cipher, keyLength)
        elif selection == '4':
            if ex_freq_list is None:
                # Read an example english text from a file
                with open('example_text.txt', 'r') as example_file:
                    ex_freq_list = count_letters(example_file.read())
            # Calculate index of coincidence of the english text
            ex_ic = index_of_coincidence(ex_freq_list)
            # Calculate frequency of the cipher text
            cipher_freq_list = count_letters(cipher)
            # Calculate index of coincidence of the cipher text
            cipher_ic = index_of_coincidence(cipher_freq_list)
            # Approximate the key length using 0.0385 as the
            # index of coincidence of random text
            key_len = approximate_key_length(ex_ic, 0.0385, cipher_ic,
                                             sum(cipher_freq_list))
            print(key_len)
            # Round to an integer; a key has at least one letter
            key_len = max(1, int(key_len + 0.5))
            # Get a key with the approximate key length
            key, key_fit = find_key(key_len, cipher, ex_freq_list)
            # Also try one longer and, when that is still a valid length,
            # one shorter; keep whichever has the best average fit.
            neighbours = [key_len + 1]
            if key_len > 1:
                neighbours.append(key_len - 1)
            for length in neighbours:
                k, f = find_key(length, cipher, ex_freq_list)
                if key_fit > f:
                    key = k
                    key_fit = f
            # Print the key with the best fit and then decipher the text
            print(key)
            print(get_plain_text(cipher, key))
            key = input('Try a different key: ')
            print(key)
            print(get_plain_text(cipher, key))
        else:
            print('Please enter a valid character!')
# Start the interactive tool only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement