Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- import sys
# Relative frequency, in percent, of each letter in English text.
# Index 0 is 'A' and index 25 is 'Z'.
englishLetterFreq = [
    8.17, 1.49, 2.78, 4.25, 12.7, 2.23, 2.02,  # A-G
    6.09, 6.97, 0.15, 0.77, 4.03, 2.41, 6.75,  # H-N
    7.51, 1.93, 0.1, 5.99, 6.33, 9.06, 2.76,   # O-U
    0.98, 2.36, 0.15, 1.97, 0.07,              # V-Z
]
# Code points of the first and last uppercase ASCII letters.
ASCII_DEC_START = ord('A')
ASCII_DEC_END = ord('Z')
# Size of the A-Z alphabet.
NUM_LETTERS_IN_ALPHABET = ASCII_DEC_END - ASCII_DEC_START + 1
def rotate(l, n):
    """Return a copy of sequence ``l`` rotated to the LEFT by ``n`` positions.

    Element ``n`` of the input becomes element 0 of the result, so
    ``rotate([1, 2, 3], 1) == [2, 3, 1]``.  A negative ``n`` rotates to the
    right.  The shift is reduced modulo the length, so values beyond the
    length wrap around instead of returning the input unrotated.

    An empty sequence is returned as an empty copy.
    """
    if not l:
        return l[:]
    n %= len(l)
    return l[n:] + l[:n]
def getDistribution(dist, total_len):
    """Turn per-letter counts into fractions of ``total_len``, in place.

    Every entry of ``dist`` is replaced by ``count / total_len`` as a float.
    The list is modified in place and the same list object is returned.

    When ``total_len`` is 0 (no letters were counted) every entry is set to
    0.0 instead of raising ``ZeroDivisionError``.
    """
    if not total_len:
        # Nothing was counted, so no letter has any share of the text.
        for i in range(len(dist)):
            dist[i] = 0.0
        return dist
    denominator = float(total_len)
    for i, count in enumerate(dist):
        dist[i] = count / denominator
    return dist
def getTotalFreq(dist):
    """Score a letter distribution against the English frequency table.

    Multiplies each English letter frequency by the entry of ``dist`` at the
    same index and returns the sum of those products.  A higher score means
    ``dist`` lines up more closely with English.
    """
    score = 0
    for idx, english_pct in enumerate(englishLetterFreq[:NUM_LETTERS_IN_ALPHABET]):
        score = score + english_pct * dist[idx]
    return score
def frequency(content, key_length, key_pos):
    """Return the letter distribution of one key column of ``content``.

    Alphabetic characters are numbered starting at 1.  A letter belongs to
    the requested column when ``number % key_length == key_pos``, so
    ``key_pos`` 1 selects letters 1, 1 + key_length, ... and ``key_pos`` 0
    selects letters key_length, 2 * key_length, ...

    ``content`` is expected to be uppercase.  The result has one slot per
    letter A-Z, holding that letter's count in the column divided by the
    total number of alphabetic characters in ``content``.  If ``content``
    has no alphabetic characters, a list of zeros is returned.
    """
    dist = [0] * NUM_LETTERS_IN_ALPHABET
    letter_count = 0
    for ch in content:
        if not ch.isalpha():
            continue
        letter_count += 1
        if letter_count % key_length != key_pos:
            continue
        letter_index = ord(ch) - ASCII_DEC_START
        # isalpha() also accepts letters outside A-Z.  They still advance
        # the position counter but have no slot in the table, so they are
        # not tallied (previously they raised IndexError).
        if 0 <= letter_index < NUM_LETTERS_IN_ALPHABET:
            dist[letter_index] += 1
    if letter_count == 0:
        # No letters at all: avoid dividing by zero.
        return [0.0] * NUM_LETTERS_IN_ALPHABET
    return getDistribution(dist, letter_count)
def decodeVigereneKey(key_length):
    """Recover a Vigenere key of ``key_length`` characters by frequency analysis.

    The ciphertext is read from the file named by the first command-line
    argument (``sys.argv[1]``).  Each line is stripped of surrounding
    whitespace, the lines are joined, and the text is uppercased.

    For every key column, the column's letter distribution is rotated through
    all 26 shifts and the shift with the highest English-frequency score is
    kept (the first one wins on ties).  A shift ``s`` is stored as
    ``chr(s + ASCII_DEC_START - 1)``: shift 1 is 'A', shift 0 is '@'.

    Returns the key as a list of single-character strings.
    """
    with open(str(sys.argv[1])) as cipher_file:
        raw_lines = cipher_file.readlines()
    content = ''.join([line.strip() for line in raw_lines]).upper()

    key = []
    for column in range(key_length):
        column_dist = frequency(content, key_length, column)
        # max() returns the first best candidate, matching a strict
        # "greater than" scan that starts from shift 0.
        best_shift = max(
            range(NUM_LETTERS_IN_ALPHABET),
            key=lambda shift: getTotalFreq(rotate(column_dist, shift)),
        )
        key.append(chr(best_shift + ASCII_DEC_START - 1))

    # frequency() numbers letters from 1, so column 0 holds the letters of the
    # LAST key position.  Rotating left by one puts the key in order.
    return rotate(key, 1)
def decodeVigerene(key):
    """Decrypt the file named by ``sys.argv[1]`` with ``key``; return the text.

    The file's lines are stripped of surrounding whitespace, joined, and
    uppercased before decoding.

    ``key`` is a sequence of characters using the same convention as
    ``decodeVigereneKey``: a key character shifts by
    ``ord(char) - (ASCII_DEC_START - 1)``, so 'A' means a shift of 1 and '@'
    a shift of 0.  Each alphabetic character consumes one key position, and
    the key repeats cyclically.  Non-alphabetic characters are copied through
    and do not consume a key position.

    Returns the decoded text as one string.
    """
    with open(str(sys.argv[1])) as f:
        content = ''.join([line.strip() for line in f]).upper()

    decoded = []
    key_pos = 0
    for ch in content:
        if not ch.isalpha():
            decoded.append(ch)
            continue
        key_char = key[key_pos % len(key)]
        key_pos += 1
        letter_index = ord(ch) - ASCII_DEC_START
        if not 0 <= letter_index < NUM_LETTERS_IN_ALPHABET:
            # A letter outside A-Z cannot be shifted within the alphabet.
            # Copy it through rather than emitting an arbitrary character.
            decoded.append(ch)
            continue
        shift = ord(key_char) - (ASCII_DEC_START - 1)
        # The modulo wraps below 'A' back to 'Z'.  This also covers the case
        # where the cipher letter equals the key character, which used to
        # yield '@' instead of 'Z'.
        plain_index = (letter_index - shift) % NUM_LETTERS_IN_ALPHABET
        decoded.append(chr(ASCII_DEC_START + plain_index))
    return ''.join(decoded)
# Recover the key for an assumed key length of 13, then print the key and the
# text decrypted with it.  Presumably this is re-run with other lengths to
# find the real key length.
# The key is computed once and reused, and the single-argument print(...)
# form behaves the same on Python 2 and Python 3.
recovered_key = decodeVigereneKey(13)
print(recovered_key)
print(decodeVigerene(recovered_key))
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement