Advertisement
Guest User

Untitled

a guest
Jan 18th, 2017
75
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 3.59 KB | None | 0 0
  1. import sys
  2.  
  3. englishLetterFreq = [8.17, 1.49, 2.78, 4.25, 12.7, 2.23, 2.02, 6.09, 6.97, 0.15, 0.77, 4.03, 2.41, 6.75, 7.51, 1.93, 0.1, 5.99, 6.33, 9.06, 2.76, 0.98, 2.36, 0.15, 1.97, 0.07]
  4. NUM_LETTERS_IN_ALPHABET = 26
  5. ASCII_DEC_START = 65
  6. ASCII_DEC_END = 90
  7.  
  8. # rotate array to the right
  9. def rotate(l, n):
  10.     return l[n:] + l[:n]
  11.  
  12. # get the distribution percentage of the letters in the file
  13. def getDistribution(dist, total_len):
  14.     # get percentage of occurrence for every letter
  15.     for i in range(NUM_LETTERS_IN_ALPHABET):
  16.         dist[i] = dist[i]/float(total_len)
  17.  
  18.     return dist
  19.  
  20. # calculate the total frequency sum by multiplying corresponding English letter frequency with distribution percentage of letters in file
  21. def getTotalFreq(dist):
  22.     total_frequency = 0
  23.     for i in range(NUM_LETTERS_IN_ALPHABET):
  24.         total_frequency += englishLetterFreq[i] * dist[i]
  25.     return total_frequency
  26.  
  27. # create an array with 26 spots each representing the letters in the alphabet
  28. # count the number of letters occurring at a specific key location and add to distribution array
  29. # return the distribution count
  30. def frequency(content, key_length, key_pos):
  31.     dist = [0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0]
  32.     letter_count = 0
  33.     for i in range(len(content)):
  34.         if content[i].isalpha():
  35.             letter_count += 1
  36.             if letter_count % key_length == key_pos:
  37.                 curr_letter = content[i]
  38.                 dist[ord(curr_letter) - ASCII_DEC_START] += 1
  39.  
  40.     return getDistribution(dist, letter_count)
  41.  
  42. def decodeVigereneKey(key_length):
  43.     # take 2 argument as file name
  44.     with open(str(sys.argv[1])) as f:
  45.         content = f.readlines()
  46.  
  47.     # strip white spaces and lowercase all letters
  48.     # content = re.sub(r'([^\s\w]|_)+', "", original_content)
  49.     content = ''.join([x.strip() for x in content]).upper()
  50.     # rotate the content by specified key amount to find each letter
  51.     key = []
  52.     for j in range(key_length):
  53.         max_freq = 0
  54.         shifts = 0
  55.         dist = frequency(content, key_length, j)
  56.         # shift 26 times to find the max frequency of the 26 shifts and the number(letter) it corresponds to
  57.         for i in range(NUM_LETTERS_IN_ALPHABET):
  58.             curr_frequency = getTotalFreq(rotate(dist, i))
  59.             # print str(i) + ' ' + str(curr_frequency)
  60.             if max_freq < curr_frequency:
  61.                 max_freq = curr_frequency
  62.                 shifts = i
  63.  
  64.         # add to key array
  65.         key.append(chr(shifts + ASCII_DEC_START - 1))
  66.     key = rotate(key, 1)
  67.     return key
  68.  
  69. def decodeVigerene(key):
  70.     # take 2 argument as file name
  71.     with open(str(sys.argv[1])) as f:
  72.         content = f.readlines()
  73.  
  74.     # strip white spaces and lowercase all letters
  75.     # content = re.sub(r'([^\s\w]|_)+', "", original_content)
  76.     content = ''.join([x.strip() for x in content]).upper()
  77.     decoded_content = ''
  78.     key_pos = 0
  79.     for i in range(len(content)):
  80.         if content[i].isalpha():
  81.             # letter will loop back from end of alphabet
  82.             if ord(content[i]) - ord(key[key_pos % len(key)]) < 0:
  83.                 decoded_content += chr(ASCII_DEC_END + ord(content[i]) - ord(key[key_pos % len(key)]))
  84.             # letter subtracted
  85.             else:
  86.                 decoded_content += chr(ASCII_DEC_START - 1 + ord(content[i]) - ord(key[key_pos % len(key)]))
  87.             key_pos += 1
  88.         else:
  89.             decoded_content += content[i]
  90.  
  91.     return decoded_content
  92.  
  93.  
  94. # repeat to determine length of key
  95. print decodeVigereneKey(13)
  96. print decodeVigerene(decodeVigereneKey(13))
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement