Advertisement
Guest User

Untitled

a guest
Jan 18th, 2017
82
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 3.61 KB | None | 0 0
  1. # vigerene.py
  2. import sys
  3.  
  4. englishLetterFreq = [8.17, 1.49, 2.78, 4.25, 12.7, 2.23, 2.02, 6.09, 6.97, 0.15, 0.77, 4.03, 2.41, 6.75, 7.51, 1.93, 0.1, 5.99, 6.33, 9.06, 2.76, 0.98, 2.36, 0.15, 1.97, 0.07]
  5. NUM_LETTERS_IN_ALPHABET = 26
  6. ASCII_DEC_START = 65
  7. ASCII_DEC_END = 90
  8.  
  9. # rotate array to the right
  10. def rotate(l, n):
  11. return l[n:] + l[:n]
  12.  
  13. # get the distribution percentage of the letters in the file
  14. def getDistribution(dist, total_len):
  15. # get percentage of occurrence for every letter
  16. for i in range(NUM_LETTERS_IN_ALPHABET):
  17. dist[i] = dist[i]/float(total_len)
  18.  
  19. return dist
  20.  
  21. # calculate the total frequency sum by multiplying corresponding English letter frequency with distribution percentage of letters in file
  22. def getTotalFreq(dist):
  23. total_frequency = 0
  24. for i in range(NUM_LETTERS_IN_ALPHABET):
  25. total_frequency += englishLetterFreq[i] * dist[i]
  26. return total_frequency
  27.  
  28. # create an array with 26 spots each representing the letters in the alphabet
  29. # count the number of letters occurring at a specific key location and add to distribution array
  30. # return the distribution count
  31. def frequency(content, key_length, key_pos):
  32. dist = [0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0]
  33. letter_count = 0
  34. for i in range(len(content)):
  35. if content[i].isalpha():
  36. letter_count += 1
  37. if letter_count % key_length == key_pos:
  38. curr_letter = content[i]
  39. dist[ord(curr_letter) - ASCII_DEC_START] += 1
  40.  
  41. return getDistribution(dist, letter_count)
  42.  
  43. def decodeVigereneKey(key_length):
  44. # take 2 argument as file name
  45. with open(str(sys.argv[1])) as f:
  46. content = f.readlines()
  47.  
  48. # strip white spaces and lowercase all letters
  49. # content = re.sub(r'([^\s\w]|_)+', "", original_content)
  50. content = ''.join([x.strip() for x in content]).upper()
  51. # rotate the content by specified key amount to find each letter
  52. key = []
  53. for j in range(key_length):
  54. max_freq = 0
  55. shifts = 0
  56. dist = frequency(content, key_length, j)
  57. # shift 26 times to find the max frequency of the 26 shifts and the number(letter) it corresponds to
  58. for i in range(NUM_LETTERS_IN_ALPHABET):
  59. curr_frequency = getTotalFreq(rotate(dist, i))
  60. # print str(i) + ' ' + str(curr_frequency)
  61. if max_freq < curr_frequency:
  62. max_freq = curr_frequency
  63. shifts = i
  64.  
  65. # add to key array
  66. key.append(chr(shifts + ASCII_DEC_START - 1))
  67. key = rotate(key, 1)
  68. return key
  69.  
  70. def decodeVigerene(key):
  71. # take 2 argument as file name
  72. with open(str(sys.argv[1])) as f:
  73. content = f.readlines()
  74.  
  75. # strip white spaces and lowercase all letters
  76. # content = re.sub(r'([^\s\w]|_)+', "", original_content)
  77. content = ''.join([x.strip() for x in content]).upper()
  78. decoded_content = ''
  79. key_pos = 0
  80. for i in range(len(content)):
  81. if content[i].isalpha():
  82. # letter will loop back from end of alphabet
  83. if ord(content[i]) - ord(key[key_pos % len(key)]) < 0:
  84. decoded_content += chr(ASCII_DEC_END + ord(content[i]) - ord(key[key_pos % len(key)]))
  85. # letter subtracted
  86. else:
  87. decoded_content += chr(ASCII_DEC_START - 1 + ord(content[i]) - ord(key[key_pos % len(key)]))
  88. key_pos += 1
  89. else:
  90. decoded_content += content[i]
  91.  
  92. return decoded_content
  93.  
  94.  
  95. # repeat to determine length of key
  96. print decodeVigereneKey(13)
  97. print decodeVigerene(decodeVigereneKey(13))
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement