Guest User

Untitled

a guest
May 12th, 2020
268
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 8.23 KB | None | 0 0
  1. from collections import Counter
  2. from string import ascii_lowercase
  3.  
  4. import numpy as np
  5.  
# fmt: off
# Reference table of English digram (letter-pair) frequencies, used by _score as the
# target that a candidate decryption is compared against.
#
# The table is 26 x 26 and both axes follow alphabetical order: entry [i, j] belongs to
# the pair made of the i-th letter followed by the j-th letter, so [0, 0] is "aa" and
# [19, 7] is "th". This is the same layout that _get_digram_frequencies produces.
# Values are percentages, not fractions.
# NOTE(review): the corpus these figures were derived from is not recorded here.
DIGRAM_FREQS_ENGLISH = np.array([
    [0.003, 0.230, 0.448, 0.368, 0.012, 0.074, 0.205, 0.014, 0.316, 0.012, 0.105, 1.087, 0.285, 1.985, 0.005, 0.203, 0.002, 1.075, 0.871, 1.487, 0.119, 0.205, 0.060, 0.019, 0.217, 0.012],
    [0.146, 0.011, 0.002, 0.002, 0.576, 0.000, 0.000, 0.001, 0.107, 0.023, 0.000, 0.233, 0.003, 0.002, 0.195, 0.001, 0.000, 0.112, 0.046, 0.017, 0.185, 0.004, 0.000, 0.000, 0.176, 0.000],
    [0.538, 0.001, 0.083, 0.002, 0.651, 0.001, 0.001, 0.598, 0.281, 0.000, 0.118, 0.149, 0.003, 0.001, 0.794, 0.001, 0.005, 0.149, 0.023, 0.461, 0.163, 0.000, 0.000, 0.000, 0.042, 0.001],
    [0.151, 0.003, 0.003, 0.043, 0.765, 0.003, 0.031, 0.005, 0.493, 0.005, 0.000, 0.032, 0.018, 0.008, 0.188, 0.002, 0.001, 0.085, 0.126, 0.003, 0.148, 0.019, 0.008, 0.000, 0.050, 0.000],
    [0.688, 0.027, 0.477, 1.168, 0.378, 0.163, 0.120, 0.026, 0.183, 0.005, 0.016, 0.530, 0.374, 1.454, 0.073, 0.172, 0.057, 2.048, 1.339, 0.413, 0.031, 0.255, 0.117, 0.214, 0.144, 0.005],
    [0.164, 0.000, 0.001, 0.000, 0.237, 0.146, 0.001, 0.000, 0.285, 0.000, 0.000, 0.065, 0.001, 0.000, 0.488, 0.000, 0.000, 0.213, 0.006, 0.082, 0.096, 0.000, 0.000, 0.000, 0.009, 0.000],
    [0.148, 0.000, 0.000, 0.003, 0.385, 0.001, 0.025, 0.228, 0.152, 0.000, 0.000, 0.061, 0.010, 0.066, 0.132, 0.000, 0.000, 0.197, 0.051, 0.015, 0.086, 0.000, 0.001, 0.000, 0.026, 0.000],
    [0.926, 0.004, 0.001, 0.003, 3.075, 0.002, 0.000, 0.001, 0.763, 0.000, 0.000, 0.013, 0.013, 0.026, 0.485, 0.001, 0.000, 0.084, 0.015, 0.130, 0.074, 0.000, 0.005, 0.000, 0.050, 0.000],
    [0.286, 0.099, 0.699, 0.296, 0.385, 0.203, 0.255, 0.002, 0.023, 0.001, 0.043, 0.432, 0.318, 2.433, 0.835, 0.089, 0.011, 0.315, 1.128, 1.123, 0.017, 0.288, 0.001, 0.022, 0.000, 0.064],
    [0.026, 0.000, 0.000, 0.000, 0.052, 0.000, 0.000, 0.000, 0.003, 0.000, 0.000, 0.000, 0.000, 0.000, 0.054, 0.000, 0.000, 0.000, 0.000, 0.000, 0.059, 0.000, 0.000, 0.000, 0.000, 0.000],
    [0.017, 0.001, 0.000, 0.001, 0.214, 0.002, 0.003, 0.003, 0.098, 0.000, 0.000, 0.011, 0.002, 0.051, 0.006, 0.001, 0.000, 0.003, 0.048, 0.001, 0.003, 0.000, 0.002, 0.000, 0.006, 0.000],
    [0.528, 0.007, 0.012, 0.253, 0.829, 0.053, 0.006, 0.002, 0.624, 0.000, 0.020, 0.577, 0.023, 0.006, 0.387, 0.019, 0.000, 0.010, 0.142, 0.124, 0.135, 0.035, 0.013, 0.000, 0.425, 0.000],
    [0.565, 0.090, 0.004, 0.001, 0.793, 0.004, 0.001, 0.001, 0.318, 0.000, 0.000, 0.005, 0.096, 0.009, 0.337, 0.239, 0.000, 0.003, 0.093, 0.001, 0.115, 0.000, 0.001, 0.000, 0.062, 0.000],
    [0.347, 0.004, 0.416, 1.352, 0.692, 0.067, 0.953, 0.011, 0.339, 0.011, 0.052, 0.064, 0.028, 0.073, 0.465, 0.006, 0.006, 0.009, 0.509, 1.041, 0.079, 0.052, 0.006, 0.003, 0.098, 0.004],
    [0.057, 0.097, 0.166, 0.195, 0.039, 1.175, 0.094, 0.021, 0.088, 0.007, 0.064, 0.365, 0.546, 1.758, 0.210, 0.224, 0.001, 1.277, 0.290, 0.442, 0.870, 0.178, 0.330, 0.019, 0.036, 0.003],
    [0.324, 0.001, 0.001, 0.001, 0.478, 0.001, 0.000, 0.094, 0.123, 0.000, 0.001, 0.263, 0.016, 0.001, 0.361, 0.137, 0.000, 0.474, 0.055, 0.106, 0.105, 0.000, 0.001, 0.000, 0.012, 0.000],
    [0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.148, 0.000, 0.000, 0.000, 0.000, 0.000],
    [0.686, 0.027, 0.121, 0.189, 1.854, 0.032, 0.100, 0.015, 0.728, 0.001, 0.097, 0.086, 0.175, 0.160, 0.727, 0.042, 0.001, 0.121, 0.397, 0.362, 0.128, 0.069, 0.013, 0.001, 0.248, 0.001],
    [0.218, 0.008, 0.155, 0.005, 0.932, 0.017, 0.002, 0.315, 0.550, 0.000, 0.039, 0.056, 0.065, 0.009, 0.398, 0.191, 0.007, 0.006, 0.405, 1.053, 0.311, 0.001, 0.024, 0.000, 0.057, 0.000],
    [0.530, 0.003, 0.026, 0.001, 1.205, 0.006, 0.002, 3.556, 1.343, 0.000, 0.000, 0.098, 0.026, 0.010, 1.041, 0.004, 0.000, 0.426, 0.337, 0.171, 0.255, 0.001, 0.082, 0.000, 0.227, 0.004],
    [0.136, 0.089, 0.188, 0.091, 0.147, 0.019, 0.128, 0.001, 0.101, 0.001, 0.005, 0.346, 0.138, 0.394, 0.011, 0.136, 0.000, 0.543, 0.454, 0.405, 0.001, 0.003, 0.000, 0.004, 0.005, 0.002],
    [0.140, 0.000, 0.000, 0.000, 0.825, 0.000, 0.000, 0.000, 0.270, 0.000, 0.000, 0.000, 0.000, 0.000, 0.071, 0.000, 0.000, 0.001, 0.001, 0.000, 0.002, 0.000, 0.000, 0.000, 0.005, 0.000],
    [0.385, 0.001, 0.001, 0.004, 0.361, 0.002, 0.000, 0.379, 0.374, 0.000, 0.001, 0.015, 0.001, 0.079, 0.222, 0.001, 0.000, 0.031, 0.035, 0.007, 0.001, 0.000, 0.000, 0.000, 0.002, 0.000],
    [0.030, 0.000, 0.026, 0.000, 0.022, 0.002, 0.000, 0.004, 0.039, 0.000, 0.000, 0.001, 0.000, 0.000, 0.003, 0.067, 0.000, 0.000, 0.000, 0.047, 0.005, 0.002, 0.000, 0.003, 0.003, 0.000],
    [0.016, 0.004, 0.014, 0.007, 0.093, 0.001, 0.003, 0.001, 0.029, 0.000, 0.000, 0.015, 0.024, 0.013, 0.150, 0.025, 0.000, 0.008, 0.097, 0.017, 0.001, 0.000, 0.003, 0.000, 0.000, 0.002],
    [0.025, 0.000, 0.000, 0.000, 0.050, 0.000, 0.000, 0.001, 0.012, 0.000, 0.000, 0.001, 0.000, 0.000, 0.007, 0.000, 0.000, 0.000, 0.000, 0.000, 0.002, 0.000, 0.000, 0.000, 0.002, 0.003],
])
# fmt: on
  36.  
  37.  
  38. def _score(matrix):
  39.     """Calculate score from known matrix of English digram frequencies."""
  40.  
  41.     return abs(matrix - DIGRAM_FREQS_ENGLISH).sum()
  42.  
  43.  
  44. def _get_plaintext(ciphertext, decryption_key):
  45.     # The decryption key will be in order of most common first, so we need to construct
  46.     # a list of indices where to insert each to get an "alphabetical key" instead.
  47.     indices = [ascii_lowercase.index(letter) for letter in "etaoinsrhldcumfpgwybvkxjqz"]
  48.  
  49.     translation_table = {}
  50.  
  51.     for cipher_letter, index in zip(decryption_key, indices):
  52.         plain_letter = ascii_lowercase[index]
  53.         translation_table[cipher_letter] = plain_letter
  54.  
  55.     return "".join([translation_table[c] for c in ciphertext])
  56.  
  57.  
  58. def _get_digram_frequencies(text):
  59.     frequencies = np.zeros((26, 26))
  60.  
  61.     text = text.lower()
  62.     text_length = len(text)
  63.  
  64.     # First, count the number of occurrences of each letter and save to the index that
  65.     # corresponds to the letter pair, i.e. frequencies[0, 0] is for "aa" etc.
  66.     for i in range(0, text_length - 1):
  67.         a = ascii_lowercase.index(text[i])
  68.         b = ascii_lowercase.index(text[i + 1])
  69.         frequencies[a, b] += 1
  70.  
  71.     # Replace each entry with a percentage of the total text length, to get the same
  72.     # format as the English digram frequencies.
  73.     rows, columns = frequencies.shape
  74.     for i in range(rows):
  75.         for j in range(columns):
  76.             # All digram frequencies are in percentages, so convert it here too.
  77.             frequencies[i, j] = 100 * frequencies[i, j] / text_length
  78.  
  79.     return frequencies
  80.  
  81.  
  82. def _swap(matrix, index1, index2):
  83.     matrix[[index1, index2]] = matrix[[index2, index1]]  # Swap rows
  84.     matrix[:, [index1, index2]] = matrix[:, [index2, index1]]  # Swap columns
  85.  
  86.  
  87. ciphertext = (
  88.     "uknkgmhksztkmexmpbxtgxesxekeskvuakgluepbhvpmhhpvxtwhphmvydmrbuthhgkfxgse"
  89.     "xmpbhmeymhhohwgkzdwxenzfbhhlvabufbymkfhvnzehmihvkestuggvnhaupbshgurbpsxz"
  90.     "xddeshmvpkespbuvthhguerpbuvymhhohabufbbkvpmkihgghstmxnpbhmhruxevpxakmsva"
  91.     "bufbuknksikefuerruihvnhktxmhpkvphxtpbxvhufzfgunhvuevwumuphsyzpbuvauesxtw"
  92.     "mxnuvhnzskzs"
  93. )
  94.  
  95. print(f"\nCiphertext:\n{ciphertext}")
  96.  
  97. # Initial key is the letters in the ciphertext in order of most common first, and with
  98. # remaining letters (not present in ciphertext) added at the end.
  99. c = Counter(ciphertext)
  100. key = [letter[0] for letter in c.most_common()]
  101.  
  102. for c in ascii_lowercase:
  103.     if c not in key:
  104.         key += c
  105.  
  106. print(f"\nInitial Key:\n{''.join(key)}")
  107.  
  108. # Algorithm starts here.
  109.  
  110. putative_plaintext = _get_plaintext(ciphertext, key)
  111. digram_frequencies = _get_digram_frequencies(putative_plaintext)
  112.  
  113. best_score = _score(digram_frequencies)
  114. print(f"\nInitial score is {best_score}")
  115. print("Starting climb algorithm\n")
  116.  
  117. for i in range(1, 26):
  118.     for j in range(26 - i):
  119.         d = np.copy(digram_frequencies)
  120.         _swap(d, j, j + i)
  121.         score = _score(d)
  122.         if score < best_score:
  123.             digram_frequencies = np.copy(d)
  124.             key[j], key[j + i] = key[j + i], key[j]
  125.             best_score = score
  126.             print(f"Got new best score {score:.02f} and key is now {''.join(key)}")
  127.  
  128. print(f"\nFinal Key:\n{''.join(key)}")
  129.  
  130. plaintext = _get_plaintext(ciphertext, key)
  131.  
  132. print(f"\nPlaintext:\n{plaintext}\n")
Add Comment
Please, Sign In to add comment