Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
from collections import Counter
from string import ascii_lowercase

import numpy as np
# Reference table of English digram (letter-pair) frequencies, in percent.
# DIGRAM_FREQS_ENGLISH[i, j] is the frequency of the pair whose first letter is
# ascii_lowercase[i] and whose second letter is ascii_lowercase[j], e.g. entry
# [19, 7] is "th". The trailing comment on each row names the first letter.
# fmt: off
DIGRAM_FREQS_ENGLISH = np.array([
    [0.003, 0.230, 0.448, 0.368, 0.012, 0.074, 0.205, 0.014, 0.316, 0.012, 0.105, 1.087, 0.285, 1.985, 0.005, 0.203, 0.002, 1.075, 0.871, 1.487, 0.119, 0.205, 0.060, 0.019, 0.217, 0.012],  # a
    [0.146, 0.011, 0.002, 0.002, 0.576, 0.000, 0.000, 0.001, 0.107, 0.023, 0.000, 0.233, 0.003, 0.002, 0.195, 0.001, 0.000, 0.112, 0.046, 0.017, 0.185, 0.004, 0.000, 0.000, 0.176, 0.000],  # b
    [0.538, 0.001, 0.083, 0.002, 0.651, 0.001, 0.001, 0.598, 0.281, 0.000, 0.118, 0.149, 0.003, 0.001, 0.794, 0.001, 0.005, 0.149, 0.023, 0.461, 0.163, 0.000, 0.000, 0.000, 0.042, 0.001],  # c
    [0.151, 0.003, 0.003, 0.043, 0.765, 0.003, 0.031, 0.005, 0.493, 0.005, 0.000, 0.032, 0.018, 0.008, 0.188, 0.002, 0.001, 0.085, 0.126, 0.003, 0.148, 0.019, 0.008, 0.000, 0.050, 0.000],  # d
    [0.688, 0.027, 0.477, 1.168, 0.378, 0.163, 0.120, 0.026, 0.183, 0.005, 0.016, 0.530, 0.374, 1.454, 0.073, 0.172, 0.057, 2.048, 1.339, 0.413, 0.031, 0.255, 0.117, 0.214, 0.144, 0.005],  # e
    [0.164, 0.000, 0.001, 0.000, 0.237, 0.146, 0.001, 0.000, 0.285, 0.000, 0.000, 0.065, 0.001, 0.000, 0.488, 0.000, 0.000, 0.213, 0.006, 0.082, 0.096, 0.000, 0.000, 0.000, 0.009, 0.000],  # f
    [0.148, 0.000, 0.000, 0.003, 0.385, 0.001, 0.025, 0.228, 0.152, 0.000, 0.000, 0.061, 0.010, 0.066, 0.132, 0.000, 0.000, 0.197, 0.051, 0.015, 0.086, 0.000, 0.001, 0.000, 0.026, 0.000],  # g
    [0.926, 0.004, 0.001, 0.003, 3.075, 0.002, 0.000, 0.001, 0.763, 0.000, 0.000, 0.013, 0.013, 0.026, 0.485, 0.001, 0.000, 0.084, 0.015, 0.130, 0.074, 0.000, 0.005, 0.000, 0.050, 0.000],  # h
    [0.286, 0.099, 0.699, 0.296, 0.385, 0.203, 0.255, 0.002, 0.023, 0.001, 0.043, 0.432, 0.318, 2.433, 0.835, 0.089, 0.011, 0.315, 1.128, 1.123, 0.017, 0.288, 0.001, 0.022, 0.000, 0.064],  # i
    [0.026, 0.000, 0.000, 0.000, 0.052, 0.000, 0.000, 0.000, 0.003, 0.000, 0.000, 0.000, 0.000, 0.000, 0.054, 0.000, 0.000, 0.000, 0.000, 0.000, 0.059, 0.000, 0.000, 0.000, 0.000, 0.000],  # j
    [0.017, 0.001, 0.000, 0.001, 0.214, 0.002, 0.003, 0.003, 0.098, 0.000, 0.000, 0.011, 0.002, 0.051, 0.006, 0.001, 0.000, 0.003, 0.048, 0.001, 0.003, 0.000, 0.002, 0.000, 0.006, 0.000],  # k
    [0.528, 0.007, 0.012, 0.253, 0.829, 0.053, 0.006, 0.002, 0.624, 0.000, 0.020, 0.577, 0.023, 0.006, 0.387, 0.019, 0.000, 0.010, 0.142, 0.124, 0.135, 0.035, 0.013, 0.000, 0.425, 0.000],  # l
    [0.565, 0.090, 0.004, 0.001, 0.793, 0.004, 0.001, 0.001, 0.318, 0.000, 0.000, 0.005, 0.096, 0.009, 0.337, 0.239, 0.000, 0.003, 0.093, 0.001, 0.115, 0.000, 0.001, 0.000, 0.062, 0.000],  # m
    [0.347, 0.004, 0.416, 1.352, 0.692, 0.067, 0.953, 0.011, 0.339, 0.011, 0.052, 0.064, 0.028, 0.073, 0.465, 0.006, 0.006, 0.009, 0.509, 1.041, 0.079, 0.052, 0.006, 0.003, 0.098, 0.004],  # n
    [0.057, 0.097, 0.166, 0.195, 0.039, 1.175, 0.094, 0.021, 0.088, 0.007, 0.064, 0.365, 0.546, 1.758, 0.210, 0.224, 0.001, 1.277, 0.290, 0.442, 0.870, 0.178, 0.330, 0.019, 0.036, 0.003],  # o
    [0.324, 0.001, 0.001, 0.001, 0.478, 0.001, 0.000, 0.094, 0.123, 0.000, 0.001, 0.263, 0.016, 0.001, 0.361, 0.137, 0.000, 0.474, 0.055, 0.106, 0.105, 0.000, 0.001, 0.000, 0.012, 0.000],  # p
    [0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.148, 0.000, 0.000, 0.000, 0.000, 0.000],  # q
    [0.686, 0.027, 0.121, 0.189, 1.854, 0.032, 0.100, 0.015, 0.728, 0.001, 0.097, 0.086, 0.175, 0.160, 0.727, 0.042, 0.001, 0.121, 0.397, 0.362, 0.128, 0.069, 0.013, 0.001, 0.248, 0.001],  # r
    [0.218, 0.008, 0.155, 0.005, 0.932, 0.017, 0.002, 0.315, 0.550, 0.000, 0.039, 0.056, 0.065, 0.009, 0.398, 0.191, 0.007, 0.006, 0.405, 1.053, 0.311, 0.001, 0.024, 0.000, 0.057, 0.000],  # s
    [0.530, 0.003, 0.026, 0.001, 1.205, 0.006, 0.002, 3.556, 1.343, 0.000, 0.000, 0.098, 0.026, 0.010, 1.041, 0.004, 0.000, 0.426, 0.337, 0.171, 0.255, 0.001, 0.082, 0.000, 0.227, 0.004],  # t
    [0.136, 0.089, 0.188, 0.091, 0.147, 0.019, 0.128, 0.001, 0.101, 0.001, 0.005, 0.346, 0.138, 0.394, 0.011, 0.136, 0.000, 0.543, 0.454, 0.405, 0.001, 0.003, 0.000, 0.004, 0.005, 0.002],  # u
    [0.140, 0.000, 0.000, 0.000, 0.825, 0.000, 0.000, 0.000, 0.270, 0.000, 0.000, 0.000, 0.000, 0.000, 0.071, 0.000, 0.000, 0.001, 0.001, 0.000, 0.002, 0.000, 0.000, 0.000, 0.005, 0.000],  # v
    [0.385, 0.001, 0.001, 0.004, 0.361, 0.002, 0.000, 0.379, 0.374, 0.000, 0.001, 0.015, 0.001, 0.079, 0.222, 0.001, 0.000, 0.031, 0.035, 0.007, 0.001, 0.000, 0.000, 0.000, 0.002, 0.000],  # w
    [0.030, 0.000, 0.026, 0.000, 0.022, 0.002, 0.000, 0.004, 0.039, 0.000, 0.000, 0.001, 0.000, 0.000, 0.003, 0.067, 0.000, 0.000, 0.000, 0.047, 0.005, 0.002, 0.000, 0.003, 0.003, 0.000],  # x
    [0.016, 0.004, 0.014, 0.007, 0.093, 0.001, 0.003, 0.001, 0.029, 0.000, 0.000, 0.015, 0.024, 0.013, 0.150, 0.025, 0.000, 0.008, 0.097, 0.017, 0.001, 0.000, 0.003, 0.000, 0.000, 0.002],  # y
    [0.025, 0.000, 0.000, 0.000, 0.050, 0.000, 0.000, 0.001, 0.012, 0.000, 0.000, 0.001, 0.000, 0.000, 0.007, 0.000, 0.000, 0.000, 0.000, 0.000, 0.002, 0.000, 0.000, 0.000, 0.002, 0.003],  # z
])
# fmt: on
- def _score(matrix):
- """Calculate score from known matrix of English digram frequencies."""
- return abs(matrix - DIGRAM_FREQS_ENGLISH).sum()
- def _get_plaintext(ciphertext, decryption_key):
- # The decryption key will be in order of most common first, so we need to construct
- # a list of indices where to insert each to get an "alphabetical key" instead.
- indices = [ascii_lowercase.index(letter) for letter in "etaoinsrhldcumfpgwybvkxjqz"]
- translation_table = {}
- for cipher_letter, index in zip(decryption_key, indices):
- plain_letter = ascii_lowercase[index]
- translation_table[cipher_letter] = plain_letter
- return "".join([translation_table[c] for c in ciphertext])
- def _get_digram_frequencies(text):
- frequencies = np.zeros((26, 26))
- text = text.lower()
- text_length = len(text)
- # First, count the number of occurrences of each letter and save to the index that
- # corresponds to the letter pair, i.e. frequencies[0, 0] is for "aa" etc.
- for i in range(0, text_length - 1):
- a = ascii_lowercase.index(text[i])
- b = ascii_lowercase.index(text[i + 1])
- frequencies[a, b] += 1
- # Replace each entry with a percentage of the total text length, to get the same
- # format as the English digram frequencies.
- rows, columns = frequencies.shape
- for i in range(rows):
- for j in range(columns):
- # All digram frequencies are in percentages, so convert it here too.
- frequencies[i, j] = 100 * frequencies[i, j] / text_length
- return frequencies
- def _swap(matrix, index1, index2):
- matrix[[index1, index2]] = matrix[[index2, index1]] # Swap rows
- matrix[:, [index1, index2]] = matrix[:, [index2, index1]] # Swap columns
ciphertext = (
    "uknkgmhksztkmexmpbxtgxesxekeskvuakgluepbhvpmhhpvxtwhphmvydmrbuthhgkfxgse"
    "xmpbhmeymhhohwgkzdwxenzfbhhlvabufbymkfhvnzehmihvkestuggvnhaupbshgurbpsxz"
    "xddeshmvpkespbuvthhguerpbuvymhhohabufbbkvpmkihgghstmxnpbhmhruxevpxakmsva"
    "bufbuknksikefuerruihvnhktxmhpkvphxtpbxvhufzfgunhvuevwumuphsyzpbuvauesxtw"
    "mxnuvhnzskzs"
)
print(f"\nCiphertext:\n{ciphertext}")

# Initial key is the letters in the ciphertext in order of most common first, and with
# remaining letters (not present in ciphertext) added at the end.
letter_counts = Counter(ciphertext)
key = [letter for letter, _ in letter_counts.most_common()]
for letter in ascii_lowercase:
    if letter not in key:
        key.append(letter)
print(f"\nInitial Key:\n{''.join(key)}")

# key[k] is the cipher letter that decrypts to the k-th most common plaintext letter
# (this ordering must match the one used by _get_plaintext), whereas the digram matrix
# is indexed alphabetically. Map each key position to the alphabet index of the
# plaintext letter it stands for, so matrix swaps mirror key swaps exactly.
frequency_order = "etaoinsrhldcumfpgwybvkxjqz"
plain_index = [ascii_lowercase.index(letter) for letter in frequency_order]

# Algorithm starts here.
putative_plaintext = _get_plaintext(ciphertext, key)
digram_frequencies = _get_digram_frequencies(putative_plaintext)
best_score = _score(digram_frequencies)
print(f"\nInitial score is {best_score}")
print("Starting climb algorithm\n")

# Try every pair of key positions, nearest neighbours first (distance i = 1, 2, ...).
for i in range(1, 26):
    for j in range(26 - i):
        # Exchanging key[j] and key[j + i] exchanges two plaintext letters everywhere
        # in the putative plaintext, which is the same as swapping their rows and
        # columns in the digram matrix; no re-decryption is needed to score it.
        d = np.copy(digram_frequencies)
        _swap(d, plain_index[j], plain_index[j + i])
        score = _score(d)
        if score < best_score:
            # Keep the improvement: commit both the matrix and the key swap.
            digram_frequencies = d
            key[j], key[j + i] = key[j + i], key[j]
            best_score = score
            print(f"Got new best score {score:.02f} and key is now {''.join(key)}")

print(f"\nFinal Key:\n{''.join(key)}")
plaintext = _get_plaintext(ciphertext, key)
print(f"\nPlaintext:\n{plaintext}\n")
Add Comment
Please, Sign In to add comment