"""Hill-climbing attack on a substitution cipher using English digram frequencies."""

from collections import Counter
from string import ascii_lowercase

import numpy as np

# Reference table of English digram (letter-pair) frequencies that _score() compares
# candidate matrices against. It is a 26x26 array laid out like the result of
# _get_digram_frequencies(): entry [i, j] is the frequency, in percent, of the pair
# whose first letter is ascii_lowercase[i] and whose second letter is
# ascii_lowercase[j], so [0, 0] is "aa" and [19, 7] is "th".
# NOTE(review): the corpus these figures were derived from is not recorded in this
# file -- confirm the source before reusing the table elsewhere.
# fmt: off
DIGRAM_FREQS_ENGLISH = np.array([
    [0.003, 0.230, 0.448, 0.368, 0.012, 0.074, 0.205, 0.014, 0.316, 0.012, 0.105, 1.087, 0.285, 1.985, 0.005, 0.203, 0.002, 1.075, 0.871, 1.487, 0.119, 0.205, 0.060, 0.019, 0.217, 0.012],
    [0.146, 0.011, 0.002, 0.002, 0.576, 0.000, 0.000, 0.001, 0.107, 0.023, 0.000, 0.233, 0.003, 0.002, 0.195, 0.001, 0.000, 0.112, 0.046, 0.017, 0.185, 0.004, 0.000, 0.000, 0.176, 0.000],
    [0.538, 0.001, 0.083, 0.002, 0.651, 0.001, 0.001, 0.598, 0.281, 0.000, 0.118, 0.149, 0.003, 0.001, 0.794, 0.001, 0.005, 0.149, 0.023, 0.461, 0.163, 0.000, 0.000, 0.000, 0.042, 0.001],
    [0.151, 0.003, 0.003, 0.043, 0.765, 0.003, 0.031, 0.005, 0.493, 0.005, 0.000, 0.032, 0.018, 0.008, 0.188, 0.002, 0.001, 0.085, 0.126, 0.003, 0.148, 0.019, 0.008, 0.000, 0.050, 0.000],
    [0.688, 0.027, 0.477, 1.168, 0.378, 0.163, 0.120, 0.026, 0.183, 0.005, 0.016, 0.530, 0.374, 1.454, 0.073, 0.172, 0.057, 2.048, 1.339, 0.413, 0.031, 0.255, 0.117, 0.214, 0.144, 0.005],
    [0.164, 0.000, 0.001, 0.000, 0.237, 0.146, 0.001, 0.000, 0.285, 0.000, 0.000, 0.065, 0.001, 0.000, 0.488, 0.000, 0.000, 0.213, 0.006, 0.082, 0.096, 0.000, 0.000, 0.000, 0.009, 0.000],
    [0.148, 0.000, 0.000, 0.003, 0.385, 0.001, 0.025, 0.228, 0.152, 0.000, 0.000, 0.061, 0.010, 0.066, 0.132, 0.000, 0.000, 0.197, 0.051, 0.015, 0.086, 0.000, 0.001, 0.000, 0.026, 0.000],
    [0.926, 0.004, 0.001, 0.003, 3.075, 0.002, 0.000, 0.001, 0.763, 0.000, 0.000, 0.013, 0.013, 0.026, 0.485, 0.001, 0.000, 0.084, 0.015, 0.130, 0.074, 0.000, 0.005, 0.000, 0.050, 0.000],
    [0.286, 0.099, 0.699, 0.296, 0.385, 0.203, 0.255, 0.002, 0.023, 0.001, 0.043, 0.432, 0.318, 2.433, 0.835, 0.089, 0.011, 0.315, 1.128, 1.123, 0.017, 0.288, 0.001, 0.022, 0.000, 0.064],
    [0.026, 0.000, 0.000, 0.000, 0.052, 0.000, 0.000, 0.000, 0.003, 0.000, 0.000, 0.000, 0.000, 0.000, 0.054, 0.000, 0.000, 0.000, 0.000, 0.000, 0.059, 0.000, 0.000, 0.000, 0.000, 0.000],
    [0.017, 0.001, 0.000, 0.001, 0.214, 0.002, 0.003, 0.003, 0.098, 0.000, 0.000, 0.011, 0.002, 0.051, 0.006, 0.001, 0.000, 0.003, 0.048, 0.001, 0.003, 0.000, 0.002, 0.000, 0.006, 0.000],
    [0.528, 0.007, 0.012, 0.253, 0.829, 0.053, 0.006, 0.002, 0.624, 0.000, 0.020, 0.577, 0.023, 0.006, 0.387, 0.019, 0.000, 0.010, 0.142, 0.124, 0.135, 0.035, 0.013, 0.000, 0.425, 0.000],
    [0.565, 0.090, 0.004, 0.001, 0.793, 0.004, 0.001, 0.001, 0.318, 0.000, 0.000, 0.005, 0.096, 0.009, 0.337, 0.239, 0.000, 0.003, 0.093, 0.001, 0.115, 0.000, 0.001, 0.000, 0.062, 0.000],
    [0.347, 0.004, 0.416, 1.352, 0.692, 0.067, 0.953, 0.011, 0.339, 0.011, 0.052, 0.064, 0.028, 0.073, 0.465, 0.006, 0.006, 0.009, 0.509, 1.041, 0.079, 0.052, 0.006, 0.003, 0.098, 0.004],
    [0.057, 0.097, 0.166, 0.195, 0.039, 1.175, 0.094, 0.021, 0.088, 0.007, 0.064, 0.365, 0.546, 1.758, 0.210, 0.224, 0.001, 1.277, 0.290, 0.442, 0.870, 0.178, 0.330, 0.019, 0.036, 0.003],
    [0.324, 0.001, 0.001, 0.001, 0.478, 0.001, 0.000, 0.094, 0.123, 0.000, 0.001, 0.263, 0.016, 0.001, 0.361, 0.137, 0.000, 0.474, 0.055, 0.106, 0.105, 0.000, 0.001, 0.000, 0.012, 0.000],
    [0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.148, 0.000, 0.000, 0.000, 0.000, 0.000],
    [0.686, 0.027, 0.121, 0.189, 1.854, 0.032, 0.100, 0.015, 0.728, 0.001, 0.097, 0.086, 0.175, 0.160, 0.727, 0.042, 0.001, 0.121, 0.397, 0.362, 0.128, 0.069, 0.013, 0.001, 0.248, 0.001],
    [0.218, 0.008, 0.155, 0.005, 0.932, 0.017, 0.002, 0.315, 0.550, 0.000, 0.039, 0.056, 0.065, 0.009, 0.398, 0.191, 0.007, 0.006, 0.405, 1.053, 0.311, 0.001, 0.024, 0.000, 0.057, 0.000],
    [0.530, 0.003, 0.026, 0.001, 1.205, 0.006, 0.002, 3.556, 1.343, 0.000, 0.000, 0.098, 0.026, 0.010, 1.041, 0.004, 0.000, 0.426, 0.337, 0.171, 0.255, 0.001, 0.082, 0.000, 0.227, 0.004],
    [0.136, 0.089, 0.188, 0.091, 0.147, 0.019, 0.128, 0.001, 0.101, 0.001, 0.005, 0.346, 0.138, 0.394, 0.011, 0.136, 0.000, 0.543, 0.454, 0.405, 0.001, 0.003, 0.000, 0.004, 0.005, 0.002],
    [0.140, 0.000, 0.000, 0.000, 0.825, 0.000, 0.000, 0.000, 0.270, 0.000, 0.000, 0.000, 0.000, 0.000, 0.071, 0.000, 0.000, 0.001, 0.001, 0.000, 0.002, 0.000, 0.000, 0.000, 0.005, 0.000],
    [0.385, 0.001, 0.001, 0.004, 0.361, 0.002, 0.000, 0.379, 0.374, 0.000, 0.001, 0.015, 0.001, 0.079, 0.222, 0.001, 0.000, 0.031, 0.035, 0.007, 0.001, 0.000, 0.000, 0.000, 0.002, 0.000],
    [0.030, 0.000, 0.026, 0.000, 0.022, 0.002, 0.000, 0.004, 0.039, 0.000, 0.000, 0.001, 0.000, 0.000, 0.003, 0.067, 0.000, 0.000, 0.000, 0.047, 0.005, 0.002, 0.000, 0.003, 0.003, 0.000],
    [0.016, 0.004, 0.014, 0.007, 0.093, 0.001, 0.003, 0.001, 0.029, 0.000, 0.000, 0.015, 0.024, 0.013, 0.150, 0.025, 0.000, 0.008, 0.097, 0.017, 0.001, 0.000, 0.003, 0.000, 0.000, 0.002],
    [0.025, 0.000, 0.000, 0.000, 0.050, 0.000, 0.000, 0.001, 0.012, 0.000, 0.000, 0.001, 0.000, 0.000, 0.007, 0.000, 0.000, 0.000, 0.000, 0.000, 0.002, 0.000, 0.000, 0.000, 0.002, 0.003],
])
# fmt: on


def _score(matrix):
    """Return the distance between a digram matrix and the English reference table.

    The distance is the sum of the element-wise absolute differences between
    ``matrix`` and ``DIGRAM_FREQS_ENGLISH``, so a smaller value means a closer match.
    """
    deviation = np.abs(matrix - DIGRAM_FREQS_ENGLISH)
    return deviation.sum()


def _get_plaintext(ciphertext, decryption_key):
    """Decrypt ``ciphertext`` with a frequency-ordered substitution key.

    ``decryption_key`` lists cipher letters by rank: the letter at position ``k``
    decrypts to the ``k``-th letter of ``"etaoinsrhldcumfpgwybvkxjqz"`` (English
    letters from most to least common). Characters of ``ciphertext`` that do not
    occur in the key are copied to the output unchanged.

    Args:
        ciphertext: The text (or any iterable of single characters) to decrypt.
        decryption_key: A string or sequence of single-character cipher letters.

    Returns:
        The decrypted text as a string.
    """
    # English letters ordered from most to least frequent.
    plain_by_frequency = "etaoinsrhldcumfpgwybvkxjqz"

    # zip() pairs key position k with plaintext letter k and stops at the shorter of
    # the two, so a key shorter than 26 letters simply maps fewer letters.
    translation_table = dict(zip(decryption_key, plain_by_frequency))

    return "".join(translation_table.get(symbol, symbol) for symbol in ciphertext)


def _get_digram_frequencies(text):
    """Count the adjacent letter pairs of ``text`` as a 26x26 matrix of percentages.

    Entry ``[a, b]`` is ``100 * count / len(text)``, where ``count`` is the number of
    times the letter with alphabet index ``a`` is immediately followed by the letter
    with alphabet index ``b`` (so ``[0, 0]`` is for "aa"). Case is ignored.

    Args:
        text: A string made up of the letters a-z (upper or lower case).

    Returns:
        A ``(26, 26)`` float array. It is all zeros when ``text`` has fewer than two
        characters, because such a text contains no digram.

    Raises:
        ValueError: If a text of two or more characters contains anything other
            than the letters a-z after lower-casing.
    """
    frequencies = np.zeros((26, 26))

    text = text.lower()
    text_length = len(text)

    if text_length < 2:
        # No pairs to count; returning here also avoids the 0/0 division that would
        # otherwise turn the whole matrix into NaN for an empty text.
        return frequencies

    # Translate every character to its alphabet index once instead of twice per pair.
    letter_indices = [ascii_lowercase.index(letter) for letter in text]

    for first, second in zip(letter_indices, letter_indices[1:]):
        frequencies[first, second] += 1

    # Express the counts as a percentage of the text length, which is the format of
    # the English reference table.
    return 100 * frequencies / text_length


def _swap(matrix, index1, index2):
    """Exchange rows and columns ``index1`` and ``index2`` of ``matrix`` in place."""
    pair = [index1, index2]
    flipped = [index2, index1]

    # Integer-array indexing on the right-hand side yields a copy, so each assignment
    # reads the old values before overwriting them.
    matrix[pair, :] = matrix[flipped, :]
    matrix[:, pair] = matrix[:, flipped]


ciphertext = (
    "uknkgmhksztkmexmpbxtgxesxekeskvuakgluepbhvpmhhpvxtwhphmvydmrbuthhgkfxgse"
    "xmpbhmeymhhohwgkzdwxenzfbhhlvabufbymkfhvnzehmihvkestuggvnhaupbshgurbpsxz"
    "xddeshmvpkespbuvthhguerpbuvymhhohabufbbkvpmkihgghstmxnpbhmhruxevpxakmsva"
    "bufbuknksikefuerruihvnhktxmhpkvphxtpbxvhufzfgunhvuevwumuphsyzpbuvauesxtw"
    "mxnuvhnzskzs"
)

print(f"\nCiphertext:\n{ciphertext}")

# Seed the key with the letters of the ciphertext, most frequent first; letters that
# never occur in the ciphertext follow in alphabetical order so the key has all 26.
key = [letter for letter, _count in Counter(ciphertext).most_common()]
key.extend([letter for letter in ascii_lowercase if letter not in key])

print(f"\nInitial Key:\n{''.join(key)}")

# Algorithm starts here.
#
# Starting from the frequency-ordered key, try exchanging every pair of key positions
# (closest pairs first, then pairs further apart) and keep an exchange whenever it
# brings the digram matrix of the resulting plaintext closer to the English table.

putative_plaintext = _get_plaintext(ciphertext, key)
digram_frequencies = _get_digram_frequencies(putative_plaintext)

best_score = _score(digram_frequencies)
print(f"\nInitial score is {best_score}")
print("Starting climb algorithm\n")

# key[k] holds the cipher letter for the k-th most common English letter, whereas the
# digram matrix is indexed alphabetically. Decrypting the alphabet with itself as the
# key yields the plaintext letter behind every key position, and from that the matrix
# row/column that has to move when that key position is exchanged.
matrix_index = [
    ascii_lowercase.index(plain_letter)
    for plain_letter in _get_plaintext(ascii_lowercase, ascii_lowercase)
]

for i in range(1, 26):
    for j in range(26 - i):
        # Exchanging two key entries exchanges the two plaintext letters they decrypt
        # to, which is the same as swapping those letters' rows and columns.
        d = np.copy(digram_frequencies)
        _swap(d, matrix_index[j], matrix_index[j + i])
        score = _score(d)
        if score < best_score:
            # d is already a private copy, so it can become the new baseline as is.
            digram_frequencies = d
            key[j], key[j + i] = key[j + i], key[j]
            best_score = score
            print(f"Got new best score {score:.02f} and key is now {''.join(key)}")

print(f"\nFinal Key:\n{''.join(key)}")

plaintext = _get_plaintext(ciphertext, key)

print(f"\nPlaintext:\n{plaintext}\n")