# Untitled

import re
from collections import defaultdict, Counter
from typing import Dict, List


def create_symbols(word):
    """Return *word* with every non-alphabetic character removed."""
    letters = [ch for ch in word if ch.isalpha()]
    return ''.join(letters)


def match_words(words: list, reg: str) -> list:
    """Return the items of *words* that match the regex *reg* from their start.

    Matching uses ``re.match``, so the pattern is anchored at the beginning
    of each word only; callers that need a full-word match must end *reg*
    with ``$``.
    """
    matched = []
    for candidate in words:
        if re.match(reg, candidate) is not None:
            matched.append(candidate)
    return matched


def count_words(text: str) -> tuple:
    """Compute per-length word frequencies for *text*.

    The text is lower-cased and split on whitespace, and every token is
    reduced to its letters with ``create_symbols``.  Tokens that contain no
    letters at all (numbers, dashes, stray punctuation) are skipped, so they
    neither show up as an empty "word" nor inflate the total used for the
    percentages.

    Returns:
        A ``(word_dict, unique_words)`` tuple.  ``word_dict`` maps a word
        length to a list of ``(word, percent)`` pairs sorted by descending
        percent, where percent is the word's share of all counted words.
        ``unique_words`` is the set of distinct words.
    """
    cleaned = (create_symbols(token) for token in text.lower().split())
    # Drop tokens that had no letters: they would otherwise be counted as ''.
    words = [word for word in cleaned if word]

    word_dict = defaultdict(lambda: defaultdict(int))
    tot = len(words)
    for word in words:
        word_dict[len(word)][word] += 1

    # Replacing the value of an existing key while iterating is safe: the
    # dict does not change size.
    for length, same_size in word_dict.items():
        shares = [(word, count / (tot * 0.01)) for word, count in same_size.items()]
        word_dict[length] = sorted(shares, key=lambda x: -x[1])

    return word_dict, set(words)


def count_chars(s: str) -> list:
    """Return letter frequencies of *s* as percentages, most frequent first.

    The string is lower-cased and only alphabetic characters are counted.
    The result is a list of ``(letter, percent)`` pairs sorted by descending
    percent; letters with equal share keep their first-seen order.
    """
    letters = [ch for ch in s.lower() if ch.isalpha()]
    occurrences = Counter(letters)
    total = sum(occurrences.values())

    shares = [(ch, n / (total * 0.01)) for ch, n in occurrences.items()]
    shares.sort(key=lambda pair: -pair[1])
    return shares


def is_checkable(word: str, key: Dict[str, str]) -> bool:
    """Tell whether *word* contains at least one already-resolved cipher letter.

    A cipher letter counts as resolved when its entry in *key* is not None.
    """
    for cipher_char, plain_char in key.items():
        if plain_char is not None and cipher_char in word:
            return True
    return False


def get_unknown_chars(key: Dict[str, str]) -> str:
    """Return the alphabet letters that are not yet used as a value in *key*.

    The letters are returned as one string, in the order they appear in the
    built-in alphabet literal.
    """
    # NOTE(review): the literal contains no 'Цц' and no uppercase 'Ө' —
    # confirm whether that is intentional.
    alph = 'АаБбВвГгДдЕеЁёЖжЗзИиЙйКкЛлМмНнҢңОоөПпРрСсТтУуҮүФфХхЧчШшЩщЪъЫыЬьЭэЮюЯя'
    taken = key.values()
    remaining = []
    for letter in alph:
        if letter not in taken:
            remaining.append(letter)
    return ''.join(remaining)


def get_unknown_keys(key: Dict[str, str]) -> str:
    """Return, as one string, the cipher letters of *key* whose value is None.

    The letters keep the iteration order of *key*.
    """
    unresolved = []
    for cipher_char, plain_char in key.items():
        if plain_char is None:
            unresolved.append(cipher_char)
    return ''.join(unresolved)


def check_pattern(match: str, word: str) -> bool:
    """Tell whether *match* repeats letters at the same positions as *word*.

    For every pair of positions inside *word*, the two characters of *match*
    must be equal exactly when the two characters of *word* are equal.
    Positions are taken from ``len(word)``; *match* is indexed with the same
    positions.
    """
    size = len(word)
    for i in range(size - 1):
        for j in range(i, size):
            same_in_match = match[i] == match[j]
            same_in_word = word[i] == word[j]
            if same_in_match != same_in_word:
                return False
    return True


def generate_pattern(word: str, key: Dict[str, str], counted_encoding_chars: List[List[str]]) -> str:
    """Build a regex describing how the plain *word* may look when encoded.

    For each character of *word*:
      * if it is already a value in *key*, the first cipher letter mapped to
        it is emitted literally;
      * otherwise, if it is one of 'н', 'к', 'ы', a character class made of
        the first elements of ``counted_encoding_chars[1:4]`` is emitted;
      * otherwise a character class of all still-unresolved cipher letters
        (``get_unknown_keys``) is emitted.

    The pattern is terminated with ``$``.
    """
    pieces = []
    for plain_char in word:
        if plain_char in key.values():
            # Reverse lookup: first cipher letter whose value is this char.
            cipher_char = next(c for c, p in key.items() if p == plain_char)
            pieces.append(cipher_char)
        elif plain_char in 'нкы':
            frequent = ''.join(entry[0] for entry in counted_encoding_chars[1:4])
            pieces.append(f'[{frequent}]')
        else:
            pieces.append(f'[{get_unknown_keys(key)}]')

    pieces.append('$')
    return ''.join(pieces)


def _read_text(path: str) -> str:
    """Return the whole content of the UTF-8 text file at *path*."""
    with open(path, 'r', encoding='UTF-8') as file:
        return file.read()


def _decode_text(encoded_text: str, key: Dict[str, str]) -> str:
    """Apply *key* to *encoded_text* character by character.

    Every character is lower-cased first.  Letters are replaced with their
    value in *key*, or with 'X' when the letter is missing from *key* or
    still mapped to None; all other characters are copied through.
    """
    decoded = []
    for char in encoded_text:
        char = char.lower()
        if char.isalpha():
            plain = key.get(char)
            decoded.append(plain if plain is not None else 'X')
        else:
            decoded.append(char)
    return ''.join(decoded)


def main() -> None:
    """Break the substitution cipher of 'input.txt' and let the user refine it.

    Steps:
      1. Read the encoded text and a plain-language sample text, and compute
         letter frequencies and word sets for both.
      2. Seed the key by mapping the most frequent encoded letter to the most
         frequent sample letter.
      3. Repeatedly look for encoded words that have exactly one possible
         sample-word counterpart and take new letter mappings from them,
         until every letter is resolved or no such word is left.
      4. Print the decoded text and let the user override single mappings
         interactively; finish on empty input.

    Raises:
        ValueError: if the sample text contains no letters (``max`` of an
            empty mapping).
        IndexError: if the encoded text contains no letters.
    """
    encoded_text = _read_text('input.txt')
    sample_text = _read_text('Manas-eposu-2010-S-Karalaev.txt').replace(
        'www.el-sozduk.kg', '')

    counted_chars = count_chars(sample_text)
    counted_encoding_chars = count_chars(encoded_text)

    _, encoded_words = count_words(encoded_text)
    _, sample_words = count_words(sample_text)

    counted_encoding_chars_dict = dict(counted_encoding_chars)
    counted_chars_dict = dict(counted_chars)

    # Every encoded letter starts out unresolved.
    key = {char: None for char, _ in counted_encoding_chars}

    most_common_char = max(counted_chars_dict, key=counted_chars_dict.get)
    key[counted_encoding_chars[0][0]] = most_common_char

    print(counted_chars)
    print(counted_encoding_chars)

    unknown_chars = ''.join([c[0] for c in counted_chars[2:]])

    print(unknown_chars)

    # Keep going while some encoded letter is still unresolved.  (The
    # original tested the dict *keys* for None, which is always true.)
    while any(value is None for value in key.values()):
        print(key)

        # The key is not modified while the words are scanned, so these can
        # be computed once per pass instead of once per character.
        known_values = set(key.values())
        unknown_class = f'[{get_unknown_chars(key)}]'

        examples = []

        for word in encoded_words:
            if not is_checkable(word, key):
                continue

            # Resolved letters are matched literally, unresolved ones may be
            # any plain letter that is not assigned yet.
            reg = ''.join(
                unknown_class if key.get(s) is None else key[s] for s in word
            ) + '$'

            matches = match_words(sample_words, reg)
            if word == 'лңк':
                # Debug output for this specific word, kept from the original.
                print(matches, reg)
            matches = [m for m in matches if check_pattern(m, word)]

            # Only an unambiguous candidate is trusted.
            if len(matches) != 1:
                continue
            match = matches[0]
            # ...and only if it contains a letter that is not in the key yet.
            if all(s in known_values for s in match):
                continue

            err = 0.0
            is_valid = True

            for enc_char, plain_char in zip(word, match):
                if key[enc_char] is None:
                    # Relative deviation between the two letter frequencies.
                    err += abs(
                        counted_encoding_chars_dict[enc_char] * 1.0 / counted_chars_dict[plain_char] - 1)

                    # A plain letter must not be assigned to two cipher letters.
                    if plain_char in known_values:
                        is_valid = False
                        break

            if is_valid:
                examples.append((word, match, err / len(word)))

        if not examples:
            break

        # Best (lowest average frequency error) candidates first.
        examples.sort(key=lambda x: x[2])

        new_letters = 0
        new_words = 0

        # NOTE(review): later examples of the same pass are applied without
        # re-checking against letters assigned earlier in this loop — confirm
        # that duplicated plain letters are acceptable here.
        for enc_word, plain_word, _ in examples:
            new_words += 1
            for enc_char, plain_char in zip(enc_word, plain_word):
                if key[enc_char] is None:
                    key[enc_char] = plain_char
                    new_letters += 1
            if new_words > 8 and new_letters > 5:
                break

        print(examples[0])

    text = _decode_text(encoded_text, key)

    while True:
        print(key)

        char_to_replace = input("Введите символ, который нужно заменить (или нажмите Enter для продолжения): ")

        if not char_to_replace:
            break

        if char_to_replace in key:
            # Ask the user for the new plain letter.
            new_char = input(f"Введите новое значение для символа '{char_to_replace}': ")
            key[char_to_replace] = new_char

            # Re-decode the text with the updated key and show it.
            text = _decode_text(encoded_text, key)

            print("After decode\n")
            print(text)

    print("Before decode\n")
    # The encoded text is already in memory; no need to read the file again.
    print(encoded_text)

    print("Final decode\n")
    print(text)


# Run the solver only when the file is executed as a script, not on import.
if __name__ == '__main__':
    main()