Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
"""Recover the key of a Vigenere-style cipher and decrypt the text.

The script estimates the key length in two independent ways (repeated
substring spacing and index of coincidence), prints both candidate lists
and their intersection, then derives a key of length ``k_len`` by assuming
the most frequent character of every key column is an encrypted space.
"""
import os
from collections import Counter, defaultdict
from math import gcd
from statistics import mean

# Longest repeated fragment considered when measuring repeat spacing.
word_len = 20
# Longest key length scored by the index-of-coincidence ranking.
key_len = 20
# Key length actually used for decryption (chosen by hand from the output).
k_len = 4
# Cipher alphabet: the code points from 'А' through 'Я', then a space.
alp = ''.join(map(chr, range(ord('А'), ord('Я') + 1))) + ' '


def kasiski_lengths(text, min_len=3, max_len=word_len):
    """Return key-length candidates derived from repeated fragments.

    For every fragment of ``min_len``..``max_len`` characters, the gaps
    between consecutive non-overlapping occurrences are folded with gcd;
    each non-zero result is one candidate.

    Args:
        text: Ciphertext to analyse.
        min_len: Shortest fragment length to index.
        max_len: Longest fragment length to index.

    Returns:
        A set of positive integers (empty if nothing repeats).
    """
    positions = defaultdict(list)
    for size in range(min_len, max_len + 1):
        # "+ 1" so the fragment ending on the very last character is indexed.
        for start in range(len(text) - size + 1):
            positions[text[start:start + size]].append(start)

    candidates = set()
    for fragment, starts in positions.items():
        previous = starts[0]
        spacing = 0
        for current in starts[1:]:
            # Overlapping occurrences are skipped without moving the anchor.
            if current - previous >= len(fragment):
                spacing = gcd(current - previous, spacing)
                previous = current
        if spacing:
            candidates.add(spacing)
    return candidates


def index_of_coincidence(sample):
    """Return the chance that two distinct positions hold the same symbol.

    Samples shorter than two characters have no pairs to compare and
    yield ``0.0`` instead of dividing by zero.
    """
    total = len(sample)
    if total < 2:
        return 0.0
    matches = sum(n * (n - 1) for n in Counter(sample).values())
    return matches / (total * (total - 1))


def rank_key_lengths(text, max_len=key_len, target=0.0553):
    """Order key lengths 2..``max_len`` by closeness to ``target``.

    Each length is scored by the mean index of coincidence of its columns
    ``text[offset::length]``; a smaller distance to ``target`` ranks first
    and ties go to the shorter length.

    Args:
        text: Ciphertext to analyse.
        max_len: Largest key length to score.
        target: Reference index of coincidence (0.0553 is presumably the
            expected value for Russian plaintext -- verify for other data).

    Returns:
        A tuple of key lengths, best match first (empty if ``max_len < 2``).
    """
    scored = []
    for length in range(2, max_len + 1):
        average = mean(index_of_coincidence(text[offset::length])
                       for offset in range(length))
        scored.append((abs(target - average), length))
    scored.sort()
    return tuple(length for _, length in scored)


def recover_key(text, length, alphabet=alp):
    """Guess the per-column shifts, assuming the top symbol is a space.

    Characters outside ``alphabet`` are ignored when counting. A column
    with no alphabet characters gets shift 0, since nothing can be inferred.

    Returns:
        A list of ``length`` shifts, each in ``range(len(alphabet))``.
    """
    space_index = alphabet.find(' ')
    shifts = []
    for offset in range(length):
        counts = Counter(ch for ch in text[offset::length] if ch in alphabet)
        if not counts:
            shifts.append(0)
            continue
        top = counts.most_common(1)[0][0]
        shifts.append((alphabet.find(top) - space_index) % len(alphabet))
    return shifts


def decrypt(text, key, alphabet=alp):
    """Subtract the repeating ``key`` shifts from ``text``.

    The key position follows the absolute index in ``text``. Characters
    that are not in ``alphabet`` are copied through unchanged rather than
    being turned into letters. An empty key returns ``text`` as is.
    """
    if not key:
        return text
    size = len(alphabet)
    decoded = []
    for position, ch in enumerate(text):
        index = alphabet.find(ch)
        if index < 0:
            decoded.append(ch)
        else:
            shift = key[position % len(key)]
            decoded.append(alphabet[(index - shift) % size])
    return ''.join(decoded)


def main():
    """Run the analysis on ``data/case8`` and print every intermediate result."""
    with open(os.path.join('data', 'case8'), encoding='cp1251') as source:
        txt = source.read()

    l_k1 = kasiski_lengths(txt, max_len=word_len)
    print(sorted(l_k1)[:key_len + 1])

    l_k2 = rank_key_lengths(txt, max_len=key_len)
    print(l_k2)

    # Lengths supported by both estimators, in index-of-coincidence order.
    l_k = [x for x in l_k2 if x in l_k1]
    print(l_k)

    k = recover_key(txt, k_len)
    print(decrypt(txt, k))
    print(k)


if __name__ == '__main__':
    main()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement