Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
"""Estimate per-character entropy of a text file from its n-gram statistics.

Run as a script, this reads ``tora.txt`` (UTF-8), lower-cases it and prints
the text length, the alphabet size and three entropy estimates in bits:
the single-character entropy (labelled "H0") and the entropy of a character
given the previous one ("H1") or the previous two ("h2").
"""
import math
from collections import Counter


def ngram_counts(chars, n):
    """Count every window of ``n`` consecutive items in ``chars``.

    Args:
        chars: A string or list of symbols.
        n: Window length, at least 1.

    Returns:
        A ``Counter`` mapping each n-tuple of symbols to its number of
        occurrences. It is empty when ``chars`` is shorter than ``n``.

    Raises:
        ValueError: If ``n`` is smaller than 1.
    """
    if n < 1:
        raise ValueError("n must be at least 1")
    # zip over n progressively shifted views yields each sliding window once
    # and stops at the shortest view, i.e. after len(chars) - n + 1 windows.
    return Counter(zip(*(chars[shift:] for shift in range(n))))


def conditional_entropy(counts):
    """Return the entropy (bits) of an n-gram's last symbol given the rest.

    For 1-grams the context is empty, so the result is the plain Shannon
    entropy of the symbol distribution.

    Context frequencies are accumulated from the very same n-grams rather
    than taken from a separately normalised (n-1)-gram table. Dividing two
    probabilities that were normalised over different window counts makes
    the ratio exceed 1 for some n-grams, which produces positive logarithms
    and can drive the "entropy" below zero.

    Args:
        counts: Mapping from n-gram tuples to occurrence counts, as returned
            by ``ngram_counts``.

    Returns:
        The conditional entropy as a float; 0.0 for an empty mapping.
    """
    total = sum(counts.values())

    # How often each (n-1)-symbol prefix starts one of the counted n-grams.
    context_counts = Counter()
    for gram, count in counts.items():
        context_counts[gram[:-1]] += count

    entropy = 0.0
    for gram, count in counts.items():
        joint_prob = count / total
        cond_prob = count / context_counts[gram[:-1]]  # always <= 1
        entropy -= joint_prob * math.log2(cond_prob)
    return entropy


def main(path="tora.txt"):
    """Read the text at ``path`` and print its statistics and entropies."""
    with open(path, encoding="utf-8") as f:
        text = f.read().lower()

    # Alphabet statistics
    print("Длина текста:", len(text))
    symbols = ngram_counts(text, 1)
    print("Мощность алфавита:", len(symbols))

    # Entropy of a single character
    print("H0:", conditional_entropy(symbols))

    # Entropy of a character given the previous character
    print("H1: " + str(conditional_entropy(ngram_counts(text, 2))))

    # Entropy of a character given the two previous characters
    print("h2: " + str(conditional_entropy(ngram_counts(text, 3))))


if __name__ == "__main__":
    main()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement