Advertisement
Guest User

Untitled

a guest
Oct 23rd, 2017
70
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 2.39 KB | None | 0 0
import math
from collections import Counter


  4. with open("tora.txt", encoding="utf-8") as f:
  5.     text = list(f.read().lower())
  6.  
  7.     #############################################################
  8.     #  Alphabet intializing
  9.  
  10.     textLen = len(text)
  11.     print("Длина текста:", textLen)
  12.     symbols = {}
  13.  
  14.     for t in text:
  15.         if t in symbols:
  16.             symbols[t] += 1
  17.         else:
  18.             symbols[t] = 1
  19.  
  20.     alphabet = list(symbols.keys())
  21.  
  22.     # print("алфавит:", alphabet)
  23.     print("Мощность алфавита:", len(alphabet))
  24.  
  25.     probabilities = {}
  26.  
  27.     for k, v in symbols.items():
  28.         probabilities[k] = v/textLen
  29.  
  30.     # print(probabilities)
  31.  
  32.     #############################################################
  33.     # H0 counting
  34.  
  35.     h0 = 0
  36.     for k, v in probabilities.items():
  37.         h0 += v * math.log(v,2)
  38.     h0 *= -1
  39.  
  40.     print("H0:", h0)
  41.  
  42.     #############################################################
  43.     # H1 counting
  44.  
  45.     pairs = list(zip(text[:-1], text[1:]))
  46.  
  47.     pairCounts = {}
  48.  
  49.     for p in pairs:
  50.         if p in pairCounts:
  51.             pairCounts[p] += 1
  52.         else:
  53.             pairCounts[p] = 1
  54.  
  55.     pairsCount = len(text) - 1
  56.     pairProbs = {}
  57.  
  58.     for k, v in pairCounts.items():
  59.         pairProbs[k] = v/pairsCount
  60.  
  61.     #Conditional entropy counting
  62.  
  63.     h1 = 0
  64.     for i in alphabet:
  65.         for j in alphabet:
  66.             if (i, j) in pairProbs:
  67.                 condprob = min((pairProbs[(i, j)] / probabilities[i]), 1)
  68.                 # print(condprob)
  69.                 h1 += pairProbs[(i, j)] * math.log(condprob, 2)
  70.  
  71.     h1 *= -1
  72.     print("H1: " + str(h1))
  73.  
  74.  
  75.     #############################################################
  76.     #H2 counting
  77.  
  78.     triplets = list(zip(text[:-2], text[1:-1], text[2:]))
  79.  
  80.     tripletCounts = {}
  81.  
  82.     for t in triplets:
  83.         if t in tripletCounts:
  84.             tripletCounts[t] += 1
  85.         else:
  86.             tripletCounts[t] = 1
  87.  
  88.     tripletsCount = textLen - 2
  89.  
  90.     tripletProbs = {}
  91.  
  92.     for k, v in tripletCounts.items():
  93.         tripletProbs[k] = v/tripletsCount
  94.  
  95.     h2 = 0
  96.  
  97.     for i in alphabet:
  98.         for k, v in pairProbs.items():
  99.             key = (k[0], k[1], i)
  100.             if key in tripletProbs:
  101.                 condprob = tripletProbs[key] / v
  102.                 h2 += tripletProbs[key] * math.log(condprob,2)
  103.     h2 *= -1
  104.  
  105.     print("h2: " + str(h2))
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement