Advertisement
maurol22

hyp_test

May 17th, 2019
156
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 3.31 KB | None | 0 0
  1. import re
  2. import math
  3. from string import ascii_lowercase
  4. import random
  5. from scipy import stats
  6.  
  7. # Genera una stringa casuale di lunghezza fissata
  8. def randomString(stringLength):
  9.     letters = ascii_lowercase
  10.     return ''.join(random.choice(letters) for i in range(stringLength))
  11.  
  12. # Effettua il preprocessing del testo
  13. def preprocessing(text):
  14.     text = text.lower()
  15.     text = re.sub(r"['\",.;:_@#()”“’—?!&$\n]+\ *", " ", text) # conversione dei caratteri speciali in uno spazio
  16.     text = text.replace("-", "") # conversione del carattere - in uno spazio
  17.     text = text.replace(" ", "") # rimozione spazi
  18.     return text
  19.  
  20. def meanLogLikelihoodRatio(string,p,q):
  21.     length = len(string)
  22.     value = 0
  23.     div_p_q = stats.entropy(p, q, base=2)
  24.     div_q_p = stats.entropy(q, p, base=2)
  25.  
  26.     for i in range(length):
  27.         j = ascii_lowercase.index(string[i])
  28.         a = p[j]/q[j]
  29.         value += math.log(a,2)
  30.  
  31.     value = value / length
  32.  
  33.     x = math.fabs(value - div_p_q)
  34.     y = math.fabs(value - (- div_q_p))
  35.  
  36.     print("x: " + str(x) + " y: " + str(y))
  37.  
  38.     if x < y:
  39.         alpha = 2**(-length * div_q_p)
  40.         return("Random String", value, alpha)
  41.     elif x > y:
  42.         beta =  2**(-length * div_p_q)
  43.         return("English String", value, beta)
  44.     else:
  45.         return("I Don't Know", value, 1)
  46.  
  47. def main():
  48.     # Distribuzione delle lettere casuali: viene presa come ipotesi nulla
  49.     randomLettersProbs = [1 / 26 for letter in ascii_lowercase]
  50.  
  51.     # Frequenza delle lettere lingua inglese: viene presa come ipotesi alternativa
  52.     englishLettersProbs = [0.08167, 0.01492, 0.02782, 0.04253, 0.12702,
  53.                            0.02228, 0.02015, 0.06094, 0.06966, 0.00153,
  54.                            0.00772, 0.04025, 0.02406, 0.06749, 0.07507,
  55.                            0.01929, 0.00095, 0.05987, 0.06327, 0.09056,
  56.                            0.02758, 0.00978, 0.02360, 0.00150, 0.01974,
  57.                            0.00074]
  58.  
  59.     # generazione di stringhe random
  60.     randomStrings = []
  61.     stringLength = 25
  62.     randomStringNumber = 50
  63.     for i in range(randomStringNumber):
  64.         string = randomString(stringLength)
  65.         randomStrings.append(string)
  66.  
  67.     # lettura delle stringhe in lingua inglese
  68.     f = open("text.txt", 'r')
  69.     englishText = f.read()
  70.     englishText = preprocessing(englishText)
  71.  
  72.     englishStrings = []
  73.     while englishText != "":
  74.         englishStrings.append(englishText[0:stringLength])
  75.         englishText = englishText[stringLength:]
  76.  
  77.     print("We'll now test the random strings")
  78.     randomErrorCounter = 0
  79.     for string in randomStrings:
  80.         result, value, errorProb = meanLogLikelihoodRatio(string, randomLettersProbs, englishLettersProbs)
  81.         print(result, value, errorProb)
  82.         if result != "Random String":
  83.             randomErrorCounter += 1
  84.  
  85.     print(randomErrorCounter)
  86.  
  87.     print("We'll now test the english strings")
  88.     englishErrorCounter = 0
  89.     for string in englishStrings:
  90.         result, value, error_prob = meanLogLikelihoodRatio(string, randomLettersProbs, englishLettersProbs)
  91.         print(string)
  92.         print(result, value, error_prob)
  93.         if result != "English String":
  94.             englishErrorCounter += 1
  95.  
  96.     print(englishErrorCounter)
  97.  
# Run the experiment only when this file is executed as a script, not when
# it is imported as a module.
if __name__ == '__main__':
    main()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement