Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- import re
- import math
- from string import ascii_lowercase
- import random
- from scipy import stats
def randomString(stringLength):
    """Return a random string of lowercase ASCII letters.

    Args:
        stringLength: Number of characters to generate.

    Returns:
        A string of ``stringLength`` characters, each picked independently
        with ``random.choice`` from ``ascii_lowercase``.
    """
    chars = []
    for _ in range(stringLength):
        chars.append(random.choice(ascii_lowercase))
    return "".join(chars)
def preprocessing(text):
    """Reduce raw text to a string made only of the letters ``a``-``z``.

    The text is lower-cased and every character outside ``a``-``z`` is
    removed: whitespace, punctuation, hyphens, digits and any non-ASCII
    symbol.

    Args:
        text: The raw text to clean.

    Returns:
        The lower-cased text with everything except ``a``-``z`` stripped.
    """
    # Only a-z may survive: the scoring code in this file looks every
    # character up in ascii_lowercase and fails on anything else, so a
    # fixed list of punctuation marks is not enough (digits, tabs,
    # brackets, accented letters, ... would slip through).
    return re.sub(r"[^a-z]", "", text.lower())
def meanLogLikelihoodRatio(string, p, q):
    """Classify a string by its mean log-likelihood ratio between p and q.

    For every character the base-2 logarithm of ``p[j] / q[j]`` is taken,
    where ``j`` is the character's position in ``ascii_lowercase``.  The
    mean of these values is compared with the two relative entropies
    ``D(p||q)`` and ``-D(q||p)`` (both computed with ``stats.entropy`` in
    base 2); whichever is closer decides the label.

    Args:
        string: Non-empty sequence of lowercase ASCII letters.
        p: Per-letter probabilities indexed like ``ascii_lowercase``.  A
            mean closer to ``D(p||q)`` yields the label "Random String".
        q: Per-letter probabilities indexed like ``ascii_lowercase``.  A
            mean closer to ``-D(q||p)`` yields the label "English String".

    Returns:
        A tuple ``(label, value, bound)`` where ``value`` is the mean
        log-likelihood ratio and ``bound`` is ``2 ** (-n * D(q||p))`` for
        "Random String", ``2 ** (-n * D(p||q))`` for "English String"
        (``n`` being the string length), or ``1`` with the label
        "I Don't Know" when both distances are equal.

    Raises:
        ValueError: If ``string`` is empty or contains a character that is
            not in ``ascii_lowercase``.
    """
    length = len(string)
    if length == 0:
        # The mean is undefined for an empty string; fail with a clear
        # message instead of a bare ZeroDivisionError further down.
        raise ValueError("string must contain at least one character")

    div_p_q = stats.entropy(p, q, base=2)
    div_q_p = stats.entropy(q, p, base=2)

    total = 0.0
    for char in string:
        j = ascii_lowercase.find(char)
        if j < 0:
            raise ValueError(
                "unsupported character {!r}: expected a lowercase ASCII "
                "letter".format(char)
            )
        total += math.log2(p[j] / q[j])
    value = total / length

    # Distance of the observed mean from each of the two reference values.
    x = math.fabs(value - div_p_q)
    y = math.fabs(value + div_q_p)
    # Diagnostic output of the two distances.
    print("x: " + str(x) + " y: " + str(y))

    if x < y:
        alpha = 2 ** (-length * div_q_p)
        return ("Random String", value, alpha)
    if x > y:
        beta = 2 ** (-length * div_p_q)
        return ("English String", value, beta)
    return ("I Don't Know", value, 1)
def main():
    """Run the random-vs-English classification experiment.

    Generates 50 random strings of 25 lowercase letters, reads
    ``text.txt``, preprocesses it and cuts it into 25-character chunks
    (the last chunk may be shorter).  Every string is classified with
    ``meanLogLikelihoodRatio``; the results and the number of
    misclassified strings in each group are printed.
    """
    # Distribution of random letters: taken as the null hypothesis.
    randomLettersProbs = [1 / 26 for _ in ascii_lowercase]
    # Letter frequencies of the English language: taken as the alternative
    # hypothesis.
    englishLettersProbs = [
        0.08167, 0.01492, 0.02782, 0.04253, 0.12702,
        0.02228, 0.02015, 0.06094, 0.06966, 0.00153,
        0.00772, 0.04025, 0.02406, 0.06749, 0.07507,
        0.01929, 0.00095, 0.05987, 0.06327, 0.09056,
        0.02758, 0.00978, 0.02360, 0.00150, 0.01974,
        0.00074,
    ]

    # Generate the random strings.
    stringLength = 25
    randomStringNumber = 50
    randomStrings = [
        randomString(stringLength) for _ in range(randomStringNumber)
    ]

    # Read the English text; the context manager closes the file even if
    # reading or preprocessing fails.
    with open("text.txt", 'r') as f:
        englishText = preprocessing(f.read())

    # Cut the text into consecutive chunks of stringLength characters in a
    # single pass instead of repeatedly re-slicing the remaining text.
    englishStrings = [
        englishText[start:start + stringLength]
        for start in range(0, len(englishText), stringLength)
    ]

    print("We'll now test the random strings")
    randomErrorCounter = 0
    for string in randomStrings:
        result, value, errorProb = meanLogLikelihoodRatio(
            string, randomLettersProbs, englishLettersProbs
        )
        print(result, value, errorProb)
        if result != "Random String":
            randomErrorCounter += 1
    print(randomErrorCounter)

    print("We'll now test the english strings")
    englishErrorCounter = 0
    for string in englishStrings:
        result, value, errorProb = meanLogLikelihoodRatio(
            string, randomLettersProbs, englishLettersProbs
        )
        print(string)
        print(result, value, errorProb)
        if result != "English String":
            englishErrorCounter += 1
    print(englishErrorCounter)
# Run the experiment only when this file is executed as a script.
if __name__ == "__main__":
    main()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement