Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- ## DNICE -> lol@auburn.edu
- import math
- import re
- from scipy.stats.stats import pearsonr
- import pylab
class Markov(object):
    """Character-level Markov model built from fixed-size chunks of tokens.

    The model is a plain dict mapping a context string of ``gram_size``
    characters to the list of characters that were seen immediately after
    that context (duplicates kept, so counts encode frequencies).
    """

    def doMarkov(self, text_list, gram_size):
        """Build the context -> following-characters table.

        Each token is cut into consecutive, NON-overlapping chunks of
        ``gram_size + 1`` characters; a trailing remainder shorter than
        that is ignored. The first ``gram_size`` characters of a chunk are
        the context and the last character is the observed next state,
        e.g. chunks AAA, AAB, AAC -> {'AA': ['A', 'B', 'C']}.

        NOTE(review): a sliding window would observe every transition;
        the non-overlapping split is kept because calculateSurprisal
        relies on the same chunking.

        :param text_list: iterable of strings to learn from.
        :param gram_size: number of context characters per chunk.
        :return: dict mapping each context to a list of next characters.
        """
        chunk_len = gram_size + 1
        # Compiled once instead of once per token. '.' never matches a
        # newline, so a chunk cannot span one.
        chunk_re = re.compile('.' * chunk_len)
        gram_str_dict = {}
        for token in text_list:
            for chunk in chunk_re.findall(token):
                gram_str_dict.setdefault(chunk[:-1], []).append(chunk[-1])
        return gram_str_dict

    def calculateSurprisal(self, token, gram_str_dict, gram_size):
        """Return the surprisal of ``token`` in bits under the given table.

        The token is split into the same non-overlapping chunks used by
        doMarkov. For each chunk the probability of its last character
        given its context is count(next char) / len(observations), and the
        surprisal is the sum of -log2 of those probabilities.

        Log probabilities are summed per chunk rather than multiplying the
        raw probabilities and taking one log at the end: the product
        underflows to 0.0 for long tokens, which made math.log fail.

        :param token: string to score.
        :param gram_str_dict: table produced by doMarkov.
        :param gram_size: number of context characters per chunk; should
            match the value used to build ``gram_str_dict``.
        :return: float surprisal in bits. 0.0 when the token is too short
            to contain a chunk; ``float('inf')`` when the token contains a
            context or a transition that is absent from the table (its
            probability under the model is zero).
        """
        chunk_len = gram_size + 1
        chunk_re = re.compile('.' * chunk_len)
        surprisal = 0.0
        for chunk in chunk_re.findall(token):
            followers = gram_str_dict.get(chunk[:-1])
            hits = followers.count(chunk[-1]) if followers else 0
            if hits == 0:
                # Unseen context or unseen transition: probability zero.
                return float('inf')
            # float() keeps true division on Python 2 as well.
            surprisal -= math.log(float(hits) / len(followers), 2)
        return surprisal
if __name__ == '__main__':
    # Script entry point: read username/password pairs from the flat
    # database dump, train one Markov table per field, score every entry
    # against its own table, then report and plot how username surprisal
    # relates to password surprisal.
    m = Markov()
    gram_len = 3

    # Non-greedy matches: the username runs from the start of the line to
    # the first ':::'; the password sits between the first two ':::'.
    username_re = re.compile('^.*?:::')
    password_re = re.compile(':::.*?:::')
    # TODO: passwords and emails

    username_list = []
    password_list = []
    # 'with' closes the file even if a line fails to parse; iterating the
    # handle avoids loading the whole dump into memory first.
    with open('./parsed_db.txt') as flat_db:
        for line in flat_db:
            # The slices drop the ':::' delimiter plus one neighbouring
            # character -- presumably fields are separated by ' ::: ';
            # TODO confirm against the dump format.
            username = username_re.findall(line)[0][:-4]
            password = password_re.findall(line)[0][4:-4]
            # Keep a pair only when both fields are long enough to yield
            # at least one chunk of gram_len + 1 characters.
            if len(username) > gram_len and len(password) > gram_len:
                username_list.append(username)
                password_list.append(password)

    username_gram_str_dict = m.doMarkov(username_list, gram_len)
    password_gram_str_dict = m.doMarkov(password_list, gram_len)

    # The two lists stay index-aligned: entry k of each belongs to the
    # same line of the dump.
    username_surprisals = [
        m.calculateSurprisal(name, username_gram_str_dict, gram_len)
        for name in username_list
    ]
    password_surprisals = [
        m.calculateSurprisal(secret, password_gram_str_dict, gram_len)
        for secret in password_list
    ]

    # Call form of print: same output on Python 2, valid on Python 3.
    print(pearsonr(password_surprisals, username_surprisals))
    pylab.scatter(password_surprisals, username_surprisals)
    pylab.show()
Add Comment
Please, Sign In to add comment