Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- #!/usr/bin/python
- # https://github.com/johnmyleswhite/JuliaVsR in Python
- import random
- import csv
- import sys
- import math
- import time
# The 26 lowercase ASCII letters; every cipher in this script is a dict
# mapping letters of this alphabet to letters of this alphabet.
letters = ('a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm',
           'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z')
n = len(letters)

# Alphabet rotated left by one position: ('b', 'c', ..., 'z', 'a').
# Pairing it with ``letters`` avoids the Python-2-only ``xrange`` index loop,
# so this setup runs unchanged on Python 2 and Python 3.
_rotated = letters[1:] + letters[:1]

# Shift-by-one cipher (a -> b, ..., z -> a) and its inverse (b -> a, ..., a -> z).
caeser_cipher = dict(zip(letters, _rotated))
inv_caeser_cipher = dict(zip(_rotated, letters))
def apply_cipher(text, cipher):
    """Translate ``text`` one character at a time through ``cipher``.

    ``cipher`` is indexed with each character of ``text`` and the looked-up
    values are concatenated into a single string. A character that is not a
    key of ``cipher`` makes the lookup raise ``KeyError``.
    """
    return "".join(cipher[symbol] for symbol in text)
def generate_random_cipher():
    """Return a random substitution cipher over ``letters``.

    The result is a dict whose keys are the entries of ``letters`` and whose
    values are the same entries in shuffled order, so each letter is mapped
    to exactly one letter and no two letters share an image.
    """
    # random.shuffle() reorders its argument in place, so it needs a mutable
    # sequence. On Python 3 range() is immutable (and xrange() no longer
    # exists), hence the explicit list.
    idx = list(range(len(letters)))
    random.shuffle(idx)
    return dict(zip(letters, (letters[j] for j in idx)))
# Proposal move: exchange the images of two distinct letters.
def propose_modified_cipher(cipher):
    """Return a copy of ``cipher`` in which two letters have traded values.

    Two distinct entries of ``letters`` are drawn with ``random.sample`` and
    the values stored under them are swapped in the copy. ``cipher`` itself
    is left untouched.
    """
    first, second = random.sample(letters, 2)
    swapped = cipher.copy()
    swapped[first], swapped[second] = cipher[second], cipher[first]
    return swapped
# Word -> frequency table, read from a comma-separated file in which every
# row has exactly two fields: the word and its numeric frequency.
lexical_db = dict()
with open('lexical_database.csv', 'r') as f:
    reader = csv.reader(f, delimiter=',')
    for word, frequency in reader:
        # Python has no ``double`` builtin (the original call raised
        # NameError); ``float`` parses the numeric text.
        lexical_db[word] = float(frequency)
def log_probability_of_text(text, cipher, lexical_db):
    """Return the summed log-frequency of ``text`` after applying ``cipher``.

    ``text`` is an iterable of words. Each word is translated with
    ``apply_cipher`` and looked up in ``lexical_db``; words that are not in
    the table are scored with ``sys.float_info.epsilon`` instead. The return
    value is the sum of ``math.log`` over those scores.
    """
    unseen_score = sys.float_info.epsilon
    # Translate every word first, then score, so all translation happens
    # before any logarithm is taken.
    translated = [apply_cipher(token, cipher) for token in text]
    return sum(math.log(lexical_db.get(token, unseen_score)) for token in translated)
def metropolis_step(text, cipher, lexical_db):
    """Run one accept/reject step and return the cipher to continue with.

    A candidate is produced by ``propose_modified_cipher``. If its log
    probability for ``text`` is strictly higher than that of ``cipher`` it is
    returned outright. Otherwise it is returned only when a fresh
    ``random.random()`` draw falls below ``exp(candidate_lp - current_lp)``;
    if not, the unchanged ``cipher`` is returned.
    """
    candidate = propose_modified_cipher(cipher)
    current_lp = log_probability_of_text(text, cipher, lexical_db)
    candidate_lp = log_probability_of_text(text, candidate, lexical_db)

    # Strict improvement: accept without consuming a random number.
    if candidate_lp > current_lp:
        return candidate

    acceptance = math.exp(candidate_lp - current_lp)
    draw = random.random()
    return candidate if draw < acceptance else cipher
# Benchmark input: a known plaintext, encrypted word by word with the
# shift-by-one cipher so each sampled decryption can be compared against it.
plaintext = "here is some sample text"
encrypted_text = [apply_cipher(word, caeser_cipher) for word in plaintext.split()]

# Uncomment to seed the generator for a repeatable run.
#random.seed(1)
cipher = generate_random_cipher()
n_iterations = 50000

starttime = time.time()
results = []
# range() instead of the Python-2-only xrange() keeps the script runnable on
# Python 3 as well.
for i in range(n_iterations):
    # Record the state of the chain before advancing it.
    lp = log_probability_of_text(encrypted_text, cipher, lexical_db)
    current_decrypt = " ".join([apply_cipher(word, cipher) for word in encrypted_text])
    is_correct = plaintext == current_decrypt
    results.append((i, lp, current_decrypt, is_correct))
    cipher = metropolis_step(encrypted_text, cipher, lexical_db)
endtime = time.time()
# Elapsed wall-clock seconds for the sampling loop.
print (endtime - starttime)

# One row per iteration: (iteration, log probability, decrypt, is_correct).
# NOTE(review): csv.writer defaults to a comma delimiter although the file is
# named .tsv -- confirm what downstream readers expect before changing it.
with open('py_results.tsv', 'w') as f:
    writer = csv.writer(f)
    writer.writerows(results)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement