Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- '''Frequency Analyzer
- By Josh Famestad'''
def get_count(char, message):
    '''(str, str) -> int

    Return the number of occurrences of char in message.

    Each element of message is compared to char for equality, so a char
    that is empty or longer than one character never matches anything in
    a str message and the result is 0.

    >>> get_count('a', 'the cat in the hat')
    2
    >>> get_count('b', 'slug')
    0
    '''
    # Per-character equality (not substring search) is deliberate: it keeps
    # the result at 0 for multi-character or empty `char`.
    return sum(1 for mchar in message if mchar == char)
def analyze(message, alphabet=None):
    '''(str[, str]) -> dict

    Return a dict mapping each character of alphabet that occurs in
    message to the number of times it occurs there.

    Characters of message that are not in alphabet are ignored. Keys are
    inserted in alphabet order, not in order of appearance in message.
    When alphabet is omitted, the space character, ASCII letters, digits
    and the symbols !@#$%^&* are considered.

    >>> analyze('dog')
    {'d': 1, 'g': 1, 'o': 1}
    >>> analyze('ferret')
    {'e': 2, 'f': 1, 'r': 2, 't': 1}
    '''
    from collections import Counter

    if alphabet is None:
        alphabet = ' abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789!@#$%^&*'
    # Count every character in a single pass instead of rescanning the
    # message once per alphabet symbol.
    counts = Counter(message)
    # Walk the alphabet (not the message) so the result keeps alphabet
    # order and excludes characters outside it.
    return {char: counts[char] for char in alphabet if char in counts}
- '''
- candidate for initial assumed order of declining frequency
- etaoinshrdlucmfwypvbgkjqxz
- most common digrams
- TH HE AN RE ER IN ON AT ND ST ES EN OF TE ED OR TI HI AS TO
- most common repeated letters
- LL EE SS OO TT FF RR NN PP CC
- The 'top twelve' letters comprise about 80% of the
- total usage. The 'top eight' letters comprise about 65% of the total usage.
- a 8.167%
- b 1.492%
- c 2.782%
- d 4.253%
- e 12.702%
- f 2.228%
- g 2.015%
- h 6.094%
- i 6.966%
- j 0.153%
- k 0.747%
- l 4.025%
- m 2.406%
- n 6.749%
- o 7.507%
- p 1.929%
- q 0.095%
- r 5.987%
- s 6.327%
- t 9.056%
- u 2.758%
- v 1.037%
- w 2.365%
- x 0.150%
- y 1.974%
- z 0.074%
- '''
- '''generate sample values'''
- '''cipher = define_cipher('qwertyuioplkjhgfdsazxcvbnm MNBVCXZQWERTYUIOPLKJHGFDSA')
- message = 'hidy ho'
- ciphertext = encrypt(message, cipher)
- data = analyze(ciphertext)'''
def guess(cipher_char, plain_char, mapping=None):
    '''(str, str[, dict]) -> NoneType

    Record the guess that cipher_char stands for plain_char by storing
    plain_char under the key cipher_char, replacing any earlier guess for
    that key.

    The guess is stored in mapping when one is given. Otherwise it is
    stored in the module-level name `cipher`, which must already exist;
    if it does not, a NameError is raised.
    '''
    # Fall back to the global table only when no explicit one is supplied,
    # so existing two-argument calls keep working.
    target = cipher if mapping is None else mapping
    target[cipher_char] = plain_char
def attempt_get_cipher(ciphertext, assumed_order=' etaoinshrdlucmfwypvbgkjqxz'):
    '''(str[, str]) -> dict

    Return a first-guess substitution table for ciphertext, built purely
    from symbol frequency.

    The symbols reported by analyze(ciphertext) are ranked from most to
    least frequent and paired position by position with assumed_order,
    giving a dict that maps each ciphertext symbol to a guessed plaintext
    symbol. The default assumed_order is a space followed by the lowercase
    letters in an assumed order of declining frequency.
    '''
    data = analyze(ciphertext)
    # sorted() is stable, also with reverse=True, so symbols with equal
    # counts keep the relative order in which analyze() returned them.
    ranked = sorted(data, key=data.get, reverse=True)
    # zip() stops at the shorter sequence: surplus ciphertext symbols get
    # no guess, and surplus entries of assumed_order are unused.
    return dict(zip(ranked, assumed_order))
Add Comment
Please, Sign In to add comment