Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- """This script compares letter frequency in an English-language text
- from Project Gutenberg with a standard letter
- frequency metric.
- The text selected is 'Turkish Literature' at
- http://www.gutenberg.org/files/56464/56464-0.txt
- Standard for letter frequency is from the words in the Oxford
- Concise Dictionary as per the Wikipedia article 'Letter
- Frequency in English' at https://en.wikipedia.org/wiki/Letter_frequency
- """
- from string import ascii_lowercase
# Reference letter frequencies in percent, keyed by lowercase ASCII letter.
# NOTE(review): confirm which column of the Wikipedia "Letter frequency"
# table these figures were copied from.
freqdict = {
    'a': 8.167, 'b': 1.492, 'c': 2.782, 'd': 4.253, 'e': 12.702,
    'f': 2.228, 'g': 2.015, 'h': 6.094, 'i': 6.966, 'j': 0.153,
    'k': 0.772, 'l': 4.025, 'm': 2.406, 'n': 6.749, 'o': 7.507,
    'p': 1.929, 'q': 0.095, 'r': 5.987, 's': 6.327, 't': 9.056,
    'u': 2.758, 'v': 0.987, 'w': 2.360, 'x': 0.150, 'y': 1.974,
    'z': 0.074,
}

# Maps every ASCII letter, in either case, to its lowercase form. An explicit
# table (rather than str.lower()) keeps non-ASCII characters whose lowercase
# happens to be an ASCII letter, such as the Kelvin sign, out of the counts.
_LETTER_FOLD = {letter: letter for letter in ascii_lowercase}
_LETTER_FOLD.update({letter.upper(): letter for letter in ascii_lowercase})


def count_letters(lines):
    """Count how often each ASCII letter occurs, ignoring case.

    Args:
        lines: Any iterable of strings, for example an open text file.

    Returns:
        A ``(counts, total)`` tuple. ``counts`` maps each of the 26
        lowercase ASCII letters (in alphabetical order) to its number of
        occurrences, zero when absent; ``total`` is the sum of all counts.
    """
    counts = dict.fromkeys(ascii_lowercase, 0)
    total = 0
    for line in lines:
        for char in line:
            letter = _LETTER_FOLD.get(char)
            if letter is not None:
                counts[letter] += 1
                total += 1
    return counts, total


def to_percentages(counts, total):
    """Convert raw letter counts to percentages rounded to 3 decimals.

    Args:
        counts: Mapping of letter to occurrence count.
        total: Number of letters counted overall.

    Returns:
        A new dict with the same keys and order as ``counts``. When
        ``total`` is zero (no letters were found) every value is ``0.0``
        instead of raising ``ZeroDivisionError``.
    """
    if total == 0:
        return {letter: 0.0 for letter in counts}
    return {
        letter: round((count / total) * 100, 3)
        for letter, count in counts.items()
    }


def percent_deviation(sample, standard):
    """Return the per-letter percentage deviation of sample from standard.

    Each value is ``(1 - sample / standard) * 100`` rounded to 3 decimals,
    so a positive number means the letter is less frequent in the sample
    than in the standard, and a negative number means it is more frequent.

    Args:
        sample: Mapping of letter to percentage frequency in the sample.
        standard: Mapping of letter to reference percentage frequency; must
            contain a non-zero entry for every key of ``sample``.

    Returns:
        A new dict keyed like ``sample``.
    """
    return {
        letter: round((1 - (sample[letter] / standard[letter])) * 100, 3)
        for letter in sample
    }


if __name__ == "__main__":
    filename = "source.txt"
    # Only ASCII letters are counted, and their bytes are identical in UTF-8
    # and the common single-byte encodings, so undecodable bytes can safely
    # be replaced rather than aborting the run.
    with open(filename, 'r', encoding="utf-8", errors="replace") as handle:
        bookfreq, total = count_letters(handle)
    bookfreq = to_percentages(bookfreq, total)
    compare = percent_deviation(bookfreq, freqdict)
    print(compare)
Advertisement
Add Comment
Please, Sign In to add comment