MickeyLater

letter frequency snippet

Feb 3rd, 2018
210
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 1.62 KB | None | 0 0
  1. """This script compares letter frequency in an English-language text
  2. from Project Gutenberg with a standard letter
  3. frequency metric.
  4.  
  5. The text selected is 'Turkish Literature' at
  6. http://www.gutenberg.org/files/56464/56464-0.txt
  7.  
  8. Standard for letter frequency is from the words in the Oxford
  9. Concise Dictionary as per the Wikipedia article 'Letter
  10. Frequency in English' at https://en.wikipedia.org/wiki/Letter_frequency
  11. """
  12. from string import ascii_lowercase
  13.  
  14. freqdict = {'a': 8.167, 'b': 1.492, 'c': 2.782, 'd': 4.253, 'e': 12.702,
  15. 'f': 2.228, 'g': 2.015, 'h': 6.094, 'i': 6.966, 'j': 0.153,
  16. 'k': 0.772, 'l': 4.025, 'm': 2.406, 'n': 6.749, 'o': 7.507,
  17. 'p': 1.929, 'q': 0.095, 'r': 5.987, 's': 6.327, 't': 9.056,
  18. 'u': 2.758, 'v': 0.987, 'w': 2.360, 'x': 0.150, 'y': 1.974,
  19. 'z': 0.074}
  20.  
  21.  
  22. bookfreq = dict()
  23. for letter in ascii_lowercase:
  24. bookfreq[letter] = 0
  25.  
  26. filename = "source.txt"
  27. total = 0
  28. with open(filename, 'r') as handle:
  29. while True:
  30. line = handle.readline()
  31. if line == '':
  32. break
  33. else:
  34. for char in line:
  35. if char in ascii_lowercase:
  36. bookfreq[char] += 1
  37. total += 1
  38.  
  39. for key in bookfreq:
  40. '''Establishes percentage letter frequency in sample text.'''
  41. bookfreq[key] = round(((bookfreq[key] / total) * 100), 3)
  42.  
  43. # print(bookfreq)
  44.  
  45. compare = dict()
  46. for key in bookfreq:
  47. '''Percentage deviation of sample text from standard, per letter.'''
  48. compare[key] = round(((1 - (bookfreq[key] / freqdict[key])) * 100), 3)
  49.  
  50. print(compare)
Advertisement
Add Comment
Please, Sign In to add comment