Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- # #!/usr/bin/python
- #
- # By dvdjaco
- # Exercise 10.3
- #
- # most_frequent takes a string and prints the letters in decreasing order of frequency
- import string
def most_frequent(s):
    """Print the letters of ``s`` in decreasing order of frequency.

    The text is lowercased and every character that is not a letter
    (spaces, tabs, newlines, digits, punctuation, ...) is ignored. One line
    is printed per distinct letter in the form ``<letter> <count> <share>%``,
    where the share is the letter's percentage of all counted letters,
    rounded to two decimals.

    Letters with equal counts are listed in reverse alphabetical order,
    because the (count, letter) pairs are sorted in descending order.

    Args:
        s: The text to analyse. Text without any letter prints nothing.

    Returns:
        None. The table is written to standard output.
    """
    # Local import so the function does not rely on the file's import block.
    from collections import Counter

    # Lowercase first, then keep letters only. Filtering with isalpha()
    # replaces the Python 2-only ``str.translate(None, ...)`` call and also
    # drops digits and whitespace other than the plain space character.
    letters = [char for char in s.lower() if char.isalpha()]
    total = len(letters)

    # Sort (count, letter) pairs so the most frequent letter comes first.
    ranked = sorted(
        ((count, char) for char, count in Counter(letters).items()),
        reverse=True
    )

    # ``total`` can only be zero when ``ranked`` is empty, so the division
    # below is never reached for input without letters.
    for count, char in ranked:
        share = 100.0 * count / total
        # A single parenthesised argument prints identically on Python 2
        # and Python 3.
        print("%s %d %.2f%%" % (char, count, share))
# Script entry: ask for a filename, read the file and print its letter
# frequencies with most_frequent().

# Python 2 calls the line-reading prompt raw_input(); Python 3 renamed it
# to input(). Pick whichever exists so the script runs on both.
try:
    prompt = raw_input
except NameError:
    prompt = input

f = prompt("Enter a filename: ")
try:
    fhand = open(f)
except IOError as err:
    # Stop here: without an open file there is nothing to count. SystemExit
    # with a message writes it to stderr and exits with status 1.
    raise SystemExit("Error opening file %s: %s" % (f, err))

# Join the lines (trailing whitespace stripped) in one pass and make sure the
# file is closed even if reading fails.
with fhand:
    s = ''.join(line.rstrip() for line in fhand)

most_frequent(s)
- #
- # Below are the results for a few books. The "<--" mark flags the first letter whose rank differs from the order listed at http://en.wikipedia.org/wiki/Letter_frequency
- #
- # Hitchhiker's Guide to the Galaxy (EN)
- # $ python exercise-10.3.py
- # Enter a filename: hh.txt
- # e 29391 10.26%
- # t 23494 8.20%
- # a 23143 8.08%
- # s 18725 6.54% <--
- # i 18246 6.37%
- # o 16918 5.91%
- # l 15218 5.31%
- # r 15006 5.24%
- # h 14849 5.18%
- # n 14720 5.14%
- #
- #
- # The Selfish Gene (EN)
- # $ python exercise-10.3.py
- # Enter a filename: sg.txt
- # e 72639 12.56%
- # t 52166 9.02%
- # a 46869 8.10%
- # i 43425 7.51% <--
- # o 41231 7.13%
- # s 41077 7.10%
- # n 37575 6.49%
- # r 32962 5.70%
- # h 28264 4.89%
- # l 28116 4.86%
- #
- # Don Quijote de la Mancha (ES)
- # $ python exercise-10.3.py
- # Enter a filename: pg2000.txt
- # e 223827 13.15%
- # a 194365 11.42%
- # o 154709 9.09%
- # s 126489 7.43%
- # n 109490 6.43% <--
- # r 102124 6.00%
- # l 89630 5.27%
- # d 87776 5.16%
- # i 78739 4.63%
- # u 78717 4.63%
Advertisement
Add Comment
Please, Sign In to add comment