# 10-3

By: Mars83 on Oct 9th, 2011  |  syntax: Python  |  size: 1.48 KB  |  hits: 57  |  expires: Never
Text below is selected. Please press Ctrl+C to copy to your clipboard. (⌘+C on Mac)
1. #! /usr/bin/env python3.2
2. # -*- coding: utf-8 -*-
3.
4. # main.py
5. """
6.    Write a function called most_frequent that takes a string and prints the
7.    letters in decreasing order of frequency. Find text samples from several
8.    different languages and see how letter frequency varies between languages.
9.    Compare your results with the tables at
10.    wikipedia.org/wiki/Letter_frequencies.
11. """
12.
13. ''' Imports '''
14. import string
15.
16. ''' Functions '''
def most_frequent(text):
    """Print the letters of *text* in decreasing order of frequency.

    Only alphabetic characters are counted; whitespace, digits, punctuation
    and every other non-letter character are skipped. Letters are folded to
    lower case before counting, so 'A' and 'a' share one entry.

    One line is printed per distinct letter: the count, the quoted letter
    and the letter's percentage share of all counted letters (rounded to
    three decimals), separated by tabs.

    Args:
        text: The string to analyse.

    Returns:
        A tuple ``(t, total)`` where ``t`` is a list of ``(count, letter)``
        tuples sorted in descending order (equal counts fall back to
        descending letter order) and ``total`` is the number of letters
        counted. For input without any letters this is ``([], 0)``.
    """
    total = 0   # number of counted letters (non-letters excluded)
    letters = dict()
    for letter in text:
        # isalpha() also rejects characters the old explicit lists missed,
        # e.g. '\r', form feeds or typographic quotes.
        if not letter.isalpha():
            continue
        # TODO: translate ä -> a (...)
        letter = letter.lower()
        letters[letter] = letters.get(letter, 0) + 1
        total += 1
    # Build (count, letter) pairs so the sort orders by frequency first;
    # sort once instead of after every insertion.
    t = sorted(((count, letter) for letter, count in letters.items()),
               reverse=True)
    # Print-out; total is non-zero here whenever t has at least one entry.
    for count, letter in t:
        percentage = round(float(count) / float(total) * 100, 3)
        print(str(count) + ":\t'" + str(letter) + "'\t" + str(percentage) + "%")
    return (t, total)
41.
42. ''' Test '''
43. # TODO: guard
# Read the whole sample file in one call; the with-block guarantees the
# handle is closed even if reading raises.
with open("mbox-short.txt", "r") as file:
    text = file.read()
# Prints the frequency table and keeps the (list, total) tuple for inspection.
result = most_frequent(text)
50.