
# 10-3
# By:
# Mars83 on
# Oct 9th, 2011 | syntax:
# Python | size: 1.48 KB | hits: 57 | expires: Never
#! /usr/bin/env python3.2
# -*- coding: utf-8 -*-
# main.py
""" Task: Exercise 10.3
Write a function called most_frequent that takes a string and prints the
letters in decreasing order of frequency. Find text samples from several
different languages and see how letter frequency varies between languages.
Compare your results with the tables at
wikipedia.org/wiki/Letter_frequencies.
"""
''' Imports '''
import string
from collections import Counter
''' Functions '''
def most_frequent(text):
    """Print and return the letters of *text* in decreasing order of frequency.

    Only characters for which ``str.isalpha()`` is true are counted, so
    whitespace of every kind (including carriage returns), digits and
    punctuation from any script are ignored. Letters are lower-cased
    before they are counted.

    One line per letter is printed in the form
    ``<count>:<TAB>'<letter>'<TAB><percentage>%`` where the percentage is
    rounded to three decimal places.

    Args:
        text: The string to analyse.

    Returns:
        A tuple ``(frequencies, total)``. ``frequencies`` is a list of
        ``(count, letter)`` tuples sorted in descending order (ties are
        broken by the letter, also descending); ``total`` is the number of
        letters that were counted. Empty or letter-free input yields
        ``([], 0)``.
    """
    # TODO: translate ä -> a (...)
    # Filter on the original character, then fold case, so that upper- and
    # lower-case variants of a letter share one bucket.
    counts = Counter(ch.lower() for ch in text if ch.isalpha())
    total = sum(counts.values())

    # Sorting (count, letter) tuples in reverse puts the most frequent
    # letter first and keeps the ordering of ties deterministic.
    frequencies = sorted(
        ((count, letter) for letter, count in counts.items()),
        reverse=True,
    )

    # Print-out (the loop body never runs when total == 0, so the division
    # below is safe).
    for count, letter in frequencies:
        percentage = round(count / total * 100, 3)
        print("{}:\t'{}'\t{}%".format(count, letter, percentage))
    return (frequencies, total)
''' Test '''
# TODO: guard
# NOTE(review): the TODO above presumably asks for an
# `if __name__ == "__main__":` guard -- confirm before adding one, since it
# would stop this code from running on import.
# Read the sample file in one call; the `with` block closes the handle even
# if reading fails.
with open("mbox-short.txt", "r") as file:
    text = file.read()
result = most_frequent(text)