Don't like ads? PRO users don't see any ads ;-)
Guest

10-3

By: Mars83 on Oct 9th, 2011  |  syntax: Python  |  size: 1.48 KB  |  hits: 57  |  expires: Never
download  |  raw  |  embed  |  report abuse  |  print
Text below is selected. Please press Ctrl+C to copy to your clipboard. (⌘+C on Mac)
  1. #! /usr/bin/env python3.2
  2. # -*- coding: utf-8 -*-
  3.  
  4. # main.py
  5. """ Task: Exercise 10.3
  6.    Write a function called most_frequent that takes a string and prints the
  7.    letters in decreasing order of frequency. Find text samples from several
  8.    different languages and see how letter frequency varies between languages.
  9.    Compare your results with the tables at
  10.    wikipedia.org/wiki/Letter_frequencies.
  11. """
  12.  
  13. ''' Imports '''
  14. import string
  15.  
  16. ''' Functions '''
  def most_frequent(text):
      """Print and return the letters of *text* in decreasing order of frequency.

      Only characters for which ``str.isalpha()`` is true are counted, so
      whitespace (including ``\\r``), digits, punctuation and symbols of any
      script are ignored. Counting is case-insensitive: every letter is
      lower-cased before it is tallied.

      One line is printed per distinct letter, in the form
      ``<count>:<TAB>'<letter>'<TAB><percentage>%``, where the percentage is
      the letter's share of all counted letters, rounded to three decimals.

      Args:
          text: The string to analyse.

      Returns:
          A tuple ``(ranking, total)``. ``ranking`` is a list of
          ``(count, letter)`` tuples sorted in descending order (ties are
          broken by descending letter, because whole tuples are compared).
          ``total`` is the number of letters counted. For input without any
          letters the result is ``([], 0)`` and nothing is printed.
      """
      counts = {}
      for char in text:
          if not char.isalpha():
              continue     # skip everything that is not a letter
          # TODO: translate ä -> a (...)
          char = char.lower()
          counts[char] = counts.get(char, 0) + 1
      total = sum(counts.values())

      # Build (count, letter) pairs and sort them once, highest count first.
      ranking = sorted(((count, letter) for letter, count in counts.items()),
                       reverse=True)

      # Print-out. The loop body never runs when total == 0, so the division
      # below cannot fail on empty input.
      for count, letter in ranking:
          percentage = round(count / total * 100, 3)
          print("{0}:\t'{1}'\t{2}%".format(count, letter, percentage))
      return (ranking, total)
  41.  
  ''' Test '''
  # TODO: guard -- this driver still runs at import time; wrapping it in
  # `if __name__ == "__main__":` would stop an import from reading the file.
  # The `with` block closes the handle even if reading raises. `file` stays
  # bound (as a closed handle) afterwards, and `text` / `result` remain
  # module-level names.
  with open("mbox-short.txt", "r") as file:
      text = file.read()
  result = most_frequent(text)
  50.