Advertisement
Mars83

10-3

Oct 9th, 2011
295
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 1.48 KB | None | 0 0
  1. #! /usr/bin/env python3.2
  2. # -*- coding: utf-8 -*-
  3.  
  4. # main.py
  5. """ Task: Exercise 10.3
  6.    Write a function called most_frequent that takes a string and prints the
  7.    letters in decreasing order of frequency. Find text samples from several
  8.    different languages and see how letter frequency varies between languages.
  9.    Compare your results with the tables at
  10.    wikipedia.org/wiki/Letter_frequencies.
  11. """
  12.  
  13. ''' Imports '''
  14. import string
  15.  
  16. ''' Functions '''
  17. def most_frequent(text):
  18.     total = 0   # total count without punctuation etc.
  19.     letters = dict()
  20.     t = list()
  21.     r = []      # 0, 1, 2, ... 7, 8, 9
  22.     for i in range(10):
  23.         r.append(str(i))
  24.     for letter in text:
  25.         if (letter in string.punctuation or letter in [' ', '\n', '\t']
  26.             or letter in r):
  27.            continue     # skip non-alphabetic letters
  28.        # TODO: translate ä -> a (...)
  29.         letter = letter.lower()
  30.         letters[letter] = letters.get(letter, 0) + 1
  31.         total += 1
  32.     # Create a list to sort by frequency
  33.     for items in letters:
  34.         t.append((letters[items], items))
  35.         t.sort(reverse=True)
  36.     # Print-out
  37.     for i in t:
  38.         percentage = round(float(i[0]) / float(total) * 100, 3)
  39.         print(str(i[0]) + ":\t'" + str(i[1]) + "'\t" + str(percentage) + "%")
  40.     return (t, total)
  41.  
  42. ''' Test '''
  43. # TODO: guard
  44. text = ""
  45. file = open("mbox-short.txt", "r")
  46. for line in file:
  47.     text += line
  48. result = most_frequent(text)
  49. file.close()
  50.  
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement