Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- #! /usr/bin/env python3.2
- # -*- coding: utf-8 -*-
- # main.py
- """ Task: Exercise 10.3
- Write a function called most_frequent that takes a string and prints the
- letters in decreasing order of frequency. Find text samples from several
- different languages and see how letter frequency varies between languages.
- Compare your results with the tables at
- wikipedia.org/wiki/Letter_frequencies.
- """
- ''' Imports '''
- import string
- ''' Functions '''
def most_frequent(text):
    """Print the letters of *text* in decreasing order of frequency.

    Only alphabetic characters (as decided by ``str.isalpha``) are counted,
    after being lower-cased. Punctuation, digits and every kind of
    whitespace (including ``'\\r'``) are skipped.

    One line is printed per distinct letter: its count, the letter in
    single quotes, and its share of all counted letters as a percentage
    rounded to three decimals.

    Returns a tuple ``(pairs, total)``: ``pairs`` is a list of
    ``(count, letter)`` tuples sorted in descending order (ties are broken
    by descending letter), and ``total`` is the number of counted letters.
    For text without letters the result is ``([], 0)`` and nothing is
    printed.
    """
    # Local import so the function works without touching the file's
    # top-level import section.
    from collections import Counter

    # TODO: translate ä -> a (...)
    counts = Counter(char.lower() for char in text if char.isalpha())
    total = sum(counts.values())

    # (count, letter) tuples so a plain descending sort orders by frequency.
    pairs = sorted(
        ((count, letter) for letter, count in counts.items()),
        reverse=True,
    )

    # Print-out; the loop body never runs when total == 0, so the division
    # is safe.
    for count, letter in pairs:
        percentage = round(float(count) / float(total) * 100, 3)
        print("{0}:\t'{1}'\t{2}%".format(count, letter, percentage))
    return (pairs, total)
''' Test '''
if __name__ == "__main__":
    # Guarded so that importing this module does not read the sample file.
    # The context manager closes the file even if reading raises.
    with open("mbox-short.txt", "r") as file:
        text = file.read()
    result = most_frequent(text)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement