Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
''' Word_frequency3a.py
experiments with string processing
preprocess the string and do a word frequency count
words with matching frequency are listed in alphabetical order
'''

from string import punctuation
from collections import Counter

# sample text for testing (could come from a text file)
text = """\
A mouse can slip through a hole the size of a penny.
A giraffe can clean its ears with its tongue.
A giraffe's heart beats 50 times a minute.
A dog's heart beats 100 times a minute.
A hedgehog's heart beats 300 times a minute.
"""

# since giraffe's would turn into giraffes, optionally remove 's
text2 = text.replace("'s", "")

# remove punctuation marks and change to lower case
text3 = ''.join(c for c in text2.lower() if c not in punctuation)

# text3.split() splits text3 at white spaces
word_list = text3.split()

# build a list of (word, frequency) tuples of the 10 most common words.
# Primary sort key is the frequency (descending, hence the negation);
# the word itself is the secondary key, so words sharing a frequency come
# out alphabetically and the result is the same on every Python version.
# Counter.most_common() alone does not sort words with equal counts.
wf_tuple_list = sorted(
    Counter(word_list).items(),
    key=lambda wf: (-wf[1], wf[0])
)[:10]

for w, f in wf_tuple_list:
    # newer string formatting style Python27 and higher
    print("{:3d} {}".format(f, w))

''' result (10 most common words) ...
 10 a
  3 beats
  3 heart
  3 minute
  3 times
  2 can
  2 giraffe
  2 its
  1 100
  1 300
'''
Advertisement
Add Comment
Please, Sign In to add comment