Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
# Find the ten commonest words in "Alice In Wonderland"
import re
from collections import Counter

# Word separators: ", "  "; "  ": "  ". "  a double quote, a run of spaces,
# an apostrophe, or a newline.  Written as a raw string so that "\." reaches
# the regex engine untouched instead of being an invalid string escape.
WORD_SEPARATORS = re.compile(r', |; |: |\. |"|[ ]+|\'|\n')


def count_words(text):
    """Return a Counter mapping each lower-cased word in *text* to its tally.

    re.split() yields an empty string wherever two separators are adjacent
    (for example a space followed by a double quote, or a blank line).
    Those empty strings are not words, so they are dropped here rather than
    being counted and then skipped when printing.
    """
    tokens = WORD_SEPARATORS.split(text.lower())
    return Counter(token for token in tokens if token)


def commonest_words(text, how_many=10):
    """Return up to *how_many* (count, word) tuples, most frequent first.

    Words with equal counts are ordered by the word itself, descending,
    because the (count, word) tuples are sorted in reverse as a whole.
    Fewer than *how_many* tuples are returned when the text does not
    contain that many distinct words.
    """
    ranked = sorted(
        ((count, word) for word, count in count_words(text).items()),
        reverse=True,
    )
    return ranked[:how_many]


def main():
    """Read Alice.txt and print its ten commonest words with their counts."""
    with open("Alice.txt") as alice:
        text = alice.read()  # Read the whole text in one go
    for count, word in commonest_words(text):
        print(word.ljust(12), count)


if __name__ == "__main__":
    main()
Add Comment
Please, Sign In to add comment