acclivity

pyCommonestWordsInBook

Mar 23rd, 2021 (edited)
201
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 1.02 KB | None | 0 0
  1. # Find the ten commonest words in "Alice In Wonderland"
  2. import re
  3. word_dict = {}
  4. freq_list = []
  5. with open("Alice.txt") as alice:
  6.     text = alice.read()     # Read the whole text in one go!
  7.     text = text.lower()     # Convert the whole text to lower case
  8.     # Split the text on all word separators
  9.     text_list2 = re.split(', |; |: |\. |"|[ ]+|\'|\n', text)
  10.     # Build a dictionary of all words in the text, keeping a tally of each word
  11.     for word in text_list2:
  12.         if word in word_dict:
  13.             word_dict[word] += 1
  14.         else:
  15.             word_dict[word] = 1
  16.     # Now construct a list of tuples holding (count, word)
  17.     for word in word_dict.keys():
  18.         freq_list.append((word_dict[word], word))
  19.     # Sort the word list by count (in descending order)
  20.     freq_list.sort(reverse=True)
  21.     # Print the top 10 commonest words
  22.     for x in range(1, 11):          # (skip the first, which is "space" [goodness knows why it is])
  23.         tup = freq_list[x]
  24.         print(tup[1].ljust(12), tup[0])
  25.  
Add Comment
Please, Sign In to add comment