SHARE
TWEET

Untitled

a guest Oct 21st, 2019 78 Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
  1. #read in a txt x
  2. #change txt input to list x
  3. #take out all punctuations x
  4. #split into words x
  5. #def that can tell what paragraph each word is in
  6. #puts the words into alphabetical order and prints out what paragraph it was in
  7. #def that prints how often each word comes up into a top 10 list
  8. #top 20 list
  9.  
  def open_file(filename):
      """Open *filename* for reading in text mode and return the file object.

      The caller owns the returned handle and is responsible for closing it.
      Any error raised by ``open`` (for example ``FileNotFoundError``)
      propagates unchanged.
      """
      return open(filename, mode="r")
  13.  
  def list_object(file_object):
      """Return the lines of *file_object* with ASCII punctuation removed.

      *file_object* may be any iterable of strings (an open text file yields
      one string per line). Every character in ``string.punctuation`` is
      deleted, not just commas and periods, so marks such as ``!``, ``?``,
      quotes and parentheses no longer stay attached to words. Whitespace,
      including each line's trailing newline, is left untouched.

      Returns a new list with one cleaned string per input line.
      """
      # Local import: the script has no top-level import block to extend.
      import string

      # One translation table, built once, deletes all punctuation in a
      # single pass per line.
      strip_punctuation = str.maketrans("", "", string.punctuation)
      return [line.translate(strip_punctuation) for line in file_object]
  19.  
  def words_object(lists):
      """Flatten a list of text lines into a single list of words.

      Each string in *lists* is split on runs of whitespace; the resulting
      words are returned in their original order.
      """
      return [token for line in lists for token in line.split()]
  25.  
  def alphab_object(words):
      """Sort *words* in place and return the same list.

      Ordering is Python's default string comparison, so uppercase letters
      sort before lowercase ones.
      """
      # Slice assignment keeps the caller's list object, matching an
      # in-place sort.
      words[:] = sorted(words)
      return words
  29.  
  def wordcount_object(words):
      """Count word occurrences and return them sorted by ascending count.

      Returns a list of ``(word, count)`` tuples. The sort is stable, so
      words with equal counts keep the order in which they first appeared
      in *words*; the most frequent word ends up last.
      """
      tally = {}
      for token in words:
          tally[token] = tally.get(token, 0) + 1

      pairs = list(tally.items())
      pairs.sort(key=lambda pair: pair[1])
      return pairs
  39.  
  40.  
  41.  
  def main():
      """Prompt for a text file, then print its sorted words and top counts.

      Reads a filename from standard input, loads the file through
      ``open_file`` / ``list_object`` / ``words_object``, sorts the words with
      ``alphab_object`` and tallies them with ``wordcount_object``. It then
      prints every word in sorted order, followed by the 10 and the 20 most
      frequent words (highest count first).

      If the file does not exist, a "not found" message is printed and the
      function returns. Files with fewer than 10 or 20 distinct words simply
      produce shorter lists instead of raising ``IndexError``.
      """
      filename = input("Enter name of file: ")

      # Only the open call can raise FileNotFoundError, so keep the try narrow.
      try:
          file_object = open_file(filename)
      except FileNotFoundError:
          print("Filename " + filename + " not found!")
          return

      # list_object returns a fully built list, so the handle can be closed
      # as soon as it has been consumed.
      with file_object:
          lists = list_object(file_object)

      words = alphab_object(words_object(lists))
      sortedValue = wordcount_object(words)

      # NOTE(review): the heading says "paragraph index", but only the sorted
      # words are printed; paragraph numbers are not tracked anywhere.
      print("The paragraph index:")
      for word in words:
          print(word)

      # sortedValue is ascending by count, so the most frequent entries are at
      # the end. Slicing (instead of negative indexing) tolerates short lists;
      # reversing keeps the original highest-first, last-to-first order.
      for limit in (10, 20):
          print("")
          print(f"The highest {limit} counts:")
          for key, value in reversed(sortedValue[-limit:]):
              print(f"{key}: {value}")
  69.  
  # Run the interactive program only when executed as a script, so importing
  # this module does not trigger the filename prompt.
  if __name__ == "__main__":
      main()
RAW Paste Data
We use cookies for various purposes including analytics. By continuing to use Pastebin, you agree to our use of cookies as described in the Cookies Policy. OK, I Understand
 
Top