Advertisement
Guest User

Untitled

a guest
Oct 21st, 2019
161
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 2.08 KB | None | 0 0
  1. #read in a txt x
  2. #change txt input to list x
  3. #take out all punctuation x
  4. #split into words x
  5. #def that can tell what paragraph each word is in
  6. #puts the words into alphabetical order and prints out what paragraph it was in
  7. #def that prints how often each word comes up into a top 10 list
  8. #top 20 list
  9.  
  def open_file(filename):
      """Open the text file *filename* for reading and return the open handle.

      The handle is handed back still open; closing it is the caller's job.
      Any error raised by ``open`` (for example ``FileNotFoundError``) is
      propagated unchanged.
      """
      return open(filename, mode="r")
  13.  
  def list_object(file_object):
      """Return the lines of *file_object* with all ASCII punctuation removed.

      *file_object* may be any iterable of strings (an open text file yields
      one string per line). Each string is kept as its own list entry and
      keeps its whitespace, including the trailing newline.

      Every character in ``string.punctuation`` is deleted, not just commas
      and periods, so that "word!", "word?" and "word" end up as the same word.
      """
      # Local import: the script has no import block to extend.
      import string

      # One translation table, built once, deletes every punctuation character
      # in a single pass per line.
      strip_punctuation = str.maketrans("", "", string.punctuation)
      return [line.translate(strip_punctuation) for line in file_object]
  19.  
  def words_object(lists):
      """Flatten an iterable of text lines into one list of words.

      Each line is split on runs of whitespace (``str.split()`` with no
      arguments), and the resulting words are collected in their original
      order.
      """
      return [token for line in lists for token in line.split()]
  25.  
  def alphab_object(words):
      """Sort *words* in place into ascending order and return the same list.

      The ordering is Python's default string comparison, so upper-case
      letters sort before lower-case ones.
      """
      # Slice assignment keeps the caller's list object and only replaces its
      # contents, so the argument itself ends up sorted.
      words[:] = sorted(words)
      return words
  29.  
  def wordcount_object(words):
      """Count how often each word occurs and return the counts sorted ascending.

      Returns a list of ``(word, count)`` tuples ordered from the least to the
      most frequent word. Words with equal counts stay in the order in which
      they were first seen, because the sort is stable.
      """
      tally = {}
      for token in words:
          tally[token] = tally.get(token, 0) + 1

      def by_count(entry):
          # entry is a (word, count) pair; order by the count only.
          return entry[1]

      return sorted(tally.items(), key=by_count)
  39.  
  40.  
  41.  
  def _print_top_counts(sorted_counts, limit):
      """Print up to *limit* ``word: count`` lines, highest count first.

      *sorted_counts* is a list of ``(word, count)`` pairs in ascending count
      order, so the most frequent words sit at the end of the list.
      """
      # Slicing, unlike indexing, copes with fewer than *limit* entries, so a
      # short text prints what it has and does not raise IndexError.
      for key, value in reversed(sorted_counts[-limit:]):
          print(f"{key}: {value}")


  def main():
      """Ask for a text file, then print its sorted words and top word counts.

      Prints every word of the file in sorted order, followed by the 10 and
      then the 20 most frequent words. If the file does not exist, a message
      is printed and the function returns without further output.
      """
      filename = input("Enter name of file: ")

      # Only opening the file can raise FileNotFoundError, so only that call
      # is guarded.
      try:
          file_object = open_file(filename)
      except FileNotFoundError:
          print("Filename " + filename + " not found!")
          return

      # The lines are read eagerly, so the file can be closed right afterwards.
      with file_object:
          lists = list_object(file_object)

      words = alphab_object(words_object(lists))
      sortedValue = wordcount_object(words)

      print("The paragraph index:")
      for word in words:
          print(word)
      # NOTE(review): the source lost its indentation; the blank prints are
      # assumed to separate the sections, not to follow every item.
      print("")

      print("The highest 10 counts:")
      _print_top_counts(sortedValue, 10)
      print("")

      print("The highest 20 counts:")
      _print_top_counts(sortedValue, 20)
  69.  
  # Run the interactive prompt only when executed as a script, so importing
  # this module does not trigger it.
  if __name__ == "__main__":
      main()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement