Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- #read in a txt x
- #change txt input to list x
- #take out all punctuations x
- #split into words x
- #def that can tell what paragraph each word is in
- #puts the words into alphabetical order and prints out what paragraph it was in
- #def that prints how often each word comes up into a top 10 list
- #top 20 list
def open_file(filename):
    """Open *filename* for reading in text mode and return the file object.

    The caller owns the returned handle and is responsible for closing it.
    Raises FileNotFoundError (from ``open``) when the file does not exist.
    """
    return open(filename, mode="r")
def list_object(file_object):
    """Return the lines of *file_object* with ASCII punctuation removed.

    *file_object* may be any iterable of strings (an open text file yields
    one string per line). Each line is kept as its own list element, and
    whitespace -- including the trailing newline -- is left untouched.

    Every character in ``string.punctuation`` is stripped, not just commas
    and periods, so that "word!", "word?" and "word" are all treated as the
    same word by later counting steps. Non-ASCII punctuation (for example
    curly quotes) is not removed.
    """
    # Local import: this file has no top-level import block to extend.
    import string

    # One translation table, built once, deletes every punctuation character
    # in a single pass per line.
    strip_punctuation = str.maketrans("", "", string.punctuation)
    return [line.translate(strip_punctuation) for line in file_object]
def words_object(lists):
    """Flatten a list of text lines into one list of words.

    Each string in *lists* is split on runs of whitespace; the resulting
    words are returned in their original order. Blank lines contribute
    nothing.
    """
    return [token for line in lists for token in line.split()]
def alphab_object(words):
    """Sort *words* in place and return the same list.

    Ordering is Python's default string ordering, so uppercase letters sort
    before lowercase ones.
    """
    # Slice assignment keeps the caller's list object and replaces its
    # contents, so the sort is still visible through the original reference.
    words[:] = sorted(words)
    return words
def wordcount_object(words):
    """Count occurrences of each word and return them ordered by count.

    Returns a list of ``(word, count)`` tuples sorted by ascending count, so
    the most frequent words are at the END of the list. Words with equal
    counts keep the order in which they first appeared in *words*.
    """
    tally = {}
    for token in words:
        tally[token] = tally.get(token, 0) + 1
    # sorted() is stable, which is what preserves first-seen order on ties.
    return sorted(tally.items(), key=lambda pair: pair[1])
def _print_top_counts(sorted_counts, limit):
    """Print up to *limit* ``word: count`` lines, highest count first.

    *sorted_counts* is a list of ``(word, count)`` tuples in ascending count
    order; *limit* must be a positive integer.
    """
    # Slicing (unlike indexing) simply yields fewer items when the list is
    # shorter than *limit*, so short or empty inputs cannot raise IndexError.
    for key, value in reversed(sorted_counts[-limit:]):
        print(f"{key}: {value}")


def main():
    """Prompt for a text file and print its word list and top word counts.

    Prints every word of the file in sorted order, then the 10 and the 20
    most frequent words. If the file has fewer distinct words than a list
    asks for, that list is simply shorter. Prints an error message and
    returns if the file does not exist.
    """
    filename = input("Enter name of file: ")
    # Keep the try body to the one operation that can raise
    # FileNotFoundError, and use a context manager so the file is closed as
    # soon as its lines have been read.
    try:
        with open_file(filename) as file_object:
            lists = list_object(file_object)
    except FileNotFoundError:
        print("Filename " + filename + " not found!")
        return

    words = alphab_object(words_object(lists))
    sortedValue = wordcount_object(words)

    print("The paragraph index:")
    for word in words:
        print(word)
    print("")

    print("The highest 10 counts:")
    _print_top_counts(sortedValue, 10)
    print("")

    print("The highest 20 counts:")
    _print_top_counts(sortedValue, 20)
# Script entry point: run the interactive word report.
main()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement