Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- # File: Books.py
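- # Description: Compares the vocabulary of two books - distinct words, total words, and the relative frequency of words used by only one of the two authors.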
- # Student Name: Marya Unwala
- # Student UT EID: Miu66
- # Course Name: CS 303E
- # Unique Number: 50475
- # Date Created: 11/20
- # Date Last Modified: 11/
# Create word dictionary from the comprehensive word list
dictionary_list = {}


def create_word_dict():
    """Load the reference word list into the module-level ``dictionary_list``.

    Reads ``./words.txt`` (resolved against the current working directory)
    line by line, strips surrounding whitespace from each line and stores the
    result as a key mapped to ``1``.  Entries are added to the existing
    dictionary in place, so the dictionary object itself is never rebound.

    Raises:
        OSError: if ``./words.txt`` cannot be opened.
    """
    # The context manager closes the file even if reading fails part-way;
    # the previous version never closed the handle at all.
    with open("./words.txt", "r") as in_file:
        for line in in_file:
            dictionary_list[line.strip()] = 1
def filter_string(line):
    """Return *line* with a trailing possessive dropped and non-letters blanked.

    If *line* ends in ``'s`` or ``s'`` the last two characters are removed.
    Every remaining character that is not an ASCII letter (A-Z or a-z) is
    replaced by a single space, so the result may contain spaces and callers
    that need individual words should split it again.

    Args:
        line: the text to clean (typically one whitespace-delimited token).

    Returns:
        The cleaned string; an empty string for empty input.
    """
    # Trim the possessive BEFORE filtering.  The earlier version seeded the
    # result with the trimmed text and then appended every character of the
    # untrimmed input as well, turning "dog's" into "dogdog s".
    if line.endswith(("'s", "s'")):
        line = line[:-2]
    return "".join(
        ch if ("A" <= ch <= "Z" or "a" <= ch <= "z") else " "
        for ch in line
    )
# Returns a dictionary of words and their frequencies
def getWordFreq(b_name):
    """Count the words of the book stored in the text file *b_name*.

    Each whitespace-delimited token is cleaned with ``filter_string`` and the
    cleaned text is split again, because cleaning can turn one token into
    several words (e.g. a hyphenated word).  Words containing an uppercase
    letter are then folded into their lowercase form:

    * if the lowercase form already occurs in the book, the counts are merged;
    * otherwise, if the lowercase form is a key of the module-level
      ``dictionary_list``, the count is moved to the lowercase form;
    * otherwise the word is discarded (treated as a proper noun).

    Args:
        b_name: path of the book file to read.

    Returns:
        A tuple ``(num_unique_words, word_ratio, total_words, word_set,
        word_dict)`` where ``word_dict`` maps each remaining word to its
        frequency, ``word_set`` is the set of its keys, ``total_words`` is the
        sum of the frequencies (discarded words are not counted) and
        ``word_ratio`` is ``num_unique_words / total_words`` (``0.0`` when the
        book contains no countable words).

    Raises:
        OSError: if the book file cannot be opened.
    """
    # frequency of every cleaned word, still case-sensitive at this point
    word_dict = {}

    # read the book line by line; the file is closed even if reading fails
    with open(b_name, "r") as book:
        for line in book:
            for raw_word in line.split():
                # Use the cleaned text (the result used to be thrown away)
                # and re-split it, since punctuation becomes spaces.
                for word in filter_string(raw_word).split():
                    word_dict[word] = word_dict.get(word, 0) + 1

    # Snapshot the capitalised words first so word_dict can be modified
    # safely while they are being folded.
    capital_list = {
        word: count
        for word, count in word_dict.items()
        if any(letter.isupper() for letter in word)
    }
    for cap_word, count in capital_list.items():
        lower_word = cap_word.lower()
        if lower_word in word_dict:
            word_dict[lower_word] += count
        elif lower_word in dictionary_list:
            # Carry over the real frequency of the capitalised word; the
            # former "= +" assignment stored the word-list value 1 instead.
            word_dict[lower_word] = count
        # The capitalised spelling never survives: it was either folded into
        # its lowercase form above or is dropped here as a proper noun.
        del word_dict[cap_word]

    # Derive the totals from the final table so that they agree with each
    # other and with what wordComparison computes from the same data.
    word_set = set(word_dict)
    num_unique_words = len(word_set)
    total_words = sum(word_dict.values())
    # guard against an empty book (or one made up only of discarded words)
    word_ratio = num_unique_words / total_words if total_words else 0.0
    return num_unique_words, word_ratio, total_words, word_set, word_dict
# Compares the distinct words in two dictionaries
def wordComparison(author1, word_set_book1, word_dict_book1, author2, word_set_book2, word_dict_book2):
    """Print vocabulary statistics for two authors and compare them.

    For each author this prints the number of distinct words, the total
    number of words (sum of all frequencies) and their ratio as a percentage.
    It then prints, for each author, how many words only that author used and
    what share of that author's total words those exclusive words make up.

    Args:
        author1: name printed for the first book.
        word_set_book1: set of distinct words in the first book.
        word_dict_book1: mapping word -> frequency for the first book; must
            contain every word of ``word_set_book1``.
        author2: name printed for the second book.
        word_set_book2: set of distinct words in the second book.
        word_dict_book2: mapping word -> frequency for the second book; must
            contain every word of ``word_set_book2``.

    Returns:
        None. All results are written to standard output.
    """
    # total words including duplicates
    count_1 = sum(word_dict_book1.values())
    count_2 = sum(word_dict_book2.values())
    num_unique_words1 = len(word_set_book1)
    num_unique_words2 = len(word_set_book2)

    # Words used by exactly one of the two authors.  They are looked up under
    # their original spelling: lowercasing them first (as was done before)
    # raises KeyError for any key that is not already lowercase.
    uniqueS1 = word_set_book1 - word_set_book2
    uniqueS2 = word_set_book2 - word_set_book1
    beep = sum(word_dict_book1[m] for m in uniqueS1)
    boop = sum(word_dict_book2[k] for k in uniqueS2)

    # An empty frequency table would divide by zero; report 0.0 instead.
    ratio_1 = 100 * num_unique_words1 / count_1 if count_1 else 0.0
    ratio_2 = 100 * num_unique_words2 / count_2 if count_2 else 0.0
    exclusive_1 = beep / count_1 * 100 if count_1 else 0.0
    exclusive_2 = boop / count_2 * 100 if count_2 else 0.0

    print(str(author1))
    print('Total distinct words =', num_unique_words1)
    print('Total words (including duplicates) =', count_1)
    print('Ratio (% of total distinct words to total words) = ' + str(ratio_1))
    print()
    print(author2)
    print('Total distinct words =', num_unique_words2)
    print('Total words (including duplicates) =', count_2)
    print('Ratio (% of total distinct words to total words) = ' + str(ratio_2))
    print()
    print(str(author1) + ' used ' + str(len(uniqueS1)) + ' words that ' + str(author2) + ' did not use.')
    # This message used to omit the second author's name.
    print('Relative frequency of words used by ' + str(author1) + ' not in common with ' + str(author2) + ' = ' + str(exclusive_1))
    print()
    print(str(author2) + ' used ' + str(len(uniqueS2)) + ' words that ' + str(author1) + ' did not use.')
    print('Relative frequency of words used by ' + str(author2) + ' not in common with ' + str(author1) + ' = ' + str(exclusive_2))
def main():
    """Prompt for two books and their authors, then print the comparison.

    Loads the reference word list, asks on standard input for the two book
    file names and the two authors' last names, builds the word statistics
    for each book and hands them to ``wordComparison`` for printing.
    """
    # Create word dictionary from comprehensive word list
    create_word_dict()

    # Enter names of the two books in electronic form
    book1 = input("Enter name of first book: ")
    book2 = input("Enter name of second book: ")
    print()

    # Enter names of the two authors
    author1 = input("Enter last name of first author: ")
    author2 = input("Enter last name of second author: ")
    print()

    # Get the frequency of words used by the two authors.  Only the word set
    # and the frequency table (the last two items returned) are needed here.
    *_, word_set_book1, word_dict_book1 = getWordFreq(book1)
    *_, word_set_book2, word_dict_book2 = getWordFreq(book2)

    # Compare the relative frequency of uncommon words used
    # by the two authors
    wordComparison(author1, word_set_book1, word_dict_book1, author2, word_set_book2, word_dict_book2)


# Run the interactive program only when executed as a script, so importing
# this module does not block on input().
if __name__ == "__main__":
    main()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement