# Untitled

#  File: Books.py

#  Description: books

#  Student Name: Marya Unwala

#  Student UT EID: Miu66

#  Course Name: CS 303E

#  Unique Number: 50475

#  Date Created: 11/20

#  Date Last Modified: 11/


# Create word dictionary from the comprehensive word list
# Known English words, filled in by create_word_dict(); each key is a word
# from words.txt and each value is 1.
dictionary_list = {}
def create_word_dict ():
  """Load every word from ./words.txt into the module-level dictionary_list.

  Each non-blank line of the file, with surrounding whitespace removed,
  becomes a key whose value is 1.  Nothing is returned; the shared
  dictionary is updated in place.

  Raises:
    OSError: if ./words.txt cannot be opened.
  """
  # `with` guarantees the word list is closed even if reading fails
  with open("./words.txt","r") as in_file:
    for line in in_file:
      line = line.strip()
      # A blank line is not a word, so do not register '' as one
      if line:
        dictionary_list[line] = 1


def filter_string (line):
  """Return line with a possessive ending removed and punctuation blanked.

  A trailing "'s" is dropped entirely ("dog's" -> "dog").  For a trailing
  "s'" only the apostrophe is dropped, so the plural word itself survives
  ("dogs'" -> "dogs").  Every remaining character that is not an ASCII
  letter (A-Z, a-z) is replaced by one space, so the caller can split the
  result on whitespace (for example "well-known" -> "well known").

  Args:
    line: the text to clean, typically a single whitespace-delimited token.

  Returns:
    The cleaned string; it has the same length as the input once the
    possessive ending has been removed.
  """
  # Strip the possessive from the input itself.  Seeding the result with
  # the stripped text and then appending every character of the original
  # would duplicate the word ("dog's" -> "dogdog s").
  if line.endswith("'s"):
    line = line[:-2]
  elif line.endswith("s'"):
    line = line[:-1]

  return ''.join(
    ch if ('A' <= ch <= 'Z') or ('a' <= ch <= 'z') else ' '
    for ch in line
  )


# Returns the distinct-word count, the distinct/total ratio, the total word
# count, the set of words and the word-frequency dictionary for one book
def getWordFreq (b_name):
  """Tally the words found in the text file named b_name.

  Every whitespace-separated token of every line is cleaned with
  filter_string and the resulting words are counted.  Words containing an
  uppercase letter are then folded away:
    * if the lowercase form was also seen in the book, or is listed in the
      module-level dictionary_list, the count is added to the lowercase
      form;
    * otherwise the word is dropped (it is treated as a proper noun).

  Args:
    b_name: path of the book's text file.

  Returns:
    A tuple (num_unique_words, word_ratio, total_words, word_set,
    word_dict) where word_dict maps each retained word to its frequency,
    word_set is the set of its keys, total_words is the sum of the
    frequencies, and word_ratio is num_unique_words / total_words
    (0.0 when no words were retained).

  Raises:
    OSError: if the book cannot be opened.
  """
  # word -> number of occurrences, spelled as it appears in the book
  word_dict = {}

  # `with` closes the book even if reading or filtering raises
  with open (b_name, 'r') as book:
    for line in book:
      for token in line.split():
        # One token can clean up to several words ("well-known") or to
        # none at all ("--"), so split the filtered text again.
        for word in filter_string (token).split():
          word_dict[word] = word_dict.get(word, 0) + 1

  # Snapshot the capitalized entries first so word_dict can be changed
  # safely while they are being folded.
  capital_list = {}
  for word in word_dict:
    if any(letter.isupper() for letter in word):
      capital_list[word] = word_dict[word]

  for cap_word, count in capital_list.items():
    lower_word = cap_word.lower()
    if lower_word in word_dict or lower_word in dictionary_list:
      # Carry over the capitalized word's own frequency
      word_dict[lower_word] = word_dict.get(lower_word, 0) + count
    del word_dict[cap_word]

  # Derive the totals from the folded dictionary so the set, the total and
  # the ratio all describe the same collection of words.
  word_set = set(word_dict)
  num_unique_words = len (word_set)
  total_words = sum(word_dict.values())
  word_ratio = num_unique_words / total_words if total_words else 0.0

  return num_unique_words, word_ratio, total_words, word_set, word_dict


# Compares the distinct words in two dictionaries

def _print_book_summary (author, word_set, word_dict):
  # Print one author's distinct/total word statistics; return the total.
  num_unique_words = len (word_set)
  total = sum(word_dict.values())
  # An empty book has no meaningful ratio; report 0.0 instead of dividing by 0
  ratio = 100 * num_unique_words/total if total else 0.0

  print(str(author))
  print('Total distinct words =', num_unique_words)
  print('Total words (including duplicates) =', total)
  print('Ratio (% of total distinct words to total words) = ' + str(ratio))
  return total


def _print_unique_usage (author, other, unique_words, word_dict, total):
  # Print how many words only `author` used and their share of `total`.
  unique_total = sum(word_dict[word] for word in unique_words)
  rel_freq = unique_total/total * 100 if total else 0.0

  print(str(author) + ' used ' + str(len(unique_words)) + ' words that ' + str(other) + ' did not use.')
  print('Relative frequency of words used by ' + str(author) + ' not in common with ' + str(other) + ' = ' + str(rel_freq))


def wordComparison (author1,word_set_book1, word_dict_book1,author2,word_set_book2, word_dict_book2):
  """Print a vocabulary comparison of two authors.

  For each author the report shows the number of distinct words, the total
  number of words (the sum of the frequencies) and their ratio as a
  percentage.  It then shows, for each author, how many words the other
  author never used and what percentage of the author's total word count
  those words account for.

  Args:
    author1, author2: names printed in the report.
    word_set_book1, word_set_book2: sets of the distinct words per book.
    word_dict_book1, word_dict_book2: word -> frequency for each book; every
      word of the matching set must be a key.

  Returns:
    None; the report is written to standard output.
  """
  count_1 = _print_book_summary (author1, word_set_book1, word_dict_book1)
  print()
  count_2 = _print_book_summary (author2, word_set_book2, word_dict_book2)
  print()

  # Look the words up exactly as stored; changing their case first could
  # miss the dictionary key.
  uniqueS1 = word_set_book1 - word_set_book2
  uniqueS2 = word_set_book2 - word_set_book1

  _print_unique_usage (author1, author2, uniqueS1, word_dict_book1, count_1)
  print()
  _print_unique_usage (author2, author1, uniqueS2, word_dict_book2, count_2)


def main():
  """Prompt for two books and their authors, then print the comparison.

  Loads the word list with create_word_dict, asks for the two book file
  names and the two authors' last names, tallies each book with
  getWordFreq and prints the report with wordComparison.
  """
  # Create word dictionary from comprehensive word list
  create_word_dict()

  # Enter names of the two books in electronic form
  book1 = input ("Enter name of first book: ")
  book2 = input ("Enter name of second book: ")
  print()

  # Enter names of the two authors
  author1 = input ("Enter last name of first author: ")
  author2 = input ("Enter last name of second author: ")
  print()

  # Get the frequency of words used by the two authors; only the word set
  # and the frequency dictionary are needed for the comparison
  _, _, _, word_set_book1, word_dict_book1 = getWordFreq(book1)
  _, _, _, word_set_book2, word_dict_book2 = getWordFreq(book2)

  # Compare the relative frequency of uncommon words used
  # by the two authors
  wordComparison(author1,word_set_book1, word_dict_book1,author2, word_set_book2, word_dict_book2)

# Run only when executed as a script, so importing this file does not prompt
if __name__ == "__main__":
  main()