Advertisement
Guest User

Untitled

a guest
Nov 30th, 2015
60
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 5.34 KB | None | 0 0
  1. # File: Books.py
  2.  
  3. # Description: books
  4.  
  5. # Student Name: Marya Unwala
  6.  
  7. # Student UT EID: Miu66
  8.  
  9. # Course Name: CS 303E
  10.  
  11. # Unique Number: 50475
  12.  
  13. # Date Created: 11/20
  14.  
  15. # Date Last Modified: 11/
  16.  
  17.  
  18. # Create word dictionary from the comprehensive word list
  19. dictionary_list = {}
  20. def create_word_dict ():
  21. in_file= open("./words.txt","r")
  22.  
  23. for line in in_file:
  24. line = line.strip()
  25. dictionary_list[line] = 1
  26.  
  27.  
  28. def filter_string (line):
  29. s = ''
  30. if( line[-2:] == "'s"):
  31. s = line[:-2]
  32.  
  33. if(line[-2:]== "s'"):
  34. s = line[:-2]
  35. # return st[:-2]
  36.  
  37. for word in line:
  38. if (ord(word) >= 65 and ord(word) <= 90) or ( ord(word) >= 97 and ord(word) <= 122):
  39. s += word
  40. else:
  41. s += ' '
  42.  
  43. return s
  44.  
  45.  
  46.  
  47. # Returns a dictionary of words and their frequencies
  48. def getWordFreq (b_name):
  49. global dictionary_list
  50. #open the book
  51. book = open (b_name, 'r')
  52.  
  53. # create an empty set for words
  54. word_set = set()
  55.  
  56. # create a dictionary for word frequency
  57. word_dict = {}
  58.  
  59. # track the total nmuber of words
  60. total_words = 0
  61.  
  62. # read the book line by line <---------------------------------------------------------------here
  63. for line in book:
  64. words = line.strip().split()
  65. for word in words:
  66. word = filter_string (word) <-------------------------------------Here
  67.  
  68. # split the line into words
  69. word_list = line.split()
  70.  
  71. # add words to set and dictionary
  72. for word in word_list: # For each word
  73.  
  74.  
  75. # Adding word to the set
  76. word_set.add(word)
  77. total_words += 1
  78.  
  79. # add words to the dictionary
  80. if word in word_dict:
  81. word_dict[word] = word_dict[word] + 1
  82. else:
  83. word_dict[word] = 1
  84. #print(len(word_set))
  85.  
  86. capital_list = {}
  87. for word in word_dict.keys():
  88. if any(letter.isupper() for letter in word):
  89. capital_list[word] = word_dict[word]
  90.  
  91. for cap_word in capital_list:
  92. if cap_word.lower() in word_dict:
  93. word_dict[cap_word.lower()] = word_dict[cap_word.lower()] + capital_list[cap_word]
  94. elif cap_word.lower() in dictionary_list:
  95. word_dict[cap_word.lower()] = + dictionary_list[cap_word.lower()]
  96. word_set.add(cap_word.lower())
  97.  
  98. del word_dict[cap_word]
  99. word_set.remove(cap_word)
  100.  
  101.  
  102.  
  103. # close the file
  104. book.close()
  105.  
  106. # print total and unique words
  107.  
  108. num_unique_words = len (word_set)
  109. word_ratio = num_unique_words / total_words
  110.  
  111.  
  112. return num_unique_words, word_ratio, total_words, word_set, word_dict
  113. # return word_dict, word_set
  114.  
  115.  
  116.  
  117. # Compares the distinct words in two dictionaries
  118.  
  119. def wordComparison (author1,word_set_book1, word_dict_book1,author2,word_set_book2, word_dict_book2):
  120.  
  121. count_1 = 0
  122. for freq in word_dict_book1:
  123. count_1 = count_1 + word_dict_book1[freq]
  124.  
  125. count_2 = 0
  126. for freq in word_dict_book2:
  127. count_2 = count_2 + word_dict_book2[freq]
  128.  
  129.  
  130. num_unique_words1 = len (word_set_book1)
  131. word_ratio1 = num_unique_words1 / count_1
  132.  
  133. num_unique_words2 = len (word_set_book2)
  134. word_ratio2 = num_unique_words2 / count_2
  135.  
  136.  
  137. uniqueS1 = word_set_book1 - word_set_book2
  138. uniqueS2 = word_set_book2 - word_set_book1
  139.  
  140. uniqueS1 = [x.lower() for x in uniqueS1]
  141. uniqueS2 = [x.lower() for x in uniqueS2]
  142.  
  143.  
  144. beep =0
  145. for m in uniqueS1:
  146. beep += word_dict_book1[m]
  147.  
  148.  
  149. boop =0
  150. for k in uniqueS2:
  151. boop += word_dict_book2[k]
  152.  
  153.  
  154.  
  155. print(str(author1))
  156. print('Total distinct words =', num_unique_words1)
  157. print('Total words (including duplicates) =', count_1)
  158. print('Ratio (% of total distinct words to total words) = ' + str(100 * num_unique_words1/count_1))
  159.  
  160. print()
  161.  
  162. print(author2)
  163. print('Total distinct words =', num_unique_words2)
  164. print('Total words (including duplicates) =', count_2)
  165. print('Ratio (% of total distinct words to total words) = ' + str(100 * num_unique_words2/count_2))
  166.  
  167.  
  168. print()
  169.  
  170. print(str(author1) + ' used ' + str(len(uniqueS1)) + ' words that ' + str(author2)+ ' did not use.')
  171. print('Relative frequency of words used by ' + str(author1) + ' not in common with ' + ' = ' + str(beep/count_1 * 100))
  172.  
  173. print()
  174.  
  175. print(str(author2) + ' used ' + str(len(uniqueS2)) + ' words that ' + str(author1) + ' did not use.')
  176. print('Relative frequency of words used by ' + str(author2) + ' not in common with ' + str(author1) + ' = ' + str(boop/count_2 * 100 ))
  177.  
  178.  
  179.  
  180.  
  181. def main():
  182. # Create word dictionary from comprehensive word list
  183. create_word_dict()
  184.  
  185. # Enter names of the two books in electronic form
  186. book1 = input ("Enter name of first book: ")
  187. book2 = input ("Enter name of second book: ")
  188. print()
  189.  
  190.  
  191. # Enter names of the two authors
  192. author1 = input ("Enter last name of first author: ")
  193. author2 = input ("Enter last name of second author: ")
  194. print()
  195.  
  196. # Get the frequency of words used by the two authors
  197. # wordFreq1, var2,vqr3,var4 = getWordFreq (book1)
  198. num_unique_words_book1, word_ratio_book1, total_words_book1, word_set_book1, word_dict_book1 = getWordFreq(book1)
  199. num_unique_words_book2, word_ratio_book2, total_words_book2, word_set_book2, word_dict_book2 = getWordFreq(book2)
  200. #wordFreq2 = getWordFreq (book2)
  201.  
  202. # Compare the relative frequency of uncommon words used
  203. # by the two authors
  204. wordComparison(author1,word_set_book1, word_dict_book1,author2, word_set_book2, word_dict_book2)
  205.  
  206. main()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement