Advertisement
Guest User

Untitled

a guest
Nov 17th, 2019
116
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 9.28 KB | None | 0 0
  1. from time_this import *
  2. import copy
  3. import os
  4.  
  5. class Hash:
  6.  
  7. def __init__(self):
  8. self.table = [[None]] * 31
  9.  
  10. def __str__(self):
  11. return self.table
  12.  
  13. def next_prime(self):
  14. """
  15. Finds next prime number for new Hash Table capacity.
  16.  
  17. returns: new prime number which is roughly double in size
  18. """
  19. prime = len(self.table)
  20. for p in range(prime * 2, prime * 2 + prime):
  21. for i in range(2, p):
  22. if p % i == 0:
  23. break
  24. else:
  25. return p
  26.  
  27. def SortWord(self, word):
  28. """
  29. (Bubble Sort)
  30. Turns word into a list converted via 'ord'. The worst makes mulitple passhes through a list.
  31. It compares adjacent items and exchanges those that are out of order. Each pass through the list places the
  32. next largest value in its proper place. In essence, each item “bubbles” up to the location where it belongs.
  33. :param word:
  34. :return:
  35. """
  36. listed = list(word)
  37. alist = []
  38. for i in listed:
  39. alist.append(ord(i))
  40. for passnum in range(len(alist) - 1, 0, -1):
  41. for i in range(passnum):
  42. if alist[i] > alist[i + 1]:
  43. temp = alist[i]
  44. alist[i] = alist[i + 1]
  45. alist[i + 1] = temp
  46. newword = []
  47. for num in alist:
  48. newword.append(chr(num))
  49. return "".join(newword)
  50.  
  51. def hash_function(self,word, TableCapacity):
  52. """
  53. Finds a new index value for the Hash Table by converting each letter into a number via 'ord'. The algorithm
  54. I chose is totaling each converted letter and multiplying by its weight depending on its index
  55. (i.e. the second letter in of word bear, the letter "e", will be worth (99 * (99-96)). The usage of a
  56. weight helps create a consistent way of preventing two different words from having the same value.
  57. Lastly, it will mod by the length of the list (TableCapacity), it will return the remainder
  58. which will be the new index for the table.
  59.  
  60. :param TableCapacity, word:
  61. :return: new hash table index
  62. """
  63. total = 0
  64. for i in list(word):
  65. total = total + ord(i)*(ord(i)-96)
  66. while total >= TableCapacity:
  67. total = total % TableCapacity
  68. return total
  69.  
  70. def LoadFactor(self):
  71. """
  72. Checks load factor by counting the None's and divide by the length of the current table
  73. subtracted by 1. It also counts how many slots are occupied by subtracting the length with the NoneCount.
  74.  
  75. :param table:
  76. :return: Load factor rounded by 4 decimals.
  77. Number of occupied slots
  78. """
  79. NoneCount = 0
  80. for i in self.table:
  81. if i == [None]:
  82. NoneCount += 1
  83. LoadSize =(1 - NoneCount / len(self.table))
  84. occupied = (len(self.table) - NoneCount)
  85. return round(LoadSize, 4), occupied
  86.  
  87. def store(self, word):
  88. """
  89. uses hash_function to find index value and stores the data in the corresponding index value
  90. in the Hash Table.
  91.  
  92. The conflict algorithm is simple, if neither of the two if statements pass, the index will be added +1 so
  93. the word will traverse to the list to the right. If the index is at the end of the list, it will revert
  94. back to [0] and start from the beginning until there is no more conflict.
  95.  
  96. This method will keep track of number of conflicts and unique items stored.
  97.  
  98.  
  99. :param word:
  100. :return: current Hash table, number of unique keys, and conflicts
  101. """
  102. IndexList = []
  103. unique = 0
  104. conflicts = 0
  105. index = self.hash_function(self.SortWord(word),len(self.table))
  106. StoreStatus = False
  107. while StoreStatus is not True:
  108. if self.LoadFactor()[0] > 0.7: # Checks if load factor is above 70%. If true: rehash table.
  109. self.rehash()
  110. else:
  111.  
  112. if self.table[index] == [None]:
  113. """
  114. This checks to see if a KEY exists at given index
  115. If not, stores KEY and VALUE within the index.
  116. """
  117. self.table[index] = [self.SortWord(word)]
  118. self.table[index].append(word)
  119. IndexList.append(index)
  120. StoreStatus = True
  121. elif self.table[index][0] == self.SortWord(word) and word not in self.table[index]:
  122. """
  123. This checks if the sorted word already exists. Which means the KEY is already there.
  124. If so, then append the original (NOT SORTED) word to be viewed as a VALUE to the KEY.
  125. """
  126. self.table[index].append(word)
  127. StoreStatus = True
  128. else:
  129. if index >= (len(self.table) - 1): # When we're at the end of the list
  130. index = 0
  131. conflicts += 1
  132. else:
  133. index += 1
  134. conflicts += 1
  135. return self.table, unique, conflicts
  136.  
  137. def get(self,word):
  138. """
  139. Accepts a key and finds and returns its anagrams.
  140.  
  141. Because an index is a list that stores both keys and values, anything past the [0] index
  142. are the values. Where [0] is the key. The method returns anything past [1:] to show you the values.
  143.  
  144. :param word:
  145. :return: all anagrams with given key and number of values stored with the key.
  146. """
  147. index = self.hash_function(word, len(self.table))
  148. for i in range(len(self.table)):
  149. if self.table[index][0] == word:
  150. values = self.table[index][1:]
  151. FixValuesFormat = (', '.join(values))
  152. length = len(values)
  153. return FixValuesFormat, length
  154. else:
  155. if index == (len(self.table)-1):
  156. index = 0
  157. index += 1
  158. values = self.table[index][1:]
  159. FixValuesFormat = (', '.join(values))
  160. length = len(values)
  161. return FixValuesFormat, length
  162.  
  163. def rehash(self):
  164. """
  165. Keeps track of the contents of the hash table and their locations and is stored in a
  166. temp variable. A new hash table is created with the length of the next prime number (almost doubled).
  167. Using the old list, this method will know what/how to store the current items into the new hash table.
  168. :return:
  169. """
  170. IndexList = []
  171. print("Rehashing...........")
  172. temp = self.table
  173. tempindex = IndexList[:]
  174. IndexList.clear()
  175. self.table = [[None]] * self.next_prime()
  176. for i in tempindex:
  177. for word in temp[i][1:]:
  178. self.store(word)
  179. print("New and current list: ",(self.table),"\nwhich as a length of :", len(self.table),"\n\ncontinuing.... please wait")
  180.  
  181. def main():
  182.  
  183. h = Hash()
  184. IndexList = []
  185.  
  186. w = open("Resources\output.txt", "w")
  187.  
  188. DictionaryFileName = os.path.basename("Resources\dictionary_test.txt")
  189. opendictionary = open("Resources\dictionary_test.txt", "r")
  190. DictionaryList = opendictionary.read().splitlines()
  191.  
  192. unique = 0
  193. conflicts = 0
  194. count = 0
  195. w.write("Program Start"
  196. "\nHash Table details: "+str(len(h.table))+" slots, "+str(h.LoadFactor()[1])+" occupied, load factor = "+str(h.LoadFactor()[0]))
  197. w.write("\nStart reading words from file "+ str(DictionaryFileName))
  198. for i in DictionaryList:
  199. count += 1
  200. if h.LoadFactor()[0] > 0.7:
  201. w.write("\n \t Before expansion: "+str(len(h.table)) + " slots, "+str(h.LoadFactor()[1]) +" occupied, load factor = "+str(h.LoadFactor()[0]))
  202. storeoutput = h.store(i)
  203. print(count)
  204. w.write("\n \t After expansion: "+str(len(h.table)) + " slots, "+str(h.LoadFactor()[1]) +" occupied, load factor = "+str(h.LoadFactor()[0]))
  205. else:
  206. storeoutput = h.store(i)
  207. conflicts += storeoutput[2]
  208. unique += storeoutput[1]
  209. w.write("\n\nEnd reading words from file "+DictionaryFileName+"\n"
  210. "HashTable details: "+str(len(h.table)) +" slots, "+str(h.LoadFactor()[1])+" occupied, load factor = "+str(h.LoadFactor()[0])+
  211. "\nNumber of unique keys inserted into HashTable = "+str(unique)+
  212. "\nNumber of keys conflicts encounter = "+ str(conflicts))
  213.  
  214. opendictionary.close()
  215. InputFileName = os.path.basename("Resources\input.txt")
  216. openinput = open("Resources\input.txt", "r")
  217. InputList = openinput.read().splitlines()
  218. w.write("\n\nStart reading words from file "+ str(InputFileName))
  219. for j in InputList:
  220. print(h.get(j)[1])
  221. w.write("\n\t"+(str(j))+" "+str((h.get(j)[1]))+": "+str(h.get(j)[0])+" - this anagram took "+str(time_this(h.get, j)[0])+" seconds")
  222. w.write("\nEnd reading keys from file example_input.txt"
  223. "\nProgram end")
  224. openinput.close()
  225. w.close()
  226.  
  227. if __name__ == "__main__":
  228. main()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement