Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- from time_this import *
- import copy
- import os
- class Hash:
- def __init__(self):
- self.table = [[None]] * 31
- def __str__(self):
- return self.table
- def next_prime(self):
- """
- Finds next prime number for new Hash Table capacity.
- returns: new prime number which is roughly double in size
- """
- prime = len(self.table)
- for p in range(prime * 2, prime * 2 + prime):
- for i in range(2, p):
- if p % i == 0:
- break
- else:
- return p
- def SortWord(self, word):
- """
- (Bubble Sort)
- Turns word into a list converted via 'ord'. The worst makes mulitple passhes through a list.
- It compares adjacent items and exchanges those that are out of order. Each pass through the list places the
- next largest value in its proper place. In essence, each item “bubbles” up to the location where it belongs.
- :param word:
- :return:
- """
- listed = list(word)
- alist = []
- for i in listed:
- alist.append(ord(i))
- for passnum in range(len(alist) - 1, 0, -1):
- for i in range(passnum):
- if alist[i] > alist[i + 1]:
- temp = alist[i]
- alist[i] = alist[i + 1]
- alist[i + 1] = temp
- newword = []
- for num in alist:
- newword.append(chr(num))
- return "".join(newword)
- def hash_function(self,word, TableCapacity):
- """
- Finds a new index value for the Hash Table by converting each letter into a number via 'ord'. The algorithm
- I chose is totaling each converted letter and multiplying by its weight depending on its index
- (i.e. the second letter in of word bear, the letter "e", will be worth (99 * (99-96)). The usage of a
- weight helps create a consistent way of preventing two different words from having the same value.
- Lastly, it will mod by the length of the list (TableCapacity), it will return the remainder
- which will be the new index for the table.
- :param TableCapacity, word:
- :return: new hash table index
- """
- total = 0
- for i in list(word):
- total = total + ord(i)*(ord(i)-96)
- while total >= TableCapacity:
- total = total % TableCapacity
- return total
- def LoadFactor(self):
- """
- Checks load factor by counting the None's and divide by the length of the current table
- subtracted by 1. It also counts how many slots are occupied by subtracting the length with the NoneCount.
- :param table:
- :return: Load factor rounded by 4 decimals.
- Number of occupied slots
- """
- NoneCount = 0
- for i in self.table:
- if i == [None]:
- NoneCount += 1
- LoadSize =(1 - NoneCount / len(self.table))
- occupied = (len(self.table) - NoneCount)
- return round(LoadSize, 4), occupied
- def store(self, word):
- """
- uses hash_function to find index value and stores the data in the corresponding index value
- in the Hash Table.
- The conflict algorithm is simple, if neither of the two if statements pass, the index will be added +1 so
- the word will traverse to the list to the right. If the index is at the end of the list, it will revert
- back to [0] and start from the beginning until there is no more conflict.
- This method will keep track of number of conflicts and unique items stored.
- :param word:
- :return: current Hash table, number of unique keys, and conflicts
- """
- IndexList = []
- unique = 0
- conflicts = 0
- index = self.hash_function(self.SortWord(word),len(self.table))
- StoreStatus = False
- while StoreStatus is not True:
- if self.LoadFactor()[0] > 0.7: # Checks if load factor is above 70%. If true: rehash table.
- self.rehash()
- else:
- if self.table[index] == [None]:
- """
- This checks to see if a KEY exists at given index
- If not, stores KEY and VALUE within the index.
- """
- self.table[index] = [self.SortWord(word)]
- self.table[index].append(word)
- IndexList.append(index)
- StoreStatus = True
- elif self.table[index][0] == self.SortWord(word) and word not in self.table[index]:
- """
- This checks if the sorted word already exists. Which means the KEY is already there.
- If so, then append the original (NOT SORTED) word to be viewed as a VALUE to the KEY.
- """
- self.table[index].append(word)
- StoreStatus = True
- else:
- if index >= (len(self.table) - 1): # When we're at the end of the list
- index = 0
- conflicts += 1
- else:
- index += 1
- conflicts += 1
- return self.table, unique, conflicts
- def get(self,word):
- """
- Accepts a key and finds and returns its anagrams.
- Because an index is a list that stores both keys and values, anything past the [0] index
- are the values. Where [0] is the key. The method returns anything past [1:] to show you the values.
- :param word:
- :return: all anagrams with given key and number of values stored with the key.
- """
- index = self.hash_function(word, len(self.table))
- for i in range(len(self.table)):
- if self.table[index][0] == word:
- values = self.table[index][1:]
- FixValuesFormat = (', '.join(values))
- length = len(values)
- return FixValuesFormat, length
- else:
- if index == (len(self.table)-1):
- index = 0
- index += 1
- values = self.table[index][1:]
- FixValuesFormat = (', '.join(values))
- length = len(values)
- return FixValuesFormat, length
- def rehash(self):
- """
- Keeps track of the contents of the hash table and their locations and is stored in a
- temp variable. A new hash table is created with the length of the next prime number (almost doubled).
- Using the old list, this method will know what/how to store the current items into the new hash table.
- :return:
- """
- IndexList = []
- print("Rehashing...........")
- temp = self.table
- tempindex = IndexList[:]
- IndexList.clear()
- self.table = [[None]] * self.next_prime()
- for i in tempindex:
- for word in temp[i][1:]:
- self.store(word)
- print("New and current list: ",(self.table),"\nwhich as a length of :", len(self.table),"\n\ncontinuing.... please wait")
- def main():
- h = Hash()
- IndexList = []
- w = open("Resources\output.txt", "w")
- DictionaryFileName = os.path.basename("Resources\dictionary_test.txt")
- opendictionary = open("Resources\dictionary_test.txt", "r")
- DictionaryList = opendictionary.read().splitlines()
- unique = 0
- conflicts = 0
- count = 0
- w.write("Program Start"
- "\nHash Table details: "+str(len(h.table))+" slots, "+str(h.LoadFactor()[1])+" occupied, load factor = "+str(h.LoadFactor()[0]))
- w.write("\nStart reading words from file "+ str(DictionaryFileName))
- for i in DictionaryList:
- count += 1
- if h.LoadFactor()[0] > 0.7:
- w.write("\n \t Before expansion: "+str(len(h.table)) + " slots, "+str(h.LoadFactor()[1]) +" occupied, load factor = "+str(h.LoadFactor()[0]))
- storeoutput = h.store(i)
- print(count)
- w.write("\n \t After expansion: "+str(len(h.table)) + " slots, "+str(h.LoadFactor()[1]) +" occupied, load factor = "+str(h.LoadFactor()[0]))
- else:
- storeoutput = h.store(i)
- conflicts += storeoutput[2]
- unique += storeoutput[1]
- w.write("\n\nEnd reading words from file "+DictionaryFileName+"\n"
- "HashTable details: "+str(len(h.table)) +" slots, "+str(h.LoadFactor()[1])+" occupied, load factor = "+str(h.LoadFactor()[0])+
- "\nNumber of unique keys inserted into HashTable = "+str(unique)+
- "\nNumber of keys conflicts encounter = "+ str(conflicts))
- opendictionary.close()
- InputFileName = os.path.basename("Resources\input.txt")
- openinput = open("Resources\input.txt", "r")
- InputList = openinput.read().splitlines()
- w.write("\n\nStart reading words from file "+ str(InputFileName))
- for j in InputList:
- print(h.get(j)[1])
- w.write("\n\t"+(str(j))+" "+str((h.get(j)[1]))+": "+str(h.get(j)[0])+" - this anagram took "+str(time_this(h.get, j)[0])+" seconds")
- w.write("\nEnd reading keys from file example_input.txt"
- "\nProgram end")
- openinput.close()
- w.close()
- if __name__ == "__main__":
- main()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement