Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
class Indexer:
    """Nearest-neighbour predictor backed by two parallel Annoy indices.

    ``Xindex`` stores the feature vectors and ``Yindex`` stores the answer
    vector that belongs to the same item id.  ``predict`` looks up the
    ``searchNumber`` nearest feature vectors and returns the rounded,
    distance-weighted average of their answer vectors.

    Typical use: ``addItems`` -> ``buildIndex`` -> ``predict`` / ``testItems``,
    optionally ``saveIndex`` and later ``load``.
    """

    # Passed explicitly so the behaviour does not depend on the library's
    # default metric (historically "angular"; newer Annoy releases warn when
    # the metric is omitted).
    METRIC = "angular"

    # Number of trees used for the answer index; it is only ever read by item
    # id, so a small forest is enough.
    Y_INDEX_TREES = 10

    def __init__(self, numberOfFeatures=75, numberOfClasses=3):
        """Create two empty indices.

        Args:
            numberOfFeatures: length of every feature vector stored in Xindex.
            numberOfClasses: length of every answer vector stored in Yindex.
        """
        self.numberOfFeatures = numberOfFeatures
        self.numberOfClasses = numberOfClasses
        # Use the constructor arguments (they used to be ignored in favour of
        # hard-coded 75 / 3, which disagreed with what load() creates).
        self.Xindex = AnnoyIndex(numberOfFeatures, self.METRIC)
        self.Yindex = AnnoyIndex(numberOfClasses, self.METRIC)
        self.Xindex.verbose(1)
        self.Yindex.verbose(1)
        self.bufferXlist = []
        self.bufferYlist = []
        self.built = False
        # How many neighbours predict() asks the feature index for.
        self.searchNumber = 25

    @staticmethod
    def _indexPaths(fileName):
        """Return the (X, Y) file paths derived from *fileName*.

        The two files live next to each other and are named ``X-<base>`` and
        ``Y-<base>``.  ``path.join`` keeps a bare file name relative instead
        of turning it into ``/X-<base>`` at the filesystem root.
        """
        dirPath = path.dirname(fileName)
        baseName = path.basename(fileName)
        return (
            path.join(dirPath, "X-%s" % baseName),
            path.join(dirPath, "Y-%s" % baseName),
        )

    def buildIndex(self, numOfTrees=500):
        """Build both indices; no items can be added afterwards.

        Args:
            numOfTrees: number of trees for the feature index.
        """
        print("X Items before build : %s" % self.Xindex.get_n_items())
        print("Y Items before build : %s" % self.Yindex.get_n_items())
        print("\nBuilding index with %s trees ..." % numOfTrees)
        self.Xindex.build(numOfTrees)
        self.Yindex.build(self.Y_INDEX_TREES)
        self.built = True
        print("X Items after build : %s" % self.Xindex.get_n_items())
        print("Y Items after build : %s" % self.Yindex.get_n_items())
        print("Index built OK\n")

    def addToIndex(self, X, Y):
        """Store feature vector *X* and answer vector *Y* under one new id.

        Raises:
            RuntimeError: if the indices were already built or loaded.
        """
        if self.built:
            raise RuntimeError('Cannot add items to built indices')
        index = self.generateIndexNumber()
        self.Xindex.add_item(index, X)
        self.Yindex.add_item(index, Y)

    def generateIndexNumber(self):
        """Return the next free item id of the feature index.

        Annoy reports ``get_n_items()`` as "highest id + 1", so that value is
        already the next free id.  Adding one on top (as before) skipped id 0
        and then every second id, leaving implicit all-zero items in the index.
        """
        return self.Xindex.get_n_items()

    def predict(self, X, debug=False):
        """Predict the answer vector for feature vector *X*.

        Args:
            X: feature vector to look up.
            debug: when true, print the neighbour ids, their distances and
                any id that falls outside the answer index.

        Returns:
            A list with one rounded value per answer column.

        Raises:
            ValueError: if the feature index returns no neighbours.
        """
        indices, distances = self.Xindex.get_nns_by_vector(
            X, self.searchNumber, include_distances=True
        )
        if debug:
            print("\n Indices")
            print(indices)
            print("\n Distances")
            print(distances)
            print("\n Length")
            itemCount = self.Yindex.get_n_items()
            print(itemCount)
            for neighbour in indices:
                # Valid ids are 0 .. itemCount - 1, so itemCount itself is
                # already out of range.
                if neighbour >= itemCount:
                    print(neighbour)
        if not indices:
            raise ValueError("Cannot predict: the X index returned no neighbours")

        answers = np.asarray(
            [self.Yindex.get_item_vector(neighbour) for neighbour in indices],
            dtype=float,
        )
        # Closer neighbours count more.  Distances can exceed 1, so the raw
        # "1 - distance" weight is clamped at zero: a negative weight would
        # push the average away from that neighbour's answer.
        weights = np.clip(1.0 - np.asarray(distances, dtype=float), 0.0, None)
        if weights.sum() <= 0.0:
            # Every neighbour is at distance >= 1; np.average cannot normalise
            # all-zero weights, so fall back to a plain mean.
            weights = None
        return list(np.rint(np.average(answers, axis=0, weights=weights)))

    def addItems(self, Xlist, Ylist):
        """Add paired feature/answer vectors.

        Returns:
            True on success, False (after printing a message) when the two
            lists differ in length.
        """
        if len(Xlist) != len(Ylist):
            print("Xlist length does not match Ylist length")
            return False
        for X, Y in zip(Xlist, Ylist):
            self.addToIndex(X, Y)
        return True

    def testItems(self, XlistTest, YlistTest, printDebug=False):
        """Measure how often the predicted result column matches the truth.

        Only column 2 of the prediction and of the expected answer is compared.

        Returns:
            The percentage of matching predictions, 0.0 when there is nothing
            to test, or False when the two lists differ in length.
        """
        if len(XlistTest) != len(YlistTest):
            print("XlistTest length does not match YlistTest length")
            return False
        total = len(XlistTest)
        good = sum(
            1
            for X, expected in zip(XlistTest, YlistTest)
            if self.predict(X)[2] == expected[2]
        )
        # Guard the division: an empty test set used to raise ZeroDivisionError.
        percentage = (good / total) * 100 if total else 0.0
        if printDebug:
            print("All %s" % total)
            print("Good %s" % good)
            print("Ok to %s %% " % percentage)
        return percentage

    def bufferItemsToTest(self, Xlist, Ylist):
        """Queue test vectors for a later runBufferedTests() call."""
        self.bufferXlist.extend(Xlist)
        self.bufferYlist.extend(Ylist)

    def runBufferedTests(self):
        """Run testItems() on everything queued so far, with debug output."""
        return self.testItems(self.bufferXlist, self.bufferYlist, True)

    def saveIndex(self, fileName):
        """Save both indices next to each other as ``X-<name>`` / ``Y-<name>``.

        The target directory is created when missing.  Always returns True;
        failures surface as exceptions from the underlying calls.
        """
        print("\nSaving indices...")
        dirPath = path.dirname(fileName)
        xPath, yPath = self._indexPaths(fileName)
        # An empty dirPath means "current directory"; makedirs('') would raise.
        if dirPath:
            makedirs(dirPath, exist_ok=True)
        print(self.Xindex)
        print(self.Yindex)
        print("\t saving to : %s" % xPath)
        print("\t saving to : %s" % yPath)
        print("X Items before save : %s" % self.Xindex.get_n_items())
        print("Y Items before save : %s" % self.Yindex.get_n_items())
        a = self.Xindex.save(xPath)
        b = self.Yindex.save(yPath)
        print("Save resulted : %s" % a)
        print("Save resulted : %s" % b)
        print("X Items after save : %s" % self.Xindex.get_n_items())
        print("Y Items after save : %s" % self.Yindex.get_n_items())
        print("\nIndices saved OK")
        return True

    def load(self, fileName):
        """Replace both indices with the ones saved by saveIndex(fileName).

        Always returns True; failures surface as exceptions from the
        underlying calls, in which case the current indices are left untouched.
        """
        xPath, yPath = self._indexPaths(fileName)
        print("\t loading from : %s" % xPath)
        print("\t loading from : %s" % yPath)
        loadedX = AnnoyIndex(self.numberOfFeatures, self.METRIC)
        loadedY = AnnoyIndex(self.numberOfClasses, self.METRIC)
        a = loadedX.load(xPath)
        b = loadedY.load(yPath)
        print("Load resulted : %s" % a)
        print("Load resulted : %s" % b)
        # Swap only after both files loaded.  Previously the Y index stayed
        # unloaded (its assignment was commented out), which broke predict().
        self.Xindex.unload()
        self.Yindex.unload()
        self.Xindex = loadedX
        self.Yindex = loadedY
        # Loaded indices are treated like built ones: addToIndex() must refuse.
        self.built = True
        print("\nLoaded %s" % (fileName))
        print("Loaded %i items to X index" % (self.Xindex.get_n_items()))
        print("Loaded %i items to Y index" % (self.Yindex.get_n_items()))
        print("\nIndices loaded OK")
        return True
Add Comment
Please, Sign In to add comment