Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- # Presentation:
- # https://docs.google.com/presentation/d/1qAbWhkpvbGHIRzsmlv9b8DynodREiO1JB3zLS3yn5og/pub?start=false&loop=true&delayms=3000&slide=id.p
- __author__ = 'bigyan'
- import functools
- import math
- import operator
- import random
- import xxhash
- class BloomFilter:
- def __init__(self, numBits=10007, numHash=7, seed=221):
- def get_hash_seeds():
- rand_state = random.getstate()
- random.seed(seed)
- hs = random.sample(range(numBits * numHash), numHash)
- random.setstate(rand_state)
- return hs
- self.__numbits = numBits
- self.__state = 2 ** self.__numbits
- self.__hashSeeds = get_hash_seeds()
- self.__numElements = 0
- def get_state(self):
- return self.__numElements, bin(self.__state)[3:]
- def __str__(self):
- return bin(self.__state)[3:]
- def __hashSignature(self, item):
- hashValues = map(lambda s: 1 << (xxhash.xxh32(str(item), seed=s).intdigest() % self.__numbits),
- self.__hashSeeds)
- return functools.reduce(operator.or_, hashValues)
- def add(self, item):
- self.__state = self.__state | self.__hashSignature(item)
- self.__numElements += 1
- def __contains__(self, item):
- return self.__state == self.__state | self.__hashSignature(item)
- def containProbability(self, item):
- if self.__state == self.__state | self.__hashSignature(item):
- k, n, m = len(self.__hashSeeds), self.__numbits, self.__numElements
- return 1 - (1 - math.e ** (-m * k / n)) ** k
- else:
- return 0.0
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement