Advertisement
bigyan

BloomFilter

Nov 20th, 2015
114
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 1.62 KB | None | 0 0
  1. # Presentation:
  2. # https://docs.google.com/presentation/d/1qAbWhkpvbGHIRzsmlv9b8DynodREiO1JB3zLS3yn5og/pub?start=false&loop=true&delayms=3000&slide=id.p
  3.  
  4. __author__ = 'bigyan'
  5.  
  6. import functools
  7. import math
  8. import operator
  9. import random
  10. import xxhash
  11.  
  12. class BloomFilter:
  13.     def __init__(self, numBits=10007, numHash=7, seed=221):
  14.         def get_hash_seeds():
  15.             rand_state = random.getstate()
  16.             random.seed(seed)
  17.             hs = random.sample(range(numBits * numHash), numHash)
  18.             random.setstate(rand_state)
  19.             return hs
  20.  
  21.         self.__numbits = numBits
  22.         self.__state = 2 ** self.__numbits
  23.         self.__hashSeeds = get_hash_seeds()
  24.         self.__numElements = 0
  25.  
  26.     def get_state(self):
  27.         return self.__numElements, bin(self.__state)[3:]
  28.  
  29.     def __str__(self):
  30.         return bin(self.__state)[3:]
  31.  
  32.     def __hashSignature(self, item):
  33.         hashValues = map(lambda s: 1 << (xxhash.xxh32(str(item), seed=s).intdigest() % self.__numbits),
  34.                          self.__hashSeeds)
  35.         return functools.reduce(operator.or_, hashValues)
  36.  
  37.     def add(self, item):
  38.         self.__state = self.__state | self.__hashSignature(item)
  39.         self.__numElements += 1
  40.  
  41.     def __contains__(self, item):
  42.         return self.__state == self.__state | self.__hashSignature(item)
  43.  
  44.     def containProbability(self, item):
  45.         if self.__state == self.__state | self.__hashSignature(item):
  46.             k, n, m = len(self.__hashSeeds), self.__numbits, self.__numElements
  47.             return 1 - (1 - math.e ** (-m * k / n)) ** k
  48.         else:
  49.             return 0.0
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement