Advertisement
Guest User

Untitled

a guest
Jun 22nd, 2018
102
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 1.14 KB | None | 0 0
  1. #!/usr/bin/env python3
  2.  
  3. import mmh3
  4. from bitarray import bitarray
  5.  
  6. class BloomFilter():
  7.     def __init__(self, size, hashcount):
  8.         self.size = size
  9.         self.hashcount = hashcount
  10.         self.filter = bitarray(size)
  11.         self.filter.setall(0)
  12.  
  13.     def add(self, entry):
  14.         for seed in range(self.hashcount):
  15.             result = mmh3.hash(str(entry), seed) % self.size
  16.             self.filter[result] = 1
  17.  
  18.     def lookup(self, entry):
  19.         for seed in range(self.hashcount):
  20.             if self.filter[mmh3.hash(str(entry), seed) % self.size] == 0:
  21.                 return False
  22.         return True
  23.  
  24.     def save(self, path):
  25.         # Save bloom filter to path
  26.         print(self.filter)
  27.         pass
  28.  
  29.     def load(self, path):
  30.         # Load saved bloom filter
  31.         pass
  32.  
  33.     def accuracy(self, path):
  34.         # Calculate false positive rate
  35.         # fp = (1 - [1 - 1 / size] ^ hashcount * expected_items) ^ hashcount
  36.         pass
  37.  
  38. bloomfilter = BloomFilter(10000, 5)
  39.  
  40. for x in range(10):
  41.     bloomfilter.add(x)
  42.  
  43. for x in range(15):
  44.     print(x, bloomfilter.lookup(x))
  45.  
  46. bloomfilter.save("asdf")
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement