Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- #!/usr/bin/env python3
- import mmh3
- from bitarray import bitarray
class BloomFilter():
    """Probabilistic set membership test backed by a ``bitarray``.

    Each entry is converted with ``str()`` and hashed ``hashcount`` times
    with ``mmh3.hash``, using the seeds ``0 .. hashcount - 1``.  Every hash
    is reduced modulo ``size`` to select one bit of the filter.

    ``lookup`` can report an entry that was never added (a false positive)
    but never misses an entry that was added.
    """

    def __init__(self, size, hashcount):
        """Create an empty filter.

        Args:
            size: Number of bits in the filter; must be at least 1.
            hashcount: Number of seeded hashes computed per entry; must be
                at least 1.

        Raises:
            ValueError: If ``size`` or ``hashcount`` is smaller than 1.
        """
        # A zero-sized filter would only fail later with ZeroDivisionError
        # in the modulo, and a non-positive hashcount would make every
        # lookup vacuously True, so reject both up front.
        if size < 1:
            raise ValueError("size must be a positive integer")
        if hashcount < 1:
            raise ValueError("hashcount must be a positive integer")
        self.size = size
        self.hashcount = hashcount
        self.filter = bitarray(size)
        # bitarray(size) is created uninitialised; clear every bit.
        self.filter.setall(0)

    def _positions(self, entry):
        """Yield the bit index selected by each seeded hash of ``entry``."""
        key = str(entry)  # convert once instead of once per seed
        for seed in range(self.hashcount):
            # Python's % returns a non-negative result for a positive
            # modulus, so a negative hash value still maps into range.
            yield mmh3.hash(key, seed) % self.size

    def add(self, entry):
        """Insert ``entry`` by setting each of its hash positions to 1."""
        for position in self._positions(entry):
            self.filter[position] = 1

    def lookup(self, entry):
        """Return True if ``entry`` may have been added, False if it was not.

        Stops at the first unset bit, since one unset bit is enough to
        prove the entry was never added.
        """
        return all(self.filter[position] for position in self._positions(entry))

    def save(self, path):
        """Write the filter's bits to the file at ``path`` in binary form.

        Only the bit array is stored; ``size`` and ``hashcount`` are not.
        Restore the data with ``load`` on a filter constructed with the
        same ``size`` and ``hashcount``.

        Raises:
            OSError: If the file cannot be opened or written.
        """
        with open(path, "wb") as handle:
            self.filter.tofile(handle)

    def load(self, path):
        """Replace the filter's bits with those stored at ``path``.

        Args:
            path: File previously written by ``save`` from a filter of the
                same ``size``.

        Raises:
            OSError: If the file cannot be opened or read.
            ValueError: If the file length does not match this filter's
                ``size``.
        """
        # An empty slice keeps the bit-endianness of the existing filter.
        bits = self.filter[:0]
        with open(path, "rb") as handle:
            bits.fromfile(handle)
        # Files hold whole bytes, so the stored length is ``size`` rounded
        # up to a multiple of eight.
        expected_bits = ((self.size + 7) // 8) * 8
        if len(bits) != expected_bits:
            raise ValueError(
                "file holds %d bits, expected %d for a filter of size %d"
                % (len(bits), expected_bits, self.size)
            )
        # Drop the padding bits that rounded the data up to a whole byte.
        self.filter = bits[:self.size]

    def accuracy(self, path):
        """Placeholder for the false-positive-rate calculation.

        Not implemented: does nothing and returns ``None``; ``path`` is
        unused.  The intended formula, with ``n`` the expected number of
        inserted items, is::

            fp = (1 - (1 - 1 / size) ** (hashcount * n)) ** hashcount
        """
        return None
# Demo: insert the integers 0-9, then probe 0-14.  The values 10-14 were
# never added, so any True printed for them is a false positive.
bloomfilter = BloomFilter(size=10000, hashcount=5)

for member in range(10):
    bloomfilter.add(member)

for candidate in range(15):
    print(candidate, bloomfilter.lookup(candidate))

bloomfilter.save(path="asdf")
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement