Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- # n = Number of items in the filter
- # p = Probability of false positives, float between 0 and 1 or a number indicating 1-in-p
- # m = Number of bits in the filter
- # k = Number of hash functions
- require_relative '../bitarray/bit_array'
- require 'digest'
- class BloomFilter
- def initialize k: 1, m: 10
- @k = k
- @m = m
- @n = 0
- @b = BitArray.new
- end
- # Probability of a false positive based on formula in wikipedia
- def false_positive_rate
- fp_rate = (1-(1-1/@m.to_f)**(@k.to_f*@n.to_f))**@k.to_f
- p fp_rate
- fp_rate.round(2)
- end
- def insert item
- @n += 1
- @k.times do |n|
- hash_val = Digest::MD5.hexdigest( item.to_s + n.to_s ).to_i(16)
- position = hash_val % @m
- @b.set position
- end
- self
- end
- def include? item
- @k.times do |n|
- hash_val = Digest::MD5.hexdigest( item.to_s + n.to_s ).to_i(16)
- position = hash_val % @m
- return @b.get(position) == 1
- end
- return false
- end
- end
- require 'minitest/autorun'
- require_relative 'bloom_filter'
- class BloomFilterTest < MiniTest::Unit::TestCase
- # Verified probability formula and tests from http://hur.st/bloomfilter?n=1&p=0.01
- def test_stats
- assert_equal 0.01, BloomFilter.new(k: 7, m: 10).insert("a").false_positive_rate
- b = BloomFilter.new(k: 2, m: 29)
- 10.times { |n| b.insert(n) }
- assert_equal 0.25, b.false_positive_rate
- end
- def test_include
- b = BloomFilter.new(k: 2, m: 100)
- b.insert "The Trumpet of the Swan"
- assert_equal true, b.include?("The Trumpet of the Swan")
- refute b.include?("E. B. White")
- end
- end
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement