Guest User

Untitled

a guest
Jun 19th, 2018
118
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 0.81 KB | None | 0 0
  1. require 'zlib'
  2.  
  # Approximate string matcher based on zlib compression.
  #
  # A small set of reference strings is indexed under caller-chosen keys. A
  # candidate term is then scored against each reference by how many extra
  # compressed bytes are needed when the term is appended to the reference,
  # divided by the term's length. Lower scores mean the term is more
  # compressible in the context of that reference.
  #
  # This is optimized to store a relatively small subset of common index
  # terms against which new strings are tested: each reference is compressed
  # once at index time, and every test compresses one concatenation per
  # indexed reference.
  class ZlibDistanceCalc
    def initialize
      # key => [normalized reference string, deflated normalized reference]
      @c_hash = {}
    end

    # Registers +term+ as a reference string under +key+, replacing any
    # previous entry for that key.
    #
    # The normalized (stripped, downcased) form is stored so that the string
    # used for concatenation in #test is exactly the one whose compressed
    # size was recorded.
    #
    # Returns the stored [normalized_term, compressed_term] pair.
    def index(key, term)
      @c_hash[key] = [normalize(term), compress(term)]
    end

    # Scores +term+ against every indexed reference.
    #
    # Returns a Hash mapping each indexed key to a Float: the growth in
    # deflated size caused by appending the normalized term to the reference,
    # divided by the normalized term's length.
    #
    # A term that is empty after normalization carries no information, so
    # every key is scored Float::INFINITY instead of dividing by zero (NaN)
    # or reporting a spurious perfect score of 0.0.
    def test(term)
      needle = normalize(term)

      if needle.empty?
        return @c_hash.each_with_object({}) do |(key, _entry), scores|
          scores[key] = Float::INFINITY
        end
      end

      # Use the normalized length so that padding around the term cannot
      # dilute the score.
      coeff = needle.size.to_f

      @c_hash.each_with_object({}) do |(key, entry), scores|
        reference, reference_cmp = entry
        grown_size = compress(reference + needle).size
        scores[key] = (grown_size - reference_cmp.size) / coeff
      end
    end

    # Finds the indexed keys whose score for +term+ is at most +delta+.
    #
    # When a block is given, each matching key and its score are yielded.
    # The Hash of matches (key => score) is returned in either case.
    def search(term, delta = 0.5)
      matches = test(term).select { |_key, score| score <= delta }
      matches.each { |key, score| yield key, score } if block_given?
      matches
    end

    # Returns the zlib-deflated bytes of +term+ after normalization.
    def compress(term)
      Zlib::Deflate.deflate(normalize(term))
    end

    private

    # Canonical form used for both indexing and testing: surrounding
    # whitespace removed and letters downcased.
    def normalize(term)
      term.strip.downcase
    end
  end
Add Comment
Please, Sign In to add comment