Advertisement
Guest User

test.ml

a guest
Jul 30th, 2015
266
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
OCaml 4.00 KB | None | 0 0
  1. #require "extlib";;
  2.  
  3. module Foo(F : sig type a end) = struct
  4.   type a = F.a
  5.   (* --------------------------------------------------
  6.      Util functions *)
  7.  
  8.   (* 0b0100 => 0b0111 ; 0b101 => 0b111 *)
  9.   let setAllBitsRightOfLeadingOne (x : int) : int
  10.     = List.fold_right (fun a b -> b lor (b lsr a)) [1;2;4;8;16] x;;
  11.  
  12.   let createIndex (h : int) (m : int) : int
  13.     = let bitmask = setAllBitsRightOfLeadingOne m in
  14.     let index   = bitmask land h in
  15.     index;;
  16.  
  17.   let hashWithSalt (obj : a) (salt : int) : int = Hashtbl.hash (salt + Hashtbl.hash obj);;  (* Hash object and salt *)
  18.  
  19.   (* Hash value must map to index value. Hash is much bigger than range of the index (0 to m-1), and
  20.    * this creates a challenge. Taking modulo of hash is one method, but is erroneous b/c it doesn't
  21.    * produce a uniform distribution of index values.
  22.    * Solution is to only take hash values from 0 to m-1. Optimize by masking out bits larger than (m-1).
  23.    * Example, probability of valid hash: if m = 1000, then the odds of a 32 bit hash being less than 'm' is about 1000 / 2^32
  24.   *)
  25.   let rec getIndexes' (obj : a) (m : int) (salt : int) (k : int) : int list =
  26.     match k with
  27.       0 -> []
  28.     | _ -> let h     = hashWithSalt obj salt in
  29.       let index = createIndex h m in
  30.       match (index < m) with                                  (* index should be in bounds... i.e. 'm'=10, so valid indexes are 0 to 9 *)
  31.         true  -> index :: getIndexes' obj m (salt + 1) (k-1)  (* Can use index. Decrement k *)
  32.       | false ->          getIndexes' obj m (salt + 1) k      (* Can't use index, so try again *)
  33.   ;;
  34.  
  35.   let getIndexes (obj : a) (k : int) (m : int) : int list
  36.     = getIndexes' obj m 0 k  (* Initialize salt value to zero *)
  37.   ;;
  38.  
  39.   let round (x : float) : int = int_of_float (floor (x +. 0.5));;
  40.  
  41.   (* --------------------------------------------------
  42.      Either type *)
  43.  
  44.   type ('a, 'b) either = Left of 'a | Right of 'b;;
  45.  
  46.   let getLeft (eith : ('a,'b) either) : 'a list
  47.     = match eith with
  48.     | Left x  -> x :: []
  49.     | Right y -> []
  50.   ;;
  51.  
  52.   let getRight (eith : ('a,'b) either) : 'b list
  53.     = match eith with
  54.     | Left x  -> []
  55.     | Right y -> y :: []
  56.   ;;
  57.  
  58.   let fmapEither (fn : 'b -> 'c) (eith : ('a,'b) either) : ('a,'c) either
  59.     = match eith with
  60.     | Left x  -> Left x
  61.     | Right y -> Right (fn y)
  62.   ;;
  63.  
  64.  
  65.   (* --------------------------------------------------
  66.      Bloom filter *)
  67.  
  68.   module BloomFilter =
  69.   struct
  70.     type bloomFilterT = { n    : int;         (* n  Expected number of elements *)
  71.                              p    : float;       (* p  Probability of false positive *)
  72.                              k    : int;         (* k  Number of hashes *)
  73.                              m    : int;         (* m  Number of bins *)
  74.                              bf   : BitSet.t; }  (* Bit array (bloom filter) *)
  75.  
  76.     let create (n : int) (p : float) : (string,bloomFilterT) either
  77.       = let m' = -1.0 *. (float n) *. (log p) /. ((log 2.0) ** 2.0) in
  78.       let m  = round m' in
  79.       let k  = round (m' /. (float n) *. (log 2.0)) in
  80.       let bFT = { n = n;
  81.                   p = p;
  82.                   m = m;
  83.                   k = k;
  84.                   bf = (BitSet.create m) } in
  85.       if      m < 2              then Left "m value too small"
  86.       else if k < 1              then Left "k value less than one"
  87.       else if p < 0.0 || p > 1.0 then Left "p value out of range"
  88.       else Right bFT
  89.  
  90.     let insert (b : bloomFilterT) (obj : a) : unit
  91.       = let bf = b.bf in
  92.       let k  = b.k in
  93.       let m  = b.m in
  94.       let ks = getIndexes obj k m in
  95.       let setBf = BitSet.set bf in
  96.       List.map setBf ks;     (* Map over hash *)
  97.       ()
  98.  
  99.     let test (b : bloomFilterT) (obj : a) : bool
  100.       = let bf = b.bf in
  101.       let k  = b.k in
  102.       let m  = b.m in
  103.       let ks = getIndexes obj k m in
  104.       let isSetBf = BitSet.is_set bf in              
  105.       let result = List.fold_left (&&) true (List.map isSetBf ks) in
  106.       result
  107.   end
  108. end
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement