Guest User

Bloom.h

a guest
May 29th, 2015
280
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
C++ 4.91 KB | None | 0 0
  1. // ######## Bloom.h #########
  2. #ifndef Bloom_h
  3. #define Bloom_h
  4. #include <stdlib.h>
  5. #include <inttypes.h>
  6. #include <stdint.h>
  7.  
  8. // not using kmer_type from Kmer.h because I don't want this class to depend on Kmer.h
  9. #ifdef _largeint // case 1: _largeint defined -> bloom_elem is LargeInt<KMER_PRECISION>
  10. #include "LargeInt.h"
  11. typedef LargeInt<KMER_PRECISION> bloom_elem;
  12. #else
  13. #ifdef _ttmath // case 2: _ttmath defined (and _largeint not) -> bloom_elem is ttmath::UInt<KMER_PRECISION>
  14. #include "ttmath/ttmath.h"
  15. typedef ttmath::UInt<KMER_PRECISION> bloom_elem;
  16. #else
  17. #if (! defined kmer_type) || (! defined _LP64) // case 3: kmer_type is not a macro, or _LP64 is not defined; `defined` only sees macros, so a typedef named kmer_type does not count
  18. typedef uint64_t bloom_elem;
  19. #else // case 4: kmer_type is a macro and _LP64 is defined -> bloom_elem is whatever kmer_type expands to
  20. typedef kmer_type bloom_elem;
  21. #endif
  22. #endif
  23. #endif
  24.  
  25.  
  26.  
  27. #define NSEEDSBLOOM 10 // number of entries in rbase[] and in Bloom::seed_tab[]
  28. #define CUSTOMSIZE 1 // not referenced anywhere in this header; NOTE(review): purpose cannot be determined from here
  29.  
  30.  
  31. static const int bits_per_char = 0x08;    // 8 bits in 1 char(unsigned)
  32. static const unsigned char bit_mask[bits_per_char] = { // bit_mask[i] == 1 << i: single-bit mask selecting bit i of a byte
  33.     0x01,  //00000001
  34.     0x02,  //00000010
  35.     0x04,  //00000100
  36.     0x08,  //00001000
  37.     0x10,  //00010000
  38.     0x20,  //00100000
  39.     0x40,  //01000000
  40.     0x80   //10000000
  41. };
  42.  
  43.  
  44. static const int cpt_per_char = 2;    // two 4-bit fields per byte; NOTE(review): "cpt" presumably stands for counter - confirm
  45. static const unsigned char cpt_mask[cpt_per_char] = { // cpt_mask[0] selects the low nibble, cpt_mask[1] the high nibble
  46.     0x0F,  //00001111
  47.     0xF0,  //11110000
  48. };
  49.  
  50. static const uint64_t cpt_mask21[21] = { // cpt_mask21[i] == 0x7ULL << (3*i): 21 adjacent 3-bit fields of a 64-bit word
  51.   0x0000000000000007ULL,//00000....00000111
  52.   0x0000000000000038ULL,
  53.   0x00000000000001C0ULL,
  54.   0x0000000000000E00ULL,
  55.   0x0000000000007000ULL,
  56.   0x0000000000038000ULL,
  57.   0x00000000001C0000ULL,
  58.   0x0000000000E00000ULL,
  59.   0x0000000007000000ULL,
  60.   0x0000000038000000ULL,
  61.   0x00000001C0000000ULL,
  62.   0x0000000E00000000ULL,
  63.   0x0000007000000000ULL,
  64.   0x0000038000000000ULL,
  65.   0x00001C0000000000ULL,
  66.   0x0000E00000000000ULL,
  67.   0x0007000000000000ULL,
  68.   0x0038000000000000ULL,
  69.   0x01C0000000000000ULL,
  70.   0x0E00000000000000ULL,
  71.   0x7000000000000000ULL // bits 60..62; bit 63 is not covered by any entry
  72.  
  73. };
  74.  
  75.  
  76. static const uint64_t cpt_mask32[32] = { // cpt_mask32[i] == 0x3ULL << (2*i): 32 adjacent 2-bit fields covering all 64 bits
  77.   0x0000000000000003ULL,//00000....00000011
  78.   0x000000000000000CULL,
  79.   0x0000000000000030ULL,//00000....000110000
  80.   0x00000000000000C0ULL,
  81.   0x0000000000000300ULL,
  82.   0x0000000000000C00ULL,
  83.   0x0000000000003000ULL,
  84.   0x000000000000C000ULL,
  85.   0x0000000000030000ULL,
  86.   0x00000000000C0000ULL,
  87.   0x0000000000300000ULL,
  88.   0x0000000000C00000ULL,
  89.   0x0000000003000000ULL,
  90.   0x000000000C000000ULL,
  91.   0x0000000030000000ULL,
  92.   0x00000000C0000000ULL,
  93.   0x0000000300000000ULL,
  94.   0x0000000C00000000ULL,
  95.   0x0000003000000000ULL,
  96.   0x000000C000000000ULL,
  97.   0x0000030000000000ULL,
  98.   0x00000C0000000000ULL,
  99.   0x0000300000000000ULL,
  100.   0x0000C00000000000ULL,
  101.   0x0003000000000000ULL,
  102.   0x000C000000000000ULL,
  103.   0x0030000000000000ULL,
  104.   0x00C0000000000000ULL,
  105.   0x0300000000000000ULL,
  106.   0x0C00000000000000ULL,
  107.   0x3000000000000000ULL,
  108.   0xC000000000000000ULL // bits 62..63
  109. };
  110.  
  111.  
  112.  
  113. /* static const unsigned char incr_cpt_table[2][255] =
  114.  {
  115.  {1, 2,3},
  116.  {3, 4,3},
  117.  };
  118.  */
  119.  
  120. static const uint64_t rbase[NSEEDSBLOOM] = // NSEEDSBLOOM fixed 64-bit constants; NOTE(review): presumably the base values from which Bloom::seed_tab is derived - the code reading this table is not in this header
  121. {
  122.     0xAAAAAAAA55555555ULL,
  123.     0x33333333CCCCCCCCULL,
  124.     0x6666666699999999ULL,
  125.     0xB5B5B5B54B4B4B4BULL,
  126.     0xAA55AA5555335533ULL,
  127.     0x33CC33CCCC66CC66ULL,
  128.     0x6699669999B599B5ULL,
  129.     0xB54BB54B4BAA4BAAULL,
  130.     0xAA33AA3355CC55CCULL,
  131.     0x33663366CC99CC99ULL
  132. };
  133.  
  134.  
  135. /*
  136.  
  137.  0x2E7E5A8996F99AA5,
  138.  0x74B2E1FB222EFD24,
  139.  0x8BBE030F6704DC29,
  140.  0x6D8FD7E91C11A014,
  141.  0xFC77642FF9C4CE8C,
  142.  0x318FA6E7C040D23D,
  143.  0xF874B1720CF914D5,
  144.  0xC569F575CDB2A091,
  145.  */
  146.  
  147. //static uint64_t pri1=0x5AF3107A401FULL;
  148. //static uint64_t pri2 =0x78C27CE77ULL;
  149.  
  150. class Bloom{ // declaration only: no member function is defined in this header
  151.    
  152. protected:
  153.    // hash_func overloads: one per key representation that the build flags can enable
  154. #ifdef _largeint
  155.     inline uint64_t hash_func(LargeInt<KMER_PRECISION> elem, int num_hash);
  156. #endif
  157. #ifdef _ttmath
  158.     inline uint64_t hash_func(ttmath::UInt<KMER_PRECISION> elem, int num_hash);
  159. #endif
  160. #ifdef _LP64
  161.     inline uint64_t hash_func(__uint128_t key, int num_hash); // 128-bit key overload, only declared when _LP64 is defined
  162. #endif
  163.     inline uint64_t hash_func(uint64_t key, int num_hash); // always declared, regardless of build flags
  164.    
  165.     inline void generate_hash_seed(); // NOTE(review): presumably fills seed_tab from user_seed - confirm in the implementation
  166.    
  167.     uint64_t user_seed; // NOTE(review): presumably the value passed to setSeed() - confirm
  168.     uint64_t seed_tab[NSEEDSBLOOM]; // one 64-bit seed slot per possible hash function (NSEEDSBLOOM slots)
  169.     int n_hash_func; // NOTE(review): presumably the value passed to set_number_of_hash_func() - confirm
  170.     uint64_t nchar; // NOTE(review): presumably the length of blooma in bytes - confirm
  171. public:
  172.     unsigned char * blooma; // public raw byte pointer; allocation and release are not visible in this header
  173.  
  174.     void setSeed(uint64_t seed) ;
  175.     void set_number_of_hash_func(int i) ;
  176.    
  177.     void add(bloom_elem elem); // not virtual: BloomCpt and its subclasses redeclare add(), so a call through a Bloom pointer/reference uses this version
  178.     int  contains(bloom_elem elem); // returns int; NOTE(review): presumably non-zero when elem may be present - confirm
  179.    
  180.     uint64_t tai; // NOTE(review): presumably the filter size ("taille") given to the constructor - confirm
  181.     uint64_t nb_elem;
  182.    
  183.     void dump(char * filename); // takes a non-const char*, so string literals need a cast in standard C++11 and later
  184.     void load(char * filename);
  185.  
  186.     long weight();
  187.  
  188.     Bloom(int tai_bloom);
  189.     Bloom(uint64_t tai_bloom); // same parameter name as the int overload, wider unsigned type
  190.  
  191.     Bloom();
  192.    
  193.    
  194.     ~Bloom(); // not virtual: deleting a derived object through a Bloom* is undefined behaviour
  195.    
  196. };
  197.  
  198.  
  199.  
  200. class BloomCpt: public Bloom { // publicly derived from Bloom; NOTE(review): presumably a counting variant ("Cpt") - confirm against the implementation
  201.     public :
  202.     BloomCpt(int tai_bloom);
  203.     BloomCpt();
  204.  
  205.     void add(bloom_elem elem); // redeclares and therefore hides Bloom::add (neither is virtual)
  206.    
  207.     int  contains_n_occ(bloom_elem elem, int nks); // NOTE(review): presumably tests whether elem was added at least nks times - confirm
  208.  
  209.    
  210. };
  211.  
  212.  
  213. class BloomCpt3: public BloomCpt { // publicly derived from BloomCpt; NOTE(review): presumably the 3-bit-field variant matching cpt_mask21 - confirm
  214.     public :
  215.     BloomCpt3(int tai_bloom); // only an int-size constructor is declared; there is no default constructor
  216.         ~BloomCpt3(); // non-virtual, like the base-class destructors
  217.  
  218.     uint64_t * blooma3; // public raw pointer to 64-bit words; allocation and release are not visible in this header
  219.     void add(bloom_elem elem); // hides BloomCpt::add
  220.    
  221.     int  contains_n_occ(bloom_elem elem, int nks); // hides BloomCpt::contains_n_occ
  222.    
  223. };
  224.  
  225.  
  226.  
  227. class BloomCpt2: public BloomCpt { // publicly derived from BloomCpt; NOTE(review): presumably the 2-bit-field variant matching cpt_mask32 - confirm
  228.     public :
  229.     BloomCpt2(int tai_bloom); // only an int-size constructor is declared; there is no default constructor
  230.     ~BloomCpt2(); // non-virtual, like the base-class destructors
  231.  
  232.     uint64_t * blooma2; // public raw pointer to 64-bit words; allocation and release are not visible in this header
  233.     void add(bloom_elem elem); // hides BloomCpt::add
  234.    
  235.     int  contains_n_occ(bloom_elem elem, int nks); // hides BloomCpt::contains_n_occ
  236.    
  237. };
  238.  
  239.  
  240.  
  241.  
  242. #endif
Advertisement
Add Comment
Please, Sign In to add comment