cielavenir

kktkbwt benchmark

Jun 12th, 2020 (edited)
1,483
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
C++ 4.29 KB | None | 0 0
  1. // https://qiita.com/kktk-KO/items/82a567e562ccf0ed5d09
  2.  
  3. #include <iostream>
  4. #include <string>
  5. #include <vector>
  6. #include <algorithm>
  7. #include <chrono>
  8. #include <random>
  9. #include <cstring>
  10.  
  11. namespace kktkbwt {
  12.  
  13. template <class Iterator>
  14. struct default_lexically_lesser
  15. {
  16.   bool operator() (Iterator left, Iterator right) noexcept
  17.   {
  18. #if 0
  19.     if (*left == '\0' && *right == '\0') { return false; }
  20.     if (*left == '\0' && *right != '\0') { return true; }
  21.     if (*left != '\0' && *right == '\0') { return false; }
  22.     return *left < *right || (*left == *right && (*this)(++left, ++right));
  23. #else
  24.     for(;*left == *right && *left != '\0';++left,++right);
  25.     return *left < *right;
  26. #endif
  27.   }
  28. };
  29.  
  30. template <
  31.   class RandomAccessIterator1,
  32.   class RandomAccessIterator2,
  33.   class OutputIterator,
  34.   class DifferenceType,
  35.   class LexicallyLesser = default_lexically_lesser<RandomAccessIterator1>
  36. >
  37. void bwt (RandomAccessIterator1 in, RandomAccessIterator2 aux, OutputIterator out, DifferenceType size)
  38. {
  39.   for (DifferenceType i = 0; i < size; ++i) { aux[i] = i; }
  40.  
  41.   std::sort(aux, aux + size, [&] (DifferenceType l, DifferenceType r) {
  42.     return LexicallyLesser()(in + l, in + r); }
  43.     //return strcmp(in+l,in+r)<0; }
  44.   );
  45.  
  46.   for (DifferenceType i = 0; i < size; ++i) { out[i] = in[(aux[i] + size - 1) % size]; }
  47. }
  48.  
  49. }
  50.  
  51. namespace ciel {
  52. struct sorter{
  53.     //string str;
  54.     const char *p;
  55.     sorter(std::string &_str):p(_str.c_str()){}
  56.     //bool operator()(const int a, const int b) const{return str.substr(a)<str.substr(b);}
  57.     bool operator()(const int a, const int b) const{return strcmp(p+a,p+b)<0;}
  58. };
  59. std::vector<int> buildSA(std::string &t){
  60.     int n=t.size();
  61.     std::vector<int>suff(n);
  62.     for(int i=0;i<n;i++)suff[i]=i;
  63.     std::sort(suff.begin(),suff.end(),sorter(t));
  64.     return suff;
  65. }
  66. }
  67.  
  68. namespace prefield {
  69. struct SAComp{
  70.     const int h,*g;
  71.     SAComp(const int h, const int* g) : h(h), g(g) {}
  72.     bool operator()(const int a, const int b){
  73.         return a == b ? false : g[a] != g[b] ? g[a] < g[b] : g[a+h] < g[b+h];
  74.     }
  75. };
  76. std::vector<int> buildSA(std::string &t){
  77.     int n=t.size();
  78.     int g[n],b[n];
  79.     std::vector<int>suff(n);
  80.     for(int i=0;i<n;i++)suff[i]=i,g[i]=t[i];
  81.     b[0]=b[n-1]=0;
  82.     std::sort(suff.begin(),suff.end(),SAComp(0,g));
  83.     for(int h=1;b[n-1]!=n-1;h<<=1){
  84.         SAComp comp(h,g);
  85.         std::sort(suff.begin(),suff.end(),comp);
  86.         for(int i=0;i<n-1;i++)b[i+1]=b[i]+comp(suff[i],suff[i+1]);
  87.         for(int i=0;i<n;i++)g[suff[i]]=b[i];
  88.     }
  89.     return suff;
  90. }
  91. }
  92.  
  93. std::string buildBWT(std::string &t,std::vector<int> &suff){
  94.     int n=t.size();
  95.     std::string bw;
  96.     for(int i=0;i<n;i++)bw+=t[(suff[i]?:n)-1];
  97.     return bw;
  98. }
  99.  
  100. int main(){
  101.     //manual
  102.     //std::string str="banana$";
  103.     //std::string str="TACGTAATCATGCGGCTAGCGCATGCATGCGTAGCGCATCGTAGCTC$";
  104.  
  105.     //repeat
  106.     std::string str(50000,'a'); str+='$';
  107.  
  108.     /*
  109.     //random
  110.     std::string str;
  111.     {
  112.         std::mt19937 engine(123456789);
  113.         std::uniform_int_distribution<> dist(0,26);
  114.         for(int i=0;i<100000;i++){
  115.             str+='a'+dist(engine);
  116.         }
  117.         str+='$';
  118.     }
  119.     */
  120.  
  121.     std::cin.tie(0);
  122.     std::ios::sync_with_stdio(false);
  123.  
  124.     for(int i=0;i<2;i++){
  125.     {
  126.         auto start = std::chrono::system_clock::now();
  127.         std::vector<int> suff=prefield::buildSA(str);
  128.         std::string bwt=buildBWT(str,suff);
  129.         auto end = std::chrono::system_clock::now();
  130.         //std::cout<<bwt<<std::endl;
  131.         std::cout<<std::chrono::duration_cast<std::chrono::microseconds>(end-start).count()<<std::endl;
  132.     }
  133.  
  134.     {
  135.         auto start = std::chrono::system_clock::now();
  136.         std::vector<int> suff=ciel::buildSA(str);
  137.         std::string bwt=buildBWT(str,suff);
  138.         auto end = std::chrono::system_clock::now();
  139.         //std::cout<<bwt<<std::endl;
  140.         std::cout<<std::chrono::duration_cast<std::chrono::microseconds>(end-start).count()<<std::endl;
  141.     }
  142.  
  143.     {
  144.         auto start = std::chrono::system_clock::now();
  145.         std::vector<int> suff(str.size());
  146.         std::string bwt;
  147.         bwt.resize(str.size());
  148.         kktkbwt::bwt(&str[0],&suff[0],&bwt[0],str.size());
  149.         auto end = std::chrono::system_clock::now();
  150.         //std::cout<<bwt<<std::endl;
  151.         std::cout<<std::chrono::duration_cast<std::chrono::microseconds>(end-start).count()<<std::endl;
  152.     }
  153.     }
  154. }
Advertisement
Add Comment
Please, Sign In to add comment