Advertisement
_takumi

main

Apr 12th, 2023 (edited)
899
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
C++ 6.99 KB | None | 0 0
  1. #include <algorithm>
  2. #include <iostream>
  3. #include <fstream>
  4. #include <sstream>
  5. #include <string>
  6. #include <vector>
  7. #include <chrono>
  8.  
  9. const int SMALL_FILE_SIZE = 10000;
  10. const int LARGE_FILE_SIZE = 100000;
  11.  
  12. enum ALPH_TYPE {
  13.     BIN=2,
  14.     DNA=4
  15. };
  16.  
  17. std::vector<int> br(const std::string &s) {
  18.     std::vector<int> p(s.size(), 0);
  19.     for (size_t i = 1; i < s.size(); ++i) {
  20.         int k = p[i - 1];
  21.         while (k > 0 && (s[i] != s[k] || s[i] != '?' || s[k] != '?')) {
  22.             k = p[k - 1];
  23.         }
  24.         if (s[i] == s[k] || s[i] == '?' || s[k] == '?') {
  25.             ++k;
  26.         }
  27.         p[i] = k;
  28.     }
  29.     return p;
  30. }
  31.  
  32. int64_t kmp(const std::string &src, const std::string &pattern) {
  33.     auto start = std::chrono::high_resolution_clock::now();
  34.     std::vector<int> p = br(pattern);
  35.     size_t n = src.size();
  36.     size_t m = p.size();
  37.     size_t j = 0;
  38.     for (int i = 0; i < n; ++i) {
  39.         if (src[i] == pattern[j] || src[i] == '?' || pattern[j] == '?') {
  40.             ++j;
  41.             if (j == m) {
  42. //                return i - m + 2;
  43.                 auto elapsed = std::chrono::high_resolution_clock::now() - start;
  44.                 return std::chrono::duration_cast<std::chrono::nanoseconds>(elapsed).count();
  45.             }
  46.         } else {
  47.             if (j != 0) {
  48.                 j = p[j - 1];
  49.             }
  50.         }
  51.     }
  52. //    return 0;
  53.     auto elapsed = std::chrono::high_resolution_clock::now() - start;
  54.     return std::chrono::duration_cast<std::chrono::nanoseconds>(elapsed).count();
  55. }
  56.  
  57. std::vector<int> brs(std::string &s) {
  58.     s += '`';
  59.     size_t n = s.size();
  60.     std::vector<int> p_brs(n, 0);
  61.     std::vector<int> p_br = br(s);
  62.     for (size_t i = 1; i < n; ++i) {
  63.         if (s[p_br[i - 1] + 1] != s[i + 1]) {
  64.             p_brs[i] = p_br[i];
  65.         } else {
  66.             p_brs[i] = p_brs[p_br[i]];
  67.         }
  68.     }
  69.     return p_brs;
  70. }
  71.  
  72. int64_t kmp2(const std::string &src, std::string pattern) {
  73.     auto start = std::chrono::high_resolution_clock::now();
  74.     size_t n = src.size();
  75.     size_t m = pattern.size();
  76.     auto p = brs(pattern);
  77.     int j = 0;
  78.     for (size_t i = 0; i < n; ++i) {
  79.         while (j > 0 && src[i] != pattern[j] && pattern[j] != '?') {
  80.             j = p[j - 1];
  81.         }
  82.         if (src[i] == pattern[j] || pattern[j] == '?') {
  83.             ++j;
  84.             if (j == m) {
  85. //                return i - m + 2;
  86.                 auto elapsed = std::chrono::high_resolution_clock::now() - start;
  87.                 return std::chrono::duration_cast<std::chrono::nanoseconds>(elapsed).count();
  88.             }
  89.         }
  90.     }
  91. //    return 0;
  92.     auto elapsed = std::chrono::high_resolution_clock::now() - start;
  93.     return std::chrono::duration_cast<std::chrono::nanoseconds>(elapsed).count();
  94. }
  95.  
  96. bool str_equals(const std::string &s, const std::string &pat, size_t from) {
  97.     size_t size = pat.size();
  98.     for (size_t i = from; i < from + size; ++i) {
  99.         if (!(s[i] == pat[i - from] || s[i] == '?' || pat[i - from] == '?')) {
  100.             return false;
  101.         }
  102.     }
  103.     return true;
  104. }
  105.  
  106. int64_t naive_search(const std::string &src, const std::string &pattern) {
  107.     auto start = std::chrono::high_resolution_clock::now();
  108.     size_t size1 = src.size();
  109.     size_t size2 = pattern.size();
  110.     for (size_t i = 0; i < size1 - size2 + 1; ++i) {
  111.         if (str_equals(src, pattern, i)) {
  112. //            return i + 1;
  113.             auto elapsed = std::chrono::high_resolution_clock::now() - start;
  114.             return std::chrono::duration_cast<std::chrono::nanoseconds>(elapsed).count();
  115.         }
  116.     }
  117. //    return 0;
  118.     auto elapsed = std::chrono::high_resolution_clock::now() - start;
  119.     return std::chrono::duration_cast<std::chrono::nanoseconds>(elapsed).count();
  120. }
  121.  
  122. void write_data(std::ofstream &out, const std::string &algname, int subs_syms,
  123.                 int srcfilesize, ALPH_TYPE type, size_t ptrnsize, int64_t secs) {
  124.     std::ostringstream ss;
  125.     ss << algname << ',' << subs_syms << ',' << srcfilesize << ','
  126.        << type << ',' << ptrnsize << ',' << secs << std::endl;
  127.     out << ss.str();
  128. }
  129.  
  130. int main(int argc, char *argv[]) {
  131.     srand(time(nullptr));
  132.     const std::string naive_name = "Naive search";
  133.     const std::string kmp_name = "KMP standard";
  134.     const std::string kmp2_name = "KMP exquisite";
  135.     std::ifstream smallinput("smallfile.txt");
  136.     std::ifstream largeinput("largefile.txt");
  137.     std::ifstream patterns("patterns.txt");
  138.     std::ofstream data("data.csv");
  139.     std::string smlfile;
  140.     std::string lrgfile;
  141.     std::getline(smallinput, smlfile);
  142.     std::getline(largeinput, lrgfile);
  143.     std::string smlpat, lrgpat;
  144.     while (std::getline(patterns, smlpat, ',') && std::getline(patterns, lrgpat)) {
  145.         // string from small file
  146.         int64_t secs = naive_search(smlfile, smlpat);
  147.         write_data(data, naive_name, 0, SMALL_FILE_SIZE, BIN, smlpat.size(), secs);
  148.         secs = kmp(smlfile, smlpat);
  149.         write_data(data, kmp_name, 0, SMALL_FILE_SIZE, BIN, smlpat.size(), secs);
  150.         secs = kmp2(smlfile, smlpat);
  151.         write_data(data, kmp2_name, 0, SMALL_FILE_SIZE, BIN, smlpat.size(), secs);
  152.  
  153.         // string from large file:
  154.         secs = naive_search(lrgfile, lrgpat);
  155.         write_data(data, naive_name, 0, LARGE_FILE_SIZE, DNA, lrgpat.size(), secs);
  156.         secs = kmp(lrgfile, lrgpat);
  157.         write_data(data, kmp_name, 0, LARGE_FILE_SIZE, DNA, lrgpat.size(), secs);
  158.         secs = kmp2(lrgfile, lrgpat);
  159.         write_data(data, kmp2_name, 0, LARGE_FILE_SIZE, DNA, lrgpat.size(), secs);
  160.  
  161.         std::string qmsmlpat(smlpat), qmlrgpat(lrgpat);
  162.         size_t lb, ub = 0;
  163.         for (int i = 1; i <= 4; ++i) {
  164.             lb = ub + 1;
  165.             ub = (qmsmlpat.size() / 4) * i - 1;
  166.             size_t range = ub - lb + 1;
  167.             size_t pos = std::rand() % range + lb;
  168.             qmsmlpat.replace(pos, 1, "?");
  169.             pos = std::rand() % range + lb;
  170.             qmlrgpat.replace(pos, 1, "?");
  171.  
  172.             // string from small file
  173.             secs = naive_search(qmsmlpat, smlpat);
  174.             write_data(data, naive_name, i, SMALL_FILE_SIZE, BIN, qmsmlpat.size(), secs);
  175.             secs = kmp(qmsmlpat, smlpat);
  176.             write_data(data, kmp_name, i, SMALL_FILE_SIZE, BIN, qmsmlpat.size(), secs);
  177.             secs = kmp2(qmsmlpat, smlpat);
  178.             write_data(data, kmp2_name, i, SMALL_FILE_SIZE, BIN, qmsmlpat.size(), secs);
  179.  
  180.             // string from large file:
  181.             secs = naive_search(qmlrgpat, lrgpat);
  182.             write_data(data, naive_name, i, LARGE_FILE_SIZE, DNA, qmlrgpat.size(), secs);
  183.             secs = kmp(lrgfile, lrgpat);
  184.             write_data(data, kmp_name, i, LARGE_FILE_SIZE, DNA, qmlrgpat.size(), secs);
  185.             secs = kmp2(lrgfile, lrgpat);
  186.             write_data(data, kmp2_name, i, LARGE_FILE_SIZE, DNA, qmlrgpat.size(), secs);
  187.         }
  188.  
  189.         // check if newline is on top
  190.         if (patterns.peek() == '\n') {
  191.             patterns.ignore();
  192.         }
  193.     }
  194.     return 0;
  195. }
  196.  
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement