Advertisement
Guest User

Untitled

a guest
Nov 12th, 2019
95
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
C++ 7.98 KB | None | 0 0
  1. /*
  2.  Aho-Corasick by Vladimir Tochilin
  3.  
  4.  Description(Eng):
  5.  Search template is given as string with length m,
  6.  which can consist of ordinary symbols and "?"
  7.  Find all template entrance positions in text with length m.
  8.  Entrance means, that all ordinary symbols are similar to
  9.  corresponding from text and "?" is replaced with arbitary one.
  10.  m ≤ 5000, n ≤ 2000000.
  11.  
  12.  Time: O((n+m)log#Sigma + Z),
  13.  Memory: O(m)
  14.  */
  15. #include <deque>
  16. #include <queue>
  17. #include <iostream>
  18. #include <map>
  19. #include <memory>
  20. #include <string>
  21. #include <vector>
  22.  
  23. const char DIVIDER = '?';
  24.  
  25. class Bohr {
  26. public:
  27.     struct BohrNode {
  28.         BohrNode();
  29.         std::map<const char, std::shared_ptr<BohrNode>> transitions;
  30.         std::weak_ptr<BohrNode> term_link;
  31.         std::weak_ptr<BohrNode> suff_link;
  32.         bool is_terminal;  // If terminal, than store index in dictionary
  33.         bool is_root;
  34.         std::vector<size_t> wordlist_idxs;
  35.  
  36.         bool operator ==(const BohrNode& rhv);
  37.         std::shared_ptr<BohrNode> FindTransition(char ch);
  38.         ~BohrNode() = default;
  39.     };
  40.     std::vector<std::pair<std::string, size_t>> wordlist;  // String and distance to start
  41.     size_t pattern_size;
  42.     std::shared_ptr<BohrNode> root;
  43.  
  44.     Bohr();
  45.     explicit Bohr(size_t pattern_size);
  46.     void AddPattern(const std::string& pattern, size_t divider_count);
  47.     void Init();
  48.     void Step(char ch, std::shared_ptr<BohrNode>& current_node);
  49.     std::vector<size_t> PatternSearch(std::string& text, size_t extra_symbols);
  50.     ~Bohr() = default;
  51. };
  52.  
  53. int main() {
  54.     std::string pattern;
  55.     std::cin >> pattern;
  56.     Bohr bohr(pattern.length());
  57.  
  58.     size_t last_char = 0;
  59.     size_t divider_count = 0;
  60.     for (size_t i = 0; i < pattern.length(); ++i) {
  61.         if (pattern[i] == DIVIDER) {
  62.             if (last_char != i) {
  63.                 bohr.AddPattern(pattern.substr(last_char, i - last_char),
  64.                                 divider_count);
  65.                 divider_count = 0;
  66.             }
  67.             ++divider_count;
  68.             last_char = i + 1;
  69.         } else if (i == pattern.length() - 1) {
  70.             bohr.AddPattern(pattern.substr(last_char, i - last_char + 1),
  71.                             divider_count);
  72.             divider_count = 0;
  73.         }
  74.     }
  75.     bohr.Init();
  76.  
  77.     std::string text;
  78.     std::cin >> text;
  79.     auto result = bohr.PatternSearch(text, divider_count); // Second parameter - extra "?" in the end
  80.     for (auto& idx : result) {
  81.         std::cout << idx << " ";
  82.     }
  83.  
  84.     return 0;
  85. }
  86.  
  87. Bohr::BohrNode::BohrNode(): suff_link(), term_link(), transitions(),
  88.                             is_terminal(false), is_root(false),
  89.                             wordlist_idxs() {}
  90.  
  91. Bohr::Bohr(): root(std::make_shared<BohrNode>()), wordlist(), pattern_size(0) {
  92.     root->suff_link = std::weak_ptr<BohrNode>(root);  // Root specification
  93.     root->term_link = std::weak_ptr<BohrNode>(root);
  94.     root->is_root = true;
  95. }
  96.  
  97. Bohr::Bohr(size_t pattern_size): root(std::make_shared<BohrNode>()), wordlist(),
  98.                                  pattern_size(pattern_size) {
  99.     root->suff_link = std::weak_ptr<BohrNode>(root);  // Root specification
  100.     root->term_link = std::weak_ptr<BohrNode>(root);
  101.     root->is_root = true;
  102. }
  103.  
  104. bool Bohr::BohrNode::operator ==(const Bohr::BohrNode& rhv) {  // Just in case
  105.     return transitions == rhv.transitions &&
  106.            suff_link.lock() == rhv.suff_link.lock() &&
  107.            term_link.lock() == rhv.term_link.lock() &&
  108.            is_terminal == rhv.is_terminal &&
  109.            is_root == rhv.is_root;
  110. }
  111.  
  112. std::shared_ptr<Bohr::BohrNode> Bohr::BohrNode::FindTransition(char ch) {
  113.     auto c_it = transitions.find(ch);
  114.     if (c_it == transitions.cend()) {
  115.         return nullptr;
  116.     } else {
  117.         return c_it->second;
  118.     }
  119. }
  120.  
  121. void Bohr::AddPattern(const std::string& pattern, size_t divider_count) {
  122.     auto current_node = root;
  123.     for (const char& ch : pattern) {
  124.         auto neighbour_node = current_node->FindTransition(ch);
  125.         if (neighbour_node == nullptr) {
  126.             neighbour_node = std::make_shared<BohrNode>();
  127.             if (current_node->is_root) {  // First symbols refer to root
  128.                 neighbour_node->suff_link = std::weak_ptr<BohrNode>(root);
  129.                 neighbour_node->term_link = neighbour_node->suff_link;
  130.             }
  131.             current_node->transitions[ch] = neighbour_node;
  132.         }
  133.         current_node = neighbour_node;
  134.     }
  135.  
  136.     if (!current_node->is_root) {
  137.         current_node->is_terminal = true;
  138.         current_node->wordlist_idxs.push_back(wordlist.size());
  139.         wordlist.emplace_back(pattern,
  140.                               (!wordlist.empty() ?
  141.                                wordlist.back().second + pattern.length() :
  142.                                pattern.length()) + divider_count);
  143.     }
  144. }
  145.  
  146. void Bohr::Init() {  // BFS
  147.     std::queue<std::shared_ptr<BohrNode>> bfs_queue;
  148.     bfs_queue.push(root);
  149.     while (!bfs_queue.empty()) {
  150.         auto current_node = bfs_queue.front();
  151.         bfs_queue.pop();
  152.         for (auto& map_el : current_node->transitions) {  // Make suffix and terminal references
  153.             const char ch = map_el.first;
  154.             auto neighbour_node = map_el.second;
  155.  
  156.             auto temp_node = current_node->suff_link;
  157.             while (true) {  // Suffix ones
  158.                 auto transition = temp_node.lock()->FindTransition(ch);
  159.                 if (transition != nullptr && transition != neighbour_node) {
  160.                     neighbour_node->suff_link = transition;
  161.                     break;
  162.                 } else {
  163.                     if (temp_node.lock()->is_root) {
  164.                         break;
  165.                     }
  166.                     temp_node = temp_node.lock()->suff_link;
  167.                 }
  168.             }
  169.             if (neighbour_node->suff_link.lock() == nullptr) {
  170.                 neighbour_node->suff_link = std::weak_ptr<BohrNode>(root);
  171.                 neighbour_node->term_link = neighbour_node->suff_link;
  172.             }
  173.  
  174.             if (neighbour_node->suff_link.lock()->is_terminal) {  // Terminal ones
  175.                 neighbour_node->term_link = neighbour_node->suff_link;
  176.             } else {
  177.                 neighbour_node->term_link =
  178.                         neighbour_node->suff_link.lock()->term_link;
  179.             }
  180.  
  181.             bfs_queue.push(neighbour_node);
  182.         }
  183.     }
  184. }
  185.  
  186. void Bohr::Step(const char ch, std::shared_ptr<BohrNode>& current_node) {
  187.     while (current_node != nullptr) {
  188.         auto candidate = current_node->FindTransition(ch);
  189.         if (candidate != nullptr) {
  190.             current_node = candidate;
  191.             return;
  192.         }
  193.         if (candidate == nullptr && current_node == root) {
  194.             return;
  195.         }
  196.         current_node = current_node->suff_link.lock();
  197.     }
  198.  
  199.     if (!(current_node->is_root)) {  // In case nothing found
  200.         current_node = root;
  201.     }
  202. }
  203.  
  204. std::vector<size_t> Bohr::PatternSearch(std::string& text, size_t extra_symbols) {
  205.     std::vector<size_t> pattern_indexes;
  206.     std::deque<size_t> search_deque(pattern_size);
  207.     size_t patterns_number = wordlist.size();
  208.  
  209.     auto current_node = root;
  210.     for (size_t i = 0; i < text.length(); ++i) {
  211.         Step(text[i], current_node);
  212.         auto additional_node = current_node;
  213.         while (!additional_node->is_root) {
  214.             if (additional_node->is_terminal) {
  215.                 for (auto& idx : additional_node->wordlist_idxs) {
  216.                     // Size - distance to start
  217.                     ++search_deque[search_deque.size() - wordlist[idx].second];
  218.                 }
  219.             }
  220.             additional_node = additional_node->term_link.lock();
  221.         }
  222.  
  223.         if (search_deque.front() == patterns_number && i >= pattern_size - 1) {  // All words from piece met
  224.             pattern_indexes.push_back(i - pattern_size + 1);
  225.         }
  226.         search_deque.pop_front();
  227.         search_deque.push_back(0);
  228.  
  229.     }
  230.     return pattern_indexes;
  231. }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement