Advertisement
kalabukdima

karasik.h

Apr 17th, 2018
212
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
C++ 3.82 KB | None | 0 0
  1. #pragma once
  2.  
  3. #include <array>
  4. #include <vector>
  5. #include <memory>
  6. #include <string>
  7. #include <queue>
  8.  
  9. class Aho {
  10. private:
  11.     static constexpr int kMaxSymbols = 'z' - 'a' + 1;
  12.  
  13. public:
  14.     struct Node {
  15.         Node(Node* parent, char lastSymbol)
  16.             : parent(parent)
  17.             , lastSymbol(lastSymbol) {}
  18.  
  19.         std::array<std::unique_ptr<Node>, kMaxSymbols> transitions;
  20.         Node* const parent; // nullptr for root.
  21.         const char lastSymbol;
  22.         Node* suffixLink = nullptr;
  23.         bool isTerminal = false;
  24.         int reachableTerminals = 0;
  25.     };
  26.  
  27. public:
  28.     Aho(const std::vector<std::string>& patterns);
  29.  
  30.     Aho() = delete;
  31.     Aho(const Aho&) = delete;
  32.     Aho(Aho&&) = default;
  33.     Aho& operator=(const Aho&) = delete;
  34.     Aho& operator=(Aho&&) = default;
  35.  
  36.     int numEntries(const std::string&) const noexcept;
  37.  
  38. private:
  39.     void addPattern(const std::string&);
  40.     void calcSubtreeSuffixLinks(Node&) noexcept;
  41.     Node* calcSuffixLink(Node&) noexcept;
  42.     void calcReachableTerminals();
  43.     Node* advance(Node*, char) const noexcept;
  44.  
  45.     static int ord(char ch) {
  46.         using namespace std::string_literals;
  47.         if (!('a' <= ch && ch <= 'z')) {
  48.             throw std::out_of_range("Symbol '"s + ch + "' is not a legal symbol");
  49.         }
  50.         return ch - 'a';
  51.     }
  52.  
  53. private:
  54.     const std::unique_ptr<Node> root;
  55. };
  56.  
  57. Aho::Aho(const std::vector<std::string>& patterns)
  58.     : root(std::make_unique<Node>(nullptr, '\0'))
  59. {
  60.     for (const auto& pattern : patterns) {
  61.         if (pattern == "") {
  62.             throw std::logic_error("Empty strings are not allowed");
  63.         }
  64.         addPattern(pattern);
  65.     }
  66.     root->suffixLink = root.get();
  67.     calcSubtreeSuffixLinks(*root);
  68.     calcReachableTerminals();
  69. }
  70.  
  71. void Aho::addPattern(const std::string& pattern) {
  72.     Node* ptr = root.get();
  73.     for (const auto& ch : pattern) {
  74.         const auto index = ord(ch);
  75.         if (!ptr->transitions[index]) {
  76.             ptr->transitions[index] = std::make_unique<Node>(ptr, ch);
  77.         }
  78.         ptr = ptr->transitions[index].get();
  79.     }
  80.     ptr->isTerminal = true;
  81. }
  82.  
  83. void Aho::calcSubtreeSuffixLinks(Node& node) noexcept {
  84.     calcSuffixLink(node);
  85.     for (const auto& child : node.transitions) {
  86.         if (child) {
  87.             calcSubtreeSuffixLinks(*child);
  88.         }
  89.     }
  90. }
  91.  
  92. auto Aho::calcSuffixLink(Node& node) noexcept -> Node* {
  93.     if (node.suffixLink) {
  94.         return node.suffixLink;
  95.     }
  96.     Node* ptr = node.parent;
  97.     const auto index = ord(node.lastSymbol);
  98.     while (ptr != root.get()) {
  99.         ptr = calcSuffixLink(*ptr);
  100.         if (ptr->transitions[index]) {
  101.             return node.suffixLink = ptr->transitions[index].get();
  102.         }
  103.     }
  104.     return node.suffixLink = root.get();
  105. }
  106.  
  107. void Aho::calcReachableTerminals() {
  108.     std::queue<Node*> queue;
  109.     queue.push(root.get());
  110.     root->reachableTerminals = 0;
  111.     while (!queue.empty()) {
  112.         const auto p = queue.front();
  113.         queue.pop();
  114.         p->reachableTerminals = p->suffixLink->reachableTerminals;
  115.         if (p->isTerminal) {
  116.             ++p->reachableTerminals;
  117.         }
  118.         for (const auto& p : p->transitions) {
  119.             if (p) {
  120.                 queue.push(p.get());
  121.             }
  122.         }
  123.     }
  124. }
  125.  
  126. auto Aho::advance(Node* p, char ch) const noexcept -> Node* {
  127.     const auto index = ord(ch);
  128.     if (p->transitions[index]) {
  129.         return p->transitions[index].get();
  130.     }
  131.     if (p == root.get()) {
  132.         return root.get();
  133.     }
  134.     return advance(p->suffixLink, ch);
  135. }
  136.  
  137. int Aho::numEntries(const std::string& s) const noexcept {
  138.     int result = 0;
  139.     Node* ptr = root.get();
  140.     for (char ch : s) {
  141.         ptr = advance(ptr, ch);
  142.         result += ptr->reachableTerminals;
  143.     }
  144.     return result;
  145. }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement