Advertisement
maxim_shlyahtin

Aho-Corasik modified output

May 31st, 2024
594
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
C++ 6.32 KB | None | 0 0
  1. #include <iostream>
  2. #include <map>
  3. #include <vector>
  4. #include <string>
  5. #include <array>
  6. #include <utility>
  7. #include <algorithm>
  8.  
  9. const int N = 5;
  10. std::vector<std::string> patterns;
  11. std::array<char, N> alphabet = { 'A', 'C', 'G', 'T', 'N' };
  12.  
  13. struct Node {
  14.     std::map<char, Node*> son;
  15.     std::map<char, Node*> go;
  16.     Node* parent;
  17.     Node* suffLink;
  18.     Node* up;
  19.     char charToParent;
  20.     bool isTerminal;
  21.     std::vector<int> leafPatternNumber;
  22.  
  23.     Node() {
  24.         parent = NULL;
  25.         suffLink = NULL;
  26.         up = NULL;
  27.         charToParent = ' ';
  28.         isTerminal = false;
  29.     }
  30. };
  31.  
  32. struct Trie {
  33. public:
  34.     Node* root;
  35.  
  36.     Trie() {
  37.         root = new Node();
  38.     }
  39.  
  40.     Node* getSuffLink(Node* v) {
  41.         if (v->suffLink == NULL) {
  42.             if (v == root || v->parent == root) {
  43.                 v->suffLink = root;
  44.             } else {
  45.                 v->suffLink = getLink(getSuffLink(v->parent), v->charToParent);
  46.             }
  47.         }
  48.         return v->suffLink;
  49.     }
  50.  
  51.     Node* getLink(Node* v, char c) {
  52.         if (v->go[c] == NULL) {
  53.             if (v->son[c]) {
  54.                 v->go[c] = v->son[c];
  55.             } else if (v == root) {
  56.                 v->go[c] = root;
  57.             } else {
  58.                 v->go[c] = getLink(getSuffLink(v), c);
  59.             }
  60.         }
  61.         return v->go[c];
  62.     }
  63.  
  64.     Node* getUp(Node* v) {
  65.         if (v->up == NULL) {
  66.             if (getSuffLink(v)->isTerminal) {
  67.                 v->up = getSuffLink(v);
  68.             } else if (getSuffLink(v) == root) {
  69.                 v->up = root;
  70.             } else {
  71.                 v->up = getUp(getSuffLink(v));
  72.             }
  73.         }
  74.         return v->up;
  75.     }
  76.  
  77.     void addString(const std::string& s, int patternNumber) {
  78.         std::cout << "/-----------------\\\n";
  79.         std::cout << "Adding sample: " << s << "\n";
  80.        
  81.         Node* curr = root;
  82.         for (char c : s) {
  83.             std::cout << "current char: " << c << "\n";
  84.             if (curr->son[c] == NULL) {
  85.                 curr->son[c] = new Node();
  86.                 curr->son[c]->parent = curr;
  87.                 curr->son[c]->charToParent = c;
  88.                 std::cout << "Adding new edge: " << (curr - root) << "{" << curr->charToParent << "} -> " << (curr->son[c] - root) << "{" << c << "}\n";
  89.             }
  90.             curr = curr->son[c];
  91.             std::cout << "Current vertex: " << (curr - root) << "{" << c << "}\n";
  92.         }
  93.         curr->isTerminal = true;
  94.         curr->leafPatternNumber.push_back(patternNumber);
  95.         std::cout << "Vertex " << (curr - root) << "{" << curr->charToParent << "} marked final\n";
  96.         std::cout << "Sample added " << s << "\n";
  97.         std::cout << "\\-----------------/\n";
  98.     }
  99.  
  100.     void check(Node* v, int i) {
  101.         Node* u = v;
  102.         while (u != root) {
  103.             if (u->isTerminal) {
  104.                 for (int patNum : u->leafPatternNumber) {
  105.                     std::cout << "<!> Found match at position " << i - patterns[patNum].length() + 1 << " of pattern " << patterns[patNum] << "\n";
  106.                 }
  107.             }
  108.             u = getUp(u);
  109.         }
  110.     }
  111.  
  112.     void find_all_pos(std::string& t) {
  113.         Node* u = root;
  114.         for (size_t i = 0; i < t.length(); ++i) {
  115.             std::cout << "STATE " << (u - root) << " INPUT " << t[i] << " AT POSITION " << i << "\n";
  116.             std::cout << "/>=>=>=>=>=>=>=>=>\\\n";
  117.             std::cout << "Getting advance from vertex " << (u - root) << "{" << u->charToParent << "} by char " << t[i] << "\n";
  118.             u = getLink(u, t[i]);
  119.             std::cout << "Advance from vertex " << (u - root) << "{" << u->charToParent << "} by char " << t[i] << " is " << (u - root) << "{" << t[i] << "}\n";
  120.             std::cout << "\\>=>=>=>=>=>=>=>=>/\n";
  121.             check(u, i + 1);
  122.         }
  123.     }
  124.  
  125.     void processText(std::string& s, std::vector<std::pair<int, int>>& ans) {
  126.         std::cout << "Input string: " << s << "\n";
  127.         Node* curr = root;
  128.         for (size_t i = 0; i < s.length(); ++i) {
  129.             char c = s[i];
  130.             std::cout << "STATE " << (curr - root) << " INPUT " << c << " AT POSITION " << i << "\n";
  131.             std::cout << "/>=>=>=>=>=>=>=>=>\\\n";
  132.             std::cout << "Getting advance from vertex " << (curr - root) << "{" << curr->charToParent << "} by char " << c << "\n";
  133.             curr = getLink(curr, c);
  134.             std::cout << "Advance from vertex " << (curr - root) << "{" << curr->charToParent << "} by char " << c << " is " << (curr - root) << "{" << c << "}\n";
  135.             std::cout << "\\>=>=>=>=>=>=>=>=>/\n";
  136.  
  137.             Node* checkNode = curr;
  138.             while (checkNode != root) {
  139.                 std::cout << "Follow vertex " << (checkNode - root) << "{" << checkNode->charToParent << "} for matches\n";
  140.                 if (checkNode->isTerminal) {
  141.                     for (auto& patNum : checkNode->leafPatternNumber) {
  142.                         std::cout << "<!> Found match at position " << (i + 2 - patterns[patNum].length()) << " of pattern " << patterns[patNum] << "\n";
  143.                         ans.push_back(std::make_pair(i + 2 - patterns[patNum].length(), patNum + 1));
  144.                     }
  145.                 }
  146.                 std::cout << "/->->->->->->->->->\\\n";
  147.                 std::cout << "Getting follow suffix link for vertex " << (checkNode - root) << "{" << checkNode->charToParent << "}\n";
  148.                 checkNode = getUp(checkNode);
  149.                 std::cout << "Follow suffix link for vertex " << (checkNode - root) << "{" << checkNode->charToParent << "} is " << (checkNode - root) << "{" << checkNode->charToParent << "}\n";
  150.                 std::cout << "\\->->->->->->->->->/\n";
  151.             }
  152.         }
  153.     }
  154.  
  155.     ~Trie() = default;
  156.  
  157. } typedef Trie;
  158.  
  159. int main() {
  160.     Trie tr;
  161.     std::string t;
  162.     std::cin >> t;
  163.     int n;
  164.     std::cin >> n;
  165.     for (size_t i = 0; i < n; ++i) {
  166.         std::string str;
  167.         std::cin >> str;
  168.         patterns.push_back(str);
  169.     }
  170.  
  171.     for (size_t i = 0; i < patterns.size(); ++i) {
  172.         tr.addString(patterns[i], i);
  173.     }
  174.     std::vector<std::pair<int, int>> ans;
  175.     tr.processText(t, ans);
  176.     std::sort(ans.begin(), ans.end());
  177.     for (const auto& it : ans) {
  178.         std::cout << it.first << ' ' << it.second << '\n';
  179.     }
  180.     return 0;
  181. }
  182.  
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement