Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- #include <iostream>
- #include <vector>
- #include <unordered_map>
- #include <queue>
- using namespace std;
- struct node
- {
- unordered_map<char,int> child; // map of child characters to index in trie array
- bool leaf = false; // true if node corresponds to word, false otherwise
- int parent = -1; // index of parent in trie array
- char ch = '?'; // corresponding character
- int suffix_link = -1; // index of suffix link
- int dict_suffix_link = -1; // index of dict suffix link
- int word_id = -1; // index of corresponding word in words array, if leaf
- };
- struct aho
- {
- vector<node*> trie;
- vector<string> words;
- int num_words = 0;
- int size = 0;
- int root;
- // trie insert
- void insert(string s)
- {
- int cur = root;
- for(char c : s)
- {
- if(trie[cur]->child.find(c) == trie[cur]->child.end())
- {
- trie.push_back(new node());
- trie[size]->parent = cur;
- trie[size]->ch = c;
- trie[cur]->child[c] = size;
- size++;
- }
- cur = trie[cur]->child[c];
- }
- trie[cur]->leaf = true;
- trie[cur]->word_id = num_words++;
- words.push_back(s);
- }
- aho()
- {
- root = 0;
- trie.push_back(new node());
- size++;
- }
- void calc_suffix_link(int cur)
- {
- // base case: suffix link and dict suffix link are root
- if(cur==root || trie[cur]->parent==root)
- {
- trie[cur]->suffix_link = trie[cur]->dict_suffix_link = root;
- return;
- }
- // parent's suffix link
- int par_link = trie[trie[cur]->parent]->suffix_link;
- // cur's character
- char c = trie[cur]->ch;
- // move parent's suffix link until we reach root or contains c
- while(true)
- {
- if(trie[par_link]->child.find(c) != trie[par_link]->child.end())
- {
- trie[cur]->suffix_link = trie[par_link]->child[c];
- break;
- }
- if(par_link == root)
- {
- trie[cur]->suffix_link = root;
- break;
- }
- par_link = trie[par_link]->suffix_link;
- }
- // if cur's suffix link is a word, then cur's dict suffix link is cur's suffix link
- // otherwise, cur's suffix link is cur's suffix link's dict suffix link
- if(trie[trie[cur]->suffix_link]->leaf) trie[cur]->dict_suffix_link = trie[cur]->suffix_link;
- else trie[cur]->dict_suffix_link = trie[trie[cur]->suffix_link]->dict_suffix_link;
- }
- // BFS to call calc_suffix_link on each node
- void prep()
- {
- queue<int> q;
- q.push(root);
- while(!q.empty())
- {
- int cur = q.front(); q.pop();
- calc_suffix_link(cur);
- for( auto p : trie[cur]->child )
- q.push(p.second);
- }
- }
- int process(string t)
- {
- int cur = root;
- int result = 0;
- // for each character in t
- for(int j=0;j<t.size();j++)
- {
- // if t[j] is a child of the current node, move to that child
- // otherwise move to its suffix link
- // stop if we hit the root
- while(true)
- {
- if(trie[cur]->child.find(t[j]) != trie[cur]->child.end())
- {
- cur = trie[cur]->child[t[j]];
- break;
- }
- if(cur == root) break;
- cur = trie[cur] -> suffix_link;
- }
- // if cur is a leaf, we have found a match
- if(trie[cur]->leaf)
- {
- string word = words[trie[cur]->word_id];
- int index_of_match = j - word.size() + 1;
- cout << "Found "<< word << " at "<<index_of_match<<'\n';
- result++;
- }
- // print all matches that end at the current character
- int check = cur;
- while(true)
- {
- check = trie[check]->dict_suffix_link;
- if(check==root) break;
- string word = words[trie[check]->word_id];
- int index_of_match = j - word.size() + 1;
- cout << "Found "<< word << " at "<<index_of_match<<'\n';
- result++;
- }
- }
- return result;
- }
- };
- int main()
- {
- aho a;
- a.insert("a");
- a.insert("ab");
- a.insert("bab");
- a.insert("bc");
- a.insert("bca");
- a.insert("c");
- a.insert("caa");
- a.prep();
- a.process("abccab");
- }
Advertisement
Add Comment
Please, Sign In to add comment