Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- #pragma once
- #include"template.hpp"
- template<typename Char=char>
- class suffix_automaton{
- using str=std::basic_string<Char>;
- using view=std::basic_string_view<Char>;
- std::vector<bool>terminal;
- std::vector<std::vector<i32>>inv_link;
- std::vector<std::map<Char,i32>>next{{}};
- std::vector<i32>link{-1},len{0},firstpos{-1},terminal_paths_from;
- std::vector<i64>substrings_from,paths_from;
- str text;
- i32 size=1;
- bool extra=false;
- suffix_automaton(){}
- void reserve_memory(const i32 n){
- ASSERT(n>=0);
- text.reserve(n);
- const i32 max_size=std::max(n+1,2*n-1);
- next.reserve(max_size);
- link.reserve(max_size);
- len.reserve(max_size);
- firstpos.reserve(max_size);
- }
- void extend(const Char c,i32&last){
- ASSERT(0<=last&&last<size);
- text.push_back(c);
- i32 p=last;
- last=push_new_state();
- len[last]=len[p]+1;
- firstpos[last]=len[p];
- do{next[p][c]=last,p=link[p];}UNTIL(p==-1||next[p].count(c));
- if(p==-1){link[last]=0;}
- else{
- const i32 q=next[p][c];
- if(len[q]==len[p]+1){link[last]=q;}
- else{
- const i32 cl=push_copy_state(q);
- len[cl]=len[p]+1;
- link[last]=link[q]=cl;
- do{next[p][c]=cl,p=link[p];}UNTIL(p==-1||next[p].at(c)!=q);
- }
- }
- }
- i32 push_new_state(){
- next.emplace_back();
- link.push_back(-1);
- len.push_back(0);
- firstpos.push_back(-1);
- return size++;
- }
- i32 push_copy_state(const i32 state){
- ASSERT(0<=state&&state<size);
- next.push_back(next[state]),
- link.push_back(link[state]);
- len.push_back(len[state]);
- firstpos.push_back(firstpos[state]);
- return size++;
- }
- void count_paths(){
- ASSERT(extra);
- terminal_paths_from.resize(size);
- substrings_from.resize(size);
- paths_from.resize(size);
- automaton_dfs();
- }
- void automaton_dfs(const i32 curr=0){
- ASSERT(extra);
- ASSERT(0<=curr&&curr<size);
- ASSERT(terminal_paths_from[curr]==0);
- ASSERT(substrings_from[curr]==0);
- ASSERT(paths_from[curr]==0);
- terminal_paths_from[curr]=terminal[curr];
- paths_from[curr]=1;
- FORE(edge,next[curr]){
- const i32 other=edge.second;
- UNLESS(paths_from[other]){automaton_dfs(other);}
- terminal_paths_from[curr]+=terminal_paths_from[other];
- substrings_from[curr]+=substrings_from[other];
- paths_from[curr]+=paths_from[other];
- }
- substrings_from[curr]+=terminal_paths_from[curr];
- }
- void complete_states(const i32 last){
- ASSERT(0<=last&&last<size);
- terminal.resize(size);
- for(i32 curr=last;curr!=-1;curr=link[curr]){terminal[curr]=true;}
- inv_link.resize(size);
- for(i32 curr=1;curr<size;++curr){inv_link[link[curr]].push_back(curr);}
- }
- std::pair<i32,i32>run_pattern(const view&pattern)const{
- i32 curr=0,read=0;
- for(
- auto it=pattern.begin();
- it!=pattern.end()&&next[curr].count(*it);
- curr=next[curr].at(*(it++))
- ){++read;}
- return{curr,read};
- }
- public:
- suffix_automaton(const view&_text,const bool _extra=false):extra(_extra){
- reserve_memory(SIZE(_text));
- i32 last=0;
- FORE(c,_text){extend(c,last);}
- complete_states(last);
- if(extra){count_paths();}
- }
- const str&peek_text()const{return text;}
- std::pair<str,i32>longest_accepted_prefix(const view&pattern)const{
- const auto[state,read]=run_pattern(pattern);
- const i32 start=firstpos[state]+1-read;
- return{text.substr(start,read),start};
- }
- bool is_suffix(const view&pattern)const{
- if(SIZE(pattern)>SIZE(text)){return false;}
- const auto[state,read]=run_pattern(pattern);
- return terminal[state]&&read==SIZE(pattern);
- }
- bool is_substring(const view&pattern)const{
- if(SIZE(pattern)>SIZE(text)){return false;}
- return run_pattern(pattern).second==SIZE(pattern);
- }
- bool is_prefix(const view&pattern)const{
- if(SIZE(pattern)>SIZE(text)){return false;}
- return std::equal(ALL(pattern),text.begin());
- }
- std::vector<i32>all_borders()const{
- std::vector<i32>borders{0};
- i32 curr=0;
- FORIT(it,text){
- curr=next[curr].at(*it);
- if(terminal[curr]){borders.push_back(I32(it-text.begin())+1);}
- }
- return borders;
- }
- i32 number_occurrences(const view&pattern)const{
- ASSERT(extra);
- if(SIZE(pattern)>SIZE(text)){return 0;}
- const auto[state,read]=run_pattern(pattern);
- return read==SIZE(pattern)?terminal_paths_from[state]:0;
- }
- i32 first_occurrence(const view&pattern)const{
- if(SIZE(pattern)>SIZE(text)){return-1;}
- const i32 m=SIZE(pattern);
- const auto[state,read]=run_pattern(pattern);
- return read==m?firstpos[state]+1-m:-1;
- }
- std::vector<i32>all_occurrences(
- const view&pattern,const bool order=false
- )const{
- if(SIZE(pattern)>SIZE(text)){return{};}
- std::vector<i32>occs;
- const i32 m=SIZE(pattern);
- const auto[node,read]=run_pattern(pattern);
- if(read==m){
- std::stack<i32,std::vector<i32>>st{{node}};
- while(!st.empty()){
- const i32 curr=st.top();
- st.pop();
- occs.push_back(firstpos[curr]+1-m);
- FORE(child,inv_link[curr]){st.push(child);}
- }
- }
- if(order){
- sort(ALL(occs));
- occs.erase(std::unique(ALL(occs)),occs.end());
- }
- return occs;
- }
- i32 number_suffixes()const{
- ASSERT(extra);
- return terminal_paths_from[0];
- }
- std::pair<str,i32>kth_suffix(i32 k)const{
- ASSERT(extra);
- ASSERT(0<=k&&k<terminal_paths_from[0]);
- i32 start=0,m=0,curr=0;
- while((k-=terminal[curr])>=0){
- FORE(edge,next[curr]){
- const i32 other=edge.second;
- const i32 step=terminal_paths_from[other];
- if(k>=step){k-=step;}
- else{
- start=firstpos[curr=other]-(m++);
- break;
- }
- }
- }
- return{text.substr(start,m),start};
- }
- i64 number_substrings()const{
- ASSERT(extra);
- return substrings_from[0];
- }
- std::pair<str,i32>kth_substring(i64 k)const{
- ASSERT(extra);
- ASSERT(0<=k&&k<substrings_from[0]);
- i32 start=0,m=0,curr=0;
- while((k-=terminal_paths_from[curr])>=0){
- FORE(edge,next[curr]){
- const i32 other=edge.second;
- const i64 step=substrings_from[other];
- if(k>=step){k-=step;}
- else{
- start=firstpos[curr=other]-(m++);
- break;
- }
- }
- }
- return{text.substr(start,m),start};
- }
- i64 number_distinct_substrings()const{
- ASSERT(extra);
- return paths_from[0];
- }
- std::pair<str,i32>kth_distinct_substring(i64 k)const{
- ASSERT(extra);
- ASSERT(0<=k&&k<paths_from[0]);
- i32 start=0,m=0,curr=0;
- while((k-=1)>=0){
- FORE(edge,next[curr]){
- const i32 other=edge.second;
- const i64 step=paths_from[other];
- if(k>=step){k-=step;}
- else{
- start=firstpos[curr=other]-(m++);
- break;
- }
- }
- }
- return{text.substr(start,m),start};
- }
- std::pair<std::pair<str,i32>,i32>longest_repeating_substring()const{
- ASSERT(extra);
- ASSERT(!text.empty());
- i32 start=0,best_len=0,best_occs=SIZE(text)+1;
- for(i32 curr=1;curr<size;++curr){
- const i32 curr_len=len[curr],curr_occs=terminal_paths_from[curr];
- const bool is_better_len=curr_len>best_len&&curr_occs>1;
- const bool is_better_occs=curr_len==best_len&&curr_occs>best_occs;
- if(is_better_len||is_better_occs){
- start=firstpos[curr]+1-(best_len=curr_len);
- best_occs=curr_occs;
- }
- }
- return{{text.substr(start,best_len),start},best_occs};
- }
- std::pair<str,i32>longest_common_substring(const view&word)const{
- i32 start=0,curr_len=0,best_len=0,curr=0;
- FORE(c,word){
- while(curr&&!next[curr].count(c)){curr_len=len[curr=link[curr]];}
- if(next[curr].count(c)){curr=next[curr].at(c),++curr_len;}
- if(curr_len>best_len){start=firstpos[curr]+1-(best_len=curr_len);}
- }
- return{text.substr(start,best_len),start};
- }
- static std::pair<str,i32>smallest_cyclic_shift(const view&word){
- suffix_automaton sa;
- const i32 n=SIZE(word);
- sa.reserve_memory(2*n);
- i32 last=0;
- FORE(c,word){sa.extend(c,last);}
- FORE(c,word){sa.extend(c,last);}
- i32 start=0;
- for(i32 curr=0,read=0;read<n;++read){
- curr=sa.next[curr].begin()->second;
- start=sa.firstpos[curr]-read;
- }
- return{sa.text.substr(start,n),start};
- }
- };
Add Comment
Please, Sign In to add comment