Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- #include <algorithm>
- #include <iostream>
- #include <string>
- #include <vector>
- #include <fstream>
- //structure is to store suffix index in string and allow sort by symbols
- struct suffix { size_t index = 0; int rank = 0; int next_rank = 0; };
- //structure to sort by two letters (letter rank then letter next_ranc)
- bool cmp(struct suffix a, struct suffix b) {
- return (a.rank == b.rank) ? (a.next_rank < b.next_rank) : (a.rank < b.rank);
- }
- //function create suffix array from string
- std::vector<size_t> create_suff_array(const std::string& string) {
- //at first, we initialize ouf suffixes structures with their indexes and fisrt two letters
- std::vector<suffix> suffixes(string.length());
- for (size_t i = 0; i < suffixes.size(); i++) {
- suffixes[i].index = i;
- suffixes[i].rank = string[i] - 'a';
- suffixes[i].next_rank = ((i + 1) < string.length()) ? (string[i + 1] - 'a') : -1;
- }
- //now we sort by first two letters
- sort(suffixes.begin(), suffixes.end(), cmp);
- //map of original suffix index-in-string to our index-in-structure
- std::vector<size_t> indexes(string.length());
- //now we going to sort by 4, 8, 16... suffixes
- for (int previous_lcp = 2; previous_lcp < string.length(); previous_lcp = previous_lcp * 2) {
- //start our traverse from rank of first suffix
- int rank = 0;
- int prev_rank = suffixes[0].rank;
- suffixes[0].rank = rank;
- indexes[suffixes[0].index] = 0;
- for (int i = 1; i < string.length(); i++) {
- // If first rank and next are same as that of previous suffix, stay like that
- if (suffixes[i].rank == prev_rank && suffixes[i].next_rank == suffixes[i - 1].next_rank) {
- prev_rank = suffixes[i].rank;
- suffixes[i].rank = rank;
- }
- //else we increment rank
- else {
- prev_rank = suffixes[i].rank;
- rank += 1;
- suffixes[i].rank = rank;
- }
- //memorizing our index
- indexes[suffixes[i].index] = i;
- }
- for (int i = 0; i < string.length(); i++) {
- int next_index = suffixes[i].index + previous_lcp;
- suffixes[i].next_rank = (next_index < string.length()) ?
- suffixes[indexes[next_index]].rank : -1;
- }
- //now we sort by first previous_lcp-length prefix of suffixes
- sort(suffixes.begin(), suffixes.end(), cmp);
- }
- //returning in more trivial form
- std::vector<size_t> suffix_array;
- for (auto a : suffixes) suffix_array.push_back(a.index);
- return suffix_array;
- }
- //function to create lcp array(starting with -1) by string and suffix array
- std::vector<int> create_lcp(const std::string& string, std::vector<size_t>& suffix_array) {
- //resulting array
- std::vector<int> lcp(string.length(), 0);
- //creating suffix_array^(-1), meant suffix_array[3] = 1 <=> inverse_suffixes[1] = 3
- std::vector<int> inverse_suffixes(string.length(), 0);
- for (int i = 0; i < string.length(); i++) inverse_suffixes[suffix_array[i]] = i;
- //length of previous lcp
- int previous_lcp = 0;
- for (int i = 0; i < string.length(); i++) {
- //on next step previous lcp become on 1 shorter
- if (previous_lcp > 0) previous_lcp -= 1;
- //here we got no good string to consider
- if (inverse_suffixes[i] == 0) { lcp[0] = -1; previous_lcp = 0; }
- else {
- //next string in suffix array to be considered
- int j = suffix_array[inverse_suffixes[i] - 1];
- //traversing j - substring
- while (i + previous_lcp < string.length() && j + previous_lcp < string.length() &&
- string[i + previous_lcp] == string[j + previous_lcp]) previous_lcp += 1;
- lcp[inverse_suffixes[i]] = previous_lcp;
- }
- }
- return lcp;
- }
- //function to create bool array and sets[i] = to wich string is suffix[i] belong to
- std::vector<bool> create_sets(const std::string& string, const std::vector<size_t>& suffix_array, const size_t divider_index) {
- //creating result array, initialized with ones and some values replaced with zero if needed
- std::vector<bool> sets(string.length(), 1);
- for (int64_t i = 0; i < suffix_array.size(); i++) if (suffix_array[i] <= divider_index) sets[i] = 0;
- return sets;
- }
- //function to find k-th common substring of two strings, and k could be up to 10^18
- std::string search_k_substring(const std::string& first_string, const std::string& second_string, int64_t k) {
- //concatination with special symbols spliters, and split one should be > split two
- std::string concat = first_string + '%' + second_string + '$';
- //creating all neaded structures, mentioned above
- std::vector<size_t> suffix_array = create_suff_array(concat);
- std::vector<int> lcp = create_lcp(concat, suffix_array);
- std::vector<bool> sets = create_sets(concat, suffix_array, first_string.length());
- //substring counter, starts with 1 because in task substrings counted from 1
- int64_t substring_counter = 1;
- //every next non zero prefix of suffix of different strings created lcp - previous_lcp new substings
- size_t previous_lcp = 0;
- /*general algorythm is to go through sets array, if we see change from 1 to zero in it, so it is two
- suffixes of different strings. So them have an prefix, and this prefix creates common substrings. And
- because of suffix array sorted, it will be in appropriate order*/
- for (size_t i = 1; i < concat.length(); i++) {
- //our lcp is the smalest lcp on segment between sets array change
- previous_lcp = (previous_lcp < lcp[i]) ? previous_lcp : lcp[i];
- //we found sets change with good lcp!
- if (lcp[i] > 0 && sets[i] != sets[i - 1]) {
- //if we near to answer, we have to just slice our concat to answer!
- if (k < substring_counter + lcp[i] - previous_lcp)
- return concat.substr(suffix_array[i], previous_lcp + 1 + k - substring_counter);
- //continue substrings counter
- substring_counter += lcp[i] - previous_lcp;
- previous_lcp = lcp[i];
- }
- }
- //we failed to found anything, returning -1 as task is want from us
- return "-1";
- }
- int main() {
- //creating and getting strings, k, and calucalate result
- std::string str1, str2;
- std::cin >> str1 >> str2;
- int64_t k;
- std::cin >> k;
- std::cout << search_k_substring(str1, str2, k);
- return 0;
- }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement