Advertisement
Guest User

arovesto/task3

a guest
Oct 23rd, 2018
84
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
C++ 5.92 KB | None | 0 0
  1. #include <algorithm>
  2. #include <iostream>
  3. #include <string>
  4. #include <vector>
  5. #include <fstream>
  6.  
  7.  
  8. //structure is to store suffix index in string and allow sort by symbols
  9. struct suffix { size_t index = 0; int rank = 0; int next_rank = 0; };
  10.  
  11. //structure to sort by two letters (letter rank then letter next_ranc)
  12. bool cmp(struct suffix a, struct suffix b) {
  13.     return (a.rank == b.rank) ? (a.next_rank < b.next_rank) : (a.rank < b.rank);
  14. }
  15.  
  16. //function create suffix array from string
  17. std::vector<size_t> create_suff_array(const std::string& string) {
  18.     //at first, we initialize ouf suffixes structures with their indexes and fisrt two letters
  19.     std::vector<suffix> suffixes(string.length());
  20.     for (size_t i = 0; i < suffixes.size(); i++) {
  21.         suffixes[i].index = i;
  22.         suffixes[i].rank = string[i] - 'a';
  23.         suffixes[i].next_rank = ((i + 1) < string.length()) ? (string[i + 1] - 'a') : -1;
  24.     }
  25.     //now we sort by first two letters
  26.     sort(suffixes.begin(), suffixes.end(), cmp);
  27.     //map of original suffix index-in-string to our index-in-structure
  28.     std::vector<size_t> indexes(string.length());
  29.     //now we going to sort by 4, 8, 16... suffixes
  30.     for (int previous_lcp = 2; previous_lcp < string.length(); previous_lcp = previous_lcp * 2) {
  31.         //start our traverse from rank of first suffix
  32.         int rank = 0;
  33.         int prev_rank = suffixes[0].rank;
  34.         suffixes[0].rank = rank;
  35.         indexes[suffixes[0].index] = 0;
  36.         for (int i = 1; i < string.length(); i++) {
  37.             // If first rank and next are same as that of previous suffix, stay like that
  38.             if (suffixes[i].rank == prev_rank && suffixes[i].next_rank == suffixes[i - 1].next_rank) {
  39.                 prev_rank = suffixes[i].rank;
  40.                 suffixes[i].rank = rank;
  41.             }
  42.             //else we increment rank
  43.             else {
  44.                 prev_rank = suffixes[i].rank;
  45.                 rank += 1;
  46.                 suffixes[i].rank = rank;
  47.             }
  48.             //memorizing our index
  49.             indexes[suffixes[i].index] = i;
  50.         }
  51.         for (int i = 0; i < string.length(); i++) {
  52.             int next_index = suffixes[i].index + previous_lcp;
  53.             suffixes[i].next_rank = (next_index < string.length()) ?
  54.                 suffixes[indexes[next_index]].rank : -1;
  55.         }
  56.         //now we sort by first previous_lcp-length prefix of suffixes
  57.         sort(suffixes.begin(), suffixes.end(), cmp);
  58.     }
  59.     //returning in more trivial form
  60.     std::vector<size_t> suffix_array;
  61.     for (auto a : suffixes) suffix_array.push_back(a.index);
  62.     return suffix_array;
  63. }
  64.  
  65. //function to create lcp array(starting with -1) by string and suffix array
  66. std::vector<int> create_lcp(const std::string& string, std::vector<size_t>& suffix_array) {
  67.     //resulting array
  68.     std::vector<int> lcp(string.length(), 0);
  69.     //creating suffix_array^(-1), meant suffix_array[3] = 1 <=> inverse_suffixes[1] = 3
  70.     std::vector<int> inverse_suffixes(string.length(), 0);
  71.     for (int i = 0; i < string.length(); i++) inverse_suffixes[suffix_array[i]] = i;
  72.     //length of previous lcp
  73.     int previous_lcp = 0;
  74.     for (int i = 0; i < string.length(); i++) {
  75.         //on next step previous lcp become on 1 shorter
  76.         if (previous_lcp > 0) previous_lcp -= 1;
  77.         //here we got no good string to consider
  78.         if (inverse_suffixes[i] == 0) { lcp[0] = -1; previous_lcp = 0; }
  79.         else {
  80.             //next string in suffix array to be considered
  81.             int j = suffix_array[inverse_suffixes[i] - 1];
  82.             //traversing j - substring
  83.             while (i + previous_lcp < string.length() && j + previous_lcp < string.length() &&
  84.                 string[i + previous_lcp] == string[j + previous_lcp]) previous_lcp += 1;
  85.             lcp[inverse_suffixes[i]] = previous_lcp;
  86.         }
  87.     }
  88.     return lcp;
  89. }
  90.  
  91. //function to create bool array and sets[i] = to wich string is suffix[i] belong to
  92. std::vector<bool> create_sets(const std::string& string, const std::vector<size_t>& suffix_array, const size_t divider_index) {
  93.     //creating result array, initialized with ones and some values replaced with zero if needed
  94.     std::vector<bool> sets(string.length(), 1);
  95.     for (int64_t i = 0; i < suffix_array.size(); i++) if (suffix_array[i] <= divider_index) sets[i] = 0;
  96.     return sets;
  97. }
  98.  
  99. //function to find k-th common substring of two strings, and k could be up to 10^18
  100. std::string search_k_substring(const std::string& first_string, const std::string& second_string, int64_t k) {
  101.     //concatination with special symbols spliters, and split one should be > split two
  102.     std::string concat = first_string + '%' + second_string + '$';
  103.     //creating all neaded structures, mentioned above
  104.     std::vector<size_t> suffix_array = create_suff_array(concat);
  105.     std::vector<int> lcp = create_lcp(concat, suffix_array);
  106.     std::vector<bool> sets = create_sets(concat, suffix_array, first_string.length());
  107.     //substring counter, starts with 1 because in task substrings counted from 1
  108.     int64_t substring_counter = 1;
  109.     //every next non zero prefix of suffix of different strings created lcp - previous_lcp new substings
  110.     size_t previous_lcp = 0;
  111.     /*general algorythm is to go through sets array, if we see change from 1 to zero in it, so it is two
  112.     suffixes of different strings. So them have an prefix, and this prefix creates common substrings. And
  113.     because of suffix array sorted, it will be in appropriate order*/
  114.     for (size_t i = 1; i < concat.length(); i++) {
  115.         //our lcp is the smalest lcp on segment between sets array change
  116.         previous_lcp = (previous_lcp < lcp[i]) ? previous_lcp : lcp[i];
  117.         //we found sets change with good lcp!
  118.         if (lcp[i] > 0 && sets[i] != sets[i - 1]) {
  119.             //if we near to answer, we have to just slice our concat to answer!
  120.             if (k < substring_counter + lcp[i] - previous_lcp)
  121.                 return concat.substr(suffix_array[i], previous_lcp + 1 + k - substring_counter);
  122.             //continue substrings counter
  123.             substring_counter += lcp[i] - previous_lcp;
  124.             previous_lcp = lcp[i];
  125.         }
  126.     }
  127.     //we failed to found anything, returning -1 as task is want from us
  128.     return "-1";
  129. }
  130.  
  131. int main() {
  132.     //creating and getting strings, k, and calucalate result
  133.     std::string str1, str2;
  134.     std::cin >> str1 >> str2;
  135.     int64_t k;
  136.     std::cin >> k;
  137.     std::cout << search_k_substring(str1, str2, k);
  138.     return 0;
  139. }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement