Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- /*====================================================================
- Title: Computes Longest Repeated Substring from a given string
- Complexity: O(n.log(n))
- Author : Sudipto Chandra (Dipu)
- =====================================================================*/
- #include <bits/stdc++.h>
- using namespace std;
- #define mem(a,b) memset(a, b, sizeof(a))
- #define loop(i, x) for(__typeof((x).begin()) i=(x).begin(); i!=(x).end(); ++i)
- #define rloop(i, x) for(__typeof((x).rbegin()) i=(x).rbegin(); i!=(x).rend(); ++i)
- /*------------------------------------------------------------------------------------*/
- int test, cas = 1;
- const int SIZ = 10000050; // maximum possible size
- int n; // text length
- int m; // pattern length
- char T[SIZ]; // text
- char P[SIZ]; // pattern
- int SA[SIZ], tempSA[SIZ]; // the sorted suffixes
- int RA[SIZ], tempRA[SIZ]; // ranks of suffix array
- int L[SIZ]; // used in counting sort
- inline int getRA(int i)
- {
- return (i < n) ? RA[i] : 0;
- }
- void radix_sort(int k)
- {
- mem(L, 0);
- // count frequencies
- for(int i = 0; i < n; ++i)
- {
- L[getRA(i + k)]++;
- }
- // save first index of every characters
- int mx = max(n, 130);
- for(int i = 0, s = 0; i < mx; ++i)
- {
- int x = L[i];
- L[i] = s;
- s += x;
- }
- // build sorted tempSA
- for(int i = 0; i < n; ++i)
- {
- int& x = L[getRA(SA[i] + k)];
- tempSA[x++] = SA[i];
- }
- // copy tempSA to SA
- for(int i = 0; i < n; ++i)
- {
- SA[i] = tempSA[i];
- }
- }
- // text must ends with a $
- void buildSA()
- {
- // initialize
- n = strlen(T);
- T[n++] = '$', T[n] = 0; // append $
- for(int i = 0; i < n; ++i)
- {
- SA[i] = i;
- RA[i] = T[i];
- }
- // algorithm loop
- for(int k = 1; k < n; k <<= 1)
- {
- // sort by k-th ranks
- radix_sort(k);
- radix_sort(0);
- // compute new ranks
- tempRA[SA[0]] = 0;
- for(int i = 1, r = 0; i < n; ++i)
- {
- if(getRA(SA[i-1]) != getRA(SA[i])) {
- r++;
- }
- else if(getRA(SA[i-1]+k) != getRA(SA[i]+k)) {
- r++;
- }
- tempRA[SA[i]] = r;
- }
- for(int i = 0; i < n; ++i)
- {
- RA[i] = tempRA[i];
- }
- if(RA[SA[n - 1]] == n - 1) break;
- }
- }
- // Count the number of times a given pattern P appears
- // in the string T. Complexity: O(n.log(n))
- int countOccurences()
- {
- m = strlen(P);
- SA[n] = n;
- int first, last;
- int low, high, mid;
- // find lower bound
- low = 0, high = n;
- while(low < high)
- {
- mid = (low + high) >> 1;
- printf("lower: %d %d %d\n", low, mid, high);
- if(strncmp(P, T + SA[mid], m) <= 0)
- {
- high = mid;
- }
- else
- {
- low = mid + 1;
- }
- }
- printf("lower: %d %d %d\n", low, mid, high);
- first = low;
- // find higher bound
- low = 0, high = n;
- while(low < high)
- {
- mid = (low + high) >> 1;
- printf("upper: %d %d %d\n", low, mid, high);
- if(strncmp(P, T + SA[mid], m) < 0)
- {
- high = mid;
- }
- else
- {
- low = mid + 1;
- }
- }
- printf("upper: %d %d %d\n", low, mid, high);
- last = low;
- printf("%d %d\n", first, last);
- return last - first;
- }
- int main()
- {
- /*
- GATAGACA
- GA
- abcabxabcd
- abc
- */
- printf("Text: ");
- scanf("%s", T);
- printf("Pattern: ");
- scanf("%s", P);
- buildSA();
- for(int i = 0; i < n; ++i)
- {
- printf("%2d | %2d | %s\n", i, SA[i], T + SA[i]);
- }
- int ans = countOccurences();
- printf("Pattern found %d times\n", ans);
- return 0;
- }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement