Advertisement
Guest User

Untitled

a guest
Jun 28th, 2017
66
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 3.20 KB | None | 0 0
  1. /*====================================================================
  2. Title: Computes Longest Repeated Substring from a given string
  3. Complexity: O(n.log(n))
  4. Author : Sudipto Chandra (Dipu)
  5. =====================================================================*/
  6. #include <bits/stdc++.h>
  7. using namespace std;
  8. #define mem(a,b) memset(a, b, sizeof(a))
  9. #define loop(i, x) for(__typeof((x).begin()) i=(x).begin(); i!=(x).end(); ++i)
  10. #define rloop(i, x) for(__typeof((x).rbegin()) i=(x).rbegin(); i!=(x).rend(); ++i)
  11. /*------------------------------------------------------------------------------------*/
  12.  
  13. int test, cas = 1;
  14.  
  15. const int SIZ = 10000050; // maximum possible size
  16.  
  17. int n; // text length
  18. int m; // pattern length
  19. char T[SIZ]; // text
  20. char P[SIZ]; // pattern
  21. int SA[SIZ], tempSA[SIZ]; // the sorted suffixes
  22. int RA[SIZ], tempRA[SIZ]; // ranks of suffix array
  23. int L[SIZ]; // used in counting sort
  24.  
  25. inline int getRA(int i)
  26. {
  27. return (i < n) ? RA[i] : 0;
  28. }
  29.  
  30. void radix_sort(int k)
  31. {
  32. mem(L, 0);
  33. // count frequencies
  34. for(int i = 0; i < n; ++i)
  35. {
  36. L[getRA(i + k)]++;
  37. }
  38. // save first index of every characters
  39. int mx = max(n, 130);
  40. for(int i = 0, s = 0; i < mx; ++i)
  41. {
  42. int x = L[i];
  43. L[i] = s;
  44. s += x;
  45. }
  46. // build sorted tempSA
  47. for(int i = 0; i < n; ++i)
  48. {
  49. int& x = L[getRA(SA[i] + k)];
  50. tempSA[x++] = SA[i];
  51. }
  52. // copy tempSA to SA
  53. for(int i = 0; i < n; ++i)
  54. {
  55. SA[i] = tempSA[i];
  56. }
  57. }
  58. // text must ends with a $
  59. void buildSA()
  60. {
  61. // initialize
  62. n = strlen(T);
  63. T[n++] = '$', T[n] = 0; // append $
  64. for(int i = 0; i < n; ++i)
  65. {
  66. SA[i] = i;
  67. RA[i] = T[i];
  68. }
  69.  
  70. // algorithm loop
  71. for(int k = 1; k < n; k <<= 1)
  72. {
  73. // sort by k-th ranks
  74. radix_sort(k);
  75. radix_sort(0);
  76. // compute new ranks
  77. tempRA[SA[0]] = 0;
  78. for(int i = 1, r = 0; i < n; ++i)
  79. {
  80. if(getRA(SA[i-1]) != getRA(SA[i])) {
  81. r++;
  82. }
  83. else if(getRA(SA[i-1]+k) != getRA(SA[i]+k)) {
  84. r++;
  85. }
  86. tempRA[SA[i]] = r;
  87. }
  88. for(int i = 0; i < n; ++i)
  89. {
  90. RA[i] = tempRA[i];
  91. }
  92. if(RA[SA[n - 1]] == n - 1) break;
  93. }
  94. }
  95.  
  96. // functions used to count occurrences of pattern
  97. bool isLesser(int t, const char* p)
  98. {
  99. return strncmp(T + t, p, m) < 0;
  100. }
  101. bool isGreater(const char* p, int t)
  102. {
  103. return strncmp(T + t, p, m) > 0;
  104. }
  105.  
  106. // Count the number of times a given pattern P appears
  107. // in the string T. Complexity: O(n.log(n))
  108. int countOccurrences()
  109. {
  110. m = strlen(P);
  111.  
  112. int first = lower_bound(SA, SA + n, P, isLesser) - SA;
  113. int last = upper_bound(SA, SA + n, P, isGreater) - SA;
  114.  
  115. printf("%d %d\n", first, last);
  116. return last - first;
  117. }
  118.  
  119. int main()
  120. {
  121. /*
  122. GATAGACA
  123. GA
  124.  
  125. abcabxabcd
  126. abc
  127.  
  128. mississippi
  129. i
  130. */
  131.  
  132. printf("Text: ");
  133. scanf("%s", T);
  134. printf("Pattern: ");
  135. scanf("%s", P);
  136.  
  137. buildSA();
  138.  
  139. for(int i = 0; i < n; ++i)
  140. {
  141. printf("%2d | %2d | %s\n", i, SA[i], T + SA[i]);
  142. }
  143.  
  144. int ans = countOccurrences();
  145.  
  146. printf("Pattern found %d times\n", ans);
  147.  
  148. return 0;
  149. }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement