Advertisement
Guest User

Untitled

a guest
Jun 28th, 2017
66
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 3.65 KB | None | 0 0
  1. /*====================================================================
  2. Title: Computes Longest Repeated Substring from a given string
  3. Complexity: O(n.log(n))
  4. Author : Sudipto Chandra (Dipu)
  5. =====================================================================*/
  6. #include <bits/stdc++.h>
  7. using namespace std;
  8. #define mem(a,b) memset(a, b, sizeof(a))
  9. #define loop(i, x) for(__typeof((x).begin()) i=(x).begin(); i!=(x).end(); ++i)
  10. #define rloop(i, x) for(__typeof((x).rbegin()) i=(x).rbegin(); i!=(x).rend(); ++i)
  11. /*------------------------------------------------------------------------------------*/
  12.  
  13. int test, cas = 1;
  14.  
  15. const int SIZ = 10000050; // maximum possible size
  16.  
  17. int n; // text length
  18. int m; // pattern length
  19. char T[SIZ]; // text
  20. char P[SIZ]; // pattern
  21. int SA[SIZ], tempSA[SIZ]; // the sorted suffixes
  22. int RA[SIZ], tempRA[SIZ]; // ranks of suffix array
  23. int L[SIZ]; // used in counting sort
  24.  
  25. inline int getRA(int i)
  26. {
  27. return (i < n) ? RA[i] : 0;
  28. }
  29.  
  30. void radix_sort(int k)
  31. {
  32. mem(L, 0);
  33. // count frequencies
  34. for(int i = 0; i < n; ++i)
  35. {
  36. L[getRA(i + k)]++;
  37. }
  38. // save first index of every characters
  39. int mx = max(n, 130);
  40. for(int i = 0, s = 0; i < mx; ++i)
  41. {
  42. int x = L[i];
  43. L[i] = s;
  44. s += x;
  45. }
  46. // build sorted tempSA
  47. for(int i = 0; i < n; ++i)
  48. {
  49. int& x = L[getRA(SA[i] + k)];
  50. tempSA[x++] = SA[i];
  51. }
  52. // copy tempSA to SA
  53. for(int i = 0; i < n; ++i)
  54. {
  55. SA[i] = tempSA[i];
  56. }
  57. }
  58. // text must ends with a $
  59. void buildSA()
  60. {
  61. // initialize
  62. n = strlen(T);
  63. T[n++] = '$', T[n] = 0; // append $
  64. for(int i = 0; i < n; ++i)
  65. {
  66. SA[i] = i;
  67. RA[i] = T[i];
  68. }
  69.  
  70. // algorithm loop
  71. for(int k = 1; k < n; k <<= 1)
  72. {
  73. // sort by k-th ranks
  74. radix_sort(k);
  75. radix_sort(0);
  76. // compute new ranks
  77. tempRA[SA[0]] = 0;
  78. for(int i = 1, r = 0; i < n; ++i)
  79. {
  80. if(getRA(SA[i-1]) != getRA(SA[i])) {
  81. r++;
  82. }
  83. else if(getRA(SA[i-1]+k) != getRA(SA[i]+k)) {
  84. r++;
  85. }
  86. tempRA[SA[i]] = r;
  87. }
  88. for(int i = 0; i < n; ++i)
  89. {
  90. RA[i] = tempRA[i];
  91. }
  92. if(RA[SA[n - 1]] == n - 1) break;
  93. }
  94. }
  95.  
  96. // Count the number of times a given pattern P appears
  97. // in the string T. Complexity: O(n.log(n))
  98. int countOccurences()
  99. {
  100. m = strlen(P);
  101. SA[n] = n;
  102.  
  103. int first, last;
  104. int low, high, mid;
  105.  
  106. // find lower bound
  107. low = 0, high = n;
  108. while(low < high)
  109. {
  110. mid = (low + high) >> 1;
  111. printf("lower: %d %d %d\n", low, mid, high);
  112. if(strncmp(P, T + SA[mid], m) <= 0)
  113. {
  114. high = mid;
  115. }
  116. else
  117. {
  118. low = mid + 1;
  119. }
  120. }
  121. printf("lower: %d %d %d\n", low, mid, high);
  122. first = low;
  123.  
  124. // find higher bound
  125. low = 0, high = n;
  126. while(low < high)
  127. {
  128. mid = (low + high) >> 1;
  129. printf("upper: %d %d %d\n", low, mid, high);
  130. if(strncmp(P, T + SA[mid], m) < 0)
  131. {
  132. high = mid;
  133. }
  134. else
  135. {
  136. low = mid + 1;
  137. }
  138. }
  139. printf("upper: %d %d %d\n", low, mid, high);
  140. last = low;
  141.  
  142. printf("%d %d\n", first, last);
  143. return last - first;
  144. }
  145.  
  146. int main()
  147. {
  148. /*
  149. GATAGACA
  150. GA
  151.  
  152. abcabxabcd
  153. abc
  154. */
  155.  
  156. printf("Text: ");
  157. scanf("%s", T);
  158. printf("Pattern: ");
  159. scanf("%s", P);
  160.  
  161. buildSA();
  162.  
  163. for(int i = 0; i < n; ++i)
  164. {
  165. printf("%2d | %2d | %s\n", i, SA[i], T + SA[i]);
  166. }
  167.  
  168. int ans = countOccurences();
  169.  
  170. printf("Pattern found %d times\n", ans);
  171.  
  172. return 0;
  173. }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement