matbensch

Fast Substring Hashing Implementation

Feb 12th, 2022 (edited)
209
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
C++ 2.66 KB | None | 0 0
  1. #include <iostream>
  2. #include <vector>
  3. #include <string>
  4.  
  5. using namespace std;
  6.  
  7. long long m = 1e9 + 9;
  8. long long p = 31;
  9.  
  10. // computes a^b in O(log b)
  11. long long fast_pow(long long a, long long b)
  12. {
  13.     if (b == 0) return 1;
  14.     long long half = fast_pow(a, b / 2);
  15.     if (b % 2 == 0) return half * half % m;
  16.     return a * half % m * half % m;
  17. }
  18.  
  19. // returns a vector of size n+1 where inverse_ptoa[i] = inverse_mod(p^a)
  20. vector<long long> comp_inverse_ptoa(int n)
  21. {
  22.     vector<long long> inverse_ptoa;
  23.     for (int a = 0; a <= n; a++)
  24.         inverse_ptoa.push_back(fast_pow(fast_pow(p, a), m - 2));
  25.     return inverse_ptoa;
  26. }
  27.  
  28. // returns a vector where pref_hash[i] = hash(s[0...i-1])
  29. vector<long long> comp_pref_hash(string& s)
  30. {
  31.     vector<long long> pref_hash;
  32.     int n = s.size();
  33.     vector<long long> p_pow(n);
  34.     p_pow[0] = 1;
  35.     for (int i = 1; i < n; i++)
  36.         p_pow[i] = (p_pow[i - 1] * p) % m;
  37.  
  38.     pref_hash.push_back(0);
  39.     for (int i = 0; i < n; i++)
  40.         pref_hash.push_back(((pref_hash[i] + ((s[i] - 'a' + 1) * p_pow[i]) % m)) % m);
  41.     return pref_hash;
  42. }
  43.  
  44. // returns hash(s[a...b]) in O(1)
  45. long long substr_hash(vector<long long>& inverse_ptoa, vector<long long>& pref_hash, int a, int b)
  46. {
  47.     return inverse_ptoa[a] * ((m + (pref_hash[b] - pref_hash[a])) % m) % m;
  48. }
  49.  
  50. // computes the hash of a string in O(n)
  51. long long compute_hash(string& s) {
  52.     int p = 31;
  53.     int m = 1e9 + 9;
  54.     long long hash_value = 0;
  55.     long long p_pow = 1;
  56.     for (char c : s) {
  57.         hash_value = (hash_value + (c - 'a' + 1) * p_pow) % m;
  58.         p_pow = (p_pow * p) % m;
  59.     }
  60.     return hash_value;
  61. }
  62.  
  63. int main()
  64. {
  65.     string s = "supercallifragilisticexpialidocious";
  66.     int n = s.size();
  67.     vector<long long> inverse_ptoa1 = comp_inverse_ptoa(n);
  68.     vector<long long> pref_hash1 = comp_pref_hash(s);
  69.  
  70.     int tests_passed = 0;
  71.     int tests = 0;
  72.  
  73.     for (int i = 0; i < n; i++)
  74.     {
  75.         for (int j = i + 1; j <= n; j++)
  76.         {
  77.             string sub = s.substr(i, j - i);
  78.             int fast_hash = substr_hash(inverse_ptoa1,pref_hash1, i, j);
  79.             int slow_hash = compute_hash(sub);
  80.             cout << "Hashing substring: " << sub << '\n';
  81.             cout << "Slow substring hash O(n) found: " << slow_hash << '\n';
  82.             cout << "Fast substring hash O(1) found: " << fast_hash << '\n';
  83.             if (slow_hash == fast_hash)
  84.             {
  85.                 tests_passed++;
  86.                 cout << "passed\n\n";
  87.             }
  88.             else
  89.                 cout << "failed\n\n";
  90.             tests++;
  91.         }
  92.     }
  93.  
  94.     cout << tests_passed << " / " << tests << " tests passed!\n";
  95.  
  96. }
Advertisement
Add Comment
Please, Sign In to add comment