Advertisement
cdsatrian

string macth/search'in algorithm /w PHP

Nov 12th, 2014
319
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
PHP 5.50 KB | None | 0 0
  1. <?php
  2. /*********************************
  3. -- BRUTE FORCE STRING MATCHING
  4. *********************************/
  5. function brute_force($pattern, $subject)
  6. {
  7.     $n = strlen($subject);
  8.     $m = strlen($pattern);
  9.  
  10.     for ($i = 0; i < $n-$m; $i++) {
  11.         $j = 0;
  12.         while ($j < $m && $subject[$i+$j] == $pattern[$j]) {
  13.             $j++;
  14.         }
  15.         if ($j == $m) return $i;
  16.     }
  17.     return -1;
  18. }
  19.  
  20. echo brute_force('o wo', 'hello world!');
  21. /*********************************
  22. -- BOYER-MOORE STRING SEARCHING
  23. *********************************/
  24. /**
  25.  * Pattern we're searching for
  26.  *
  27.  * @var string
  28.  */
  29. $pattern = 'gloria';
  30.  
  31. /**
  32.  * The text we're searching in
  33.  *
  34.  * @var string
  35.  */
  36. $text = 'Sic transit gloria mundi, non transit gloria Gundi!';
  37.  
  38. /**
  39.  * Calculates the suffixes for a given pattern
  40.  *
  41.  * @param string $pattern
  42.  * @param array  $suffixes
  43.  */
  44. function suffixes($pattern, &$suffixes)
  45. {
  46.    $m = strlen($pattern);
  47.  
  48.    $suffixes[$m - 1] = $m;
  49.    $g = $m - 1;
  50.  
  51.    for ($i = $m - 2; $i >= 0; --$i) {
  52.       if ($i > $g && $suffixes[$i + $m - 1 - $f] < $i - $g) {
  53.          $suffixes[$i] = $suffixes[$i + $m - 1 - $f];
  54.       } else {
  55.          if ($i < $g) {
  56.             $g = $i;
  57.          }
  58.          $f = $i;
  59.  
  60.          while ($g >= 0 && $pattern[$g] == $pattern[$g + $m - 1 - $f]) {
  61.             $g--;
  62.          }
  63.          $suffixes[$i] = $f - $g;
  64.       }
  65.    }
  66. }
  67.  
  68. /**
  69.  * Fills in the array of bad characters.
  70.  *
  71.  * @param string $pattern
  72.  * @param array  $badChars
  73.  */
  74. function badCharacters($pattern, &$badChars)
  75. {
  76.    $m = strlen($pattern);
  77.  
  78.    for ($i = 0; $i < $m - 1; ++$i) {
  79.       $badChars[$pattern{$i}] = $m - $i - 1;
  80.    }
  81. }
  82.  
  83. /**
  84.  * Fills in the array of good suffixes
  85.  *
  86.  * @param string $pattern
  87.  * @param array  $goodSuffixes
  88.  */
  89. function goodSuffixes($pattern, &$goodSuffixes)
  90. {
  91.    $m       = strlen($pattern);
  92.    $suff    = array();
  93.  
  94.    suffixes($pattern, $suff);
  95.  
  96.    for ($i = 0; $i < $m; $i++) {
  97.       $goodSuffixes[$i] = $m;
  98.    }
  99.  
  100.    for ($i = $m - 1; $i >= 0; $i--) {
  101.       if ($suff[$i] == $i + 1) {
  102.          for ($j = 0; $j < $m - $i - 1; $j++) {
  103.             if ($goodSuffixes[$j] == $m) {
  104.                $goodSuffixes[$j] = $m - $i - 1;
  105.             }
  106.          }
  107.       }
  108.    }
  109.  
  110.    for ($i = 0; $i < $m - 2; $i++) {
  111.       $goodSuffixes[$m - 1 - $suff[$i]] = $m - $i - 1;
  112.    }
  113. }
  114.  
  115. /**
  116.  * Performs a search of the pattern into a given text
  117.  *
  118.  * @param string $pattern
  119.  * @param string $text
  120.  */
  121. function boyer_moore($pattern, $text)
  122. {
  123.    $n = strlen($text);
  124.    $m = strlen($pattern);
  125.  
  126.    $goodSuffixes    = array();
  127.    $badCharacters   = array();
  128.  
  129.    goodSuffixes($pattern, &$goodSuffixes);
  130.    badCharacters($pattern, &$badCharacters);
  131.  
  132.    $j = 0;
  133.    while ($j < $n - $m) {
  134.       for ($i = $m - 1; $i >= 0 && $pattern[$i] == $text[$i + $j]; $i--);
  135.       if ($i < 0) {
  136.          // note that if the substring occurs more
  137.          // than once into the text, the algorithm will
  138.          // print out each position of the substring
  139.          echo $j;
  140.          $j += $goodSuffixes[0];
  141.       } else {
  142.          $j += max($goodSuffixes[$i], $badCharacters[$text[$i + $j]] - $m + $i + 1);
  143.       }
  144.    }
  145. }
  146.  
  147. // search using Boyer-Moore
  148. // will return 12 and 38
  149. boyer_moore($pattern, $text);
  150. /*********************************
  151. -- RABIN-KARP STRING SEARCHING
  152. *********************************/
  153. function hash_string($str, $len)
  154. {
  155.     $hash = '';
  156.  
  157.     $hash_table = array(
  158.         'h' => 1,
  159.         'e' => 2,
  160.         'l' => 3,
  161.         'o' => 4,
  162.         'w' => 5,
  163.         'r' => 6,
  164.         'd' => 7,
  165.     );
  166.  
  167.     for ($i = 0; $i < $len; $i++) {
  168.         $hash .= $hash_table[$str{$i}];
  169.     }
  170.  
  171.     return (int)$hash;
  172. }
  173.  
  174. function rabin_karp($text, $pattern)
  175. {
  176.     $n = strlen($text);
  177.     $m = strlen($pattern);
  178.  
  179.     $text_hash = hash_string(substr($text, 0, $m), $m);
  180.     $pattern_hash = hash_string($pattern, $m);
  181.  
  182.     for ($i = 0; $i < $n-$m+1; $i++) {
  183.         if ($text_hash == $pattern_hash) {
  184.             return $i;
  185.         }
  186.  
  187.         $text_hash = hash_string(substr($text, $i, $m), $m);
  188.     }
  189.  
  190.     return -1;
  191. }
  192.  
  193. // 2
  194. echo rabin_karp('hello world', 'ello');
  195. /*********************************
  196. -- MORRIS-PRATT STRING SEARCHING
  197. *********************************/
  198. /**
  199.  * Pattern
  200.  *
  201.  * @var string
  202.  */
  203. $pattern = 'mollis';
  204.  
  205. /**
  206.  * Text to search
  207.  *
  208.  * @var string
  209.  */
  210. $text = 'Lorem ipsum dolor sit amet, consectetur adipiscing elit. Quisque eleifend nisi viverra ipsum elementum porttitor quis at justo. Aliquam ligula felis, dignissim sit amet lobortis eget, lacinia ac augue. Quisque nec est elit, nec ultricies magna. Ut mi libero, dictum sit amet mollis non, aliquam et augue';
  211.  
  212. /**
  213.  * Preprocess the pattern and return the "next" table
  214.  *
  215.  * @param string $pattern
  216.  */
  217. function preprocessMorrisPratt($pattern, &$nextTable)
  218. {
  219.     $i = 0;
  220.     $j = $nextTable[0] = -1;
  221.     $len = strlen($pattern);
  222.  
  223.     while ($i < $len) {
  224.         while ($j > -1 && $pattern[$i] != $pattern[$j]) {
  225.             $j = $nextTable[$j];
  226.         }
  227.  
  228.         $nextTable[++$i] = ++$j;
  229.     }
  230. }
  231.  
  232. /**
  233.  * Performs a string search with the Morris-Pratt algorithm
  234.  *
  235.  * @param string $text
  236.  * @param string $pattern
  237.  */
  238. function MorrisPratt($text, $pattern)
  239. {
  240.     // get the text and pattern lengths
  241.     $n = strlen($text);
  242.     $m = strlen($pattern);
  243.     $nextTable = array();
  244.  
  245.     // calculate the next table
  246.     preprocessMorrisPratt($pattern, $nextTable);
  247.  
  248.     $i = $j = 0;
  249.     while ($j < $n) {
  250.         while ($i > -1 && $pattern[$i] != $text[$j]) {
  251.             $i = $nextTable[$i];
  252.         }
  253.         $i++;
  254.         $j++;
  255.         if ($i >= $m) {
  256.             return $j - $i;
  257.         }
  258.     }
  259.     return -1;
  260. }
  261.  
  262. // 275
  263. echo MorrisPratt($text, $pattern);
  264. //=== END
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement