Advertisement
Guest User

for ricardo

a guest
Nov 3rd, 2016
81
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
PHP 1.93 KB | None | 0 0
  1. <?php
  /**
   * Split an unspaced string into its highest-scoring sequence of words.
   *
   * Works over prefixes of $text: for every end offset $i it records the best
   * start offset of the final word, where a candidate's score is the best
   * score of the prefix ending at that start offset multiplied by word_prob()
   * of the candidate word. The words are then recovered by following the
   * recorded start offsets backwards from the end of the string.
   *
   * Reads the global $max_word_length to bound the length of candidate words.
   * The text is processed byte-wise (strlen()/substr()).
   *
   * @param string $text Text to segment.
   * @return array Two elements: [0] the words in their original order,
   *               [1] the score of the whole segmentation, cast to string.
   *               An empty $text yields an empty word list.
   */
  function viterbi_segment($text) {
      global $max_word_length;

      $length = strlen($text);
      // Always consider at least one-byte words; a window of 0 would leave
      // no valid start offset before $i.
      $window = max(1, (int)$max_word_length);

      // Scores stay floats while they are being multiplied so they are not
      // rounded by a string conversion on every step.
      $probs = array(1.0);
      $lasts = array(0);

      // Counted loops instead of range(): range(a, b) counts downwards when
      // a > b, which would visit out-of-bounds offsets for empty input.
      for ($i = 1; $i <= $length; $i++) {
          $best_prob = 0.0;
          $best_start = 0;

          for ($j = max(0, $i - $window); $j < $i; $j++) {
              $item = $probs[$j] * word_prob(substr($text, $j, $i - $j));
              // ">=" lets a later start offset win ties, so when every
              // candidate scores 0 the final word is a single byte.
              if ($item >= $best_prob) {
                  $best_prob = $item;
                  $best_start = $j;
              }
          }

          $probs[] = $best_prob;
          $lasts[] = $best_start;
      }

      // Walk back from the end; each recorded start is strictly smaller than
      // its end offset, so this always reaches 0.
      $words = array();
      for ($end = $length; $end > 0; $end = $lasts[$end]) {
          $words[] = substr($text, $lasts[$end], $end - $lasts[$end]);
      }

      return array(array_reverse($words), (string)$probs[$length]);
  }
  34.    
  /**
   * Relative frequency of a word according to the loaded word list.
   *
   * Reads the globals $dictionary (word => count) and $total (the divisor).
   *
   * @param string $word Candidate word.
   * @return int|float The word's count divided by $total; 0 when the word is
   *                   not in $dictionary or when $total is unset or zero.
   */
  function word_prob($word) {
      global $dictionary;
      global $total;

      // With an empty or missing word list $total is 0; dividing by it would
      // raise DivisionByZeroError on PHP 8, so treat every word as unknown.
      if (empty($total) || !isset($dictionary[$word])) {
          return 0;
      }

      return $dictionary[$word] / $total;
  }
  42.  
  /**
   * Collect every run of lowercase ASCII letters found in a string.
   *
   * @param string $text Text to scan.
   * @return array The match array filled in by preg_match_all() in its
   *               default ordering: element 0 is the list of matched runs.
   */
  function words($text) {
      $pattern = '/[a-z]+/';
      preg_match_all($pattern, $text, $groups);

      return $groups;
  }
  47.  
  # CREATE DICTIONARY OF WORDS TO COMPARE TO
  # Each usable line of the word list holds a word followed by its count,
  # separated by whitespace.
  $dictionary = array();
  $max_word_length = 0;
  $total = 0;

  $handle = fopen("InputWordList.txt", "r");
  if ($handle === false) {
      # Without a word list every probability is undefined, so stop here
      # instead of segmenting against an empty dictionary.
      error_log("Unable to open InputWordList.txt");
      exit(1);
  }

  while (($line = fgets($handle)) !== false) {
      # fgets() keeps the trailing newline; trim it and split on any run of
      # whitespace so it never ends up inside the word or the count.
      $w = preg_split('/\s+/', trim($line));
      if (count($w) < 2) {
          # Blank line, or a word without a count: nothing to record.
          continue;
      }

      $value = (int)$w[1];
      $dictionary[$w[0]] = $value;

      # Track the longest word; viterbi_segment() uses it to bound its search.
      $len = strlen($w[0]);
      if ($len > $max_word_length) $max_word_length = $len;

      $total += $value;
  }
  fclose($handle);

  # SPLIT URL
  print_r(viterbi_segment('thisisacombinedurl.com'));
  70. ?>
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement