Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- <?php
/**
 * Split an unspaced string into the most probable sequence of words.
 *
 * Dynamic programme over prefixes of $text: for each end position $i it keeps
 * the best probability of segmenting the first $i bytes and the start offset
 * of the last word in that segmentation. Candidate words are scored with
 * word_prob() and are at most $max_word_length bytes long.
 *
 * Ties (including the "no known word" case where every candidate scores 0)
 * are resolved in favour of the latest start offset, so unknown stretches
 * fall apart into single characters.
 *
 * @param string $text The text to segment (processed byte-wise).
 * @return array Two elements: the list of words in order, and the probability
 *               of that segmentation as a string ("1.0" for empty input).
 */
function viterbi_segment($text) {
    global $max_word_length;

    $length = strlen($text);

    // range(1, 0) counts DOWN in PHP, so an empty string must be handled
    // before the loops; the empty segmentation has probability 1.
    if ($length === 0) {
        return array(array(), "1.0");
    }

    // Always look back at least one byte. With a window of 0 (dictionary not
    // loaded) the inner loop would otherwise have nothing valid to inspect.
    $window = max(1, (int)$max_word_length);

    // Kept as floats: round-tripping through strings would truncate every
    // intermediate value to the configured display precision.
    $probs = array(1.0);
    $lasts = array(0);

    for ($i = 1; $i <= $length; $i++) {
        $max_a = 0.0;
        $max_b = 0;
        for ($j = max(0, $i - $window); $j < $i; $j++) {
            $item = $probs[$j] * word_prob(substr($text, $j, $i - $j));
            // Prefer higher probability; on a tie prefer the later split point.
            if ($item > $max_a || ($item == $max_a && $j > $max_b)) {
                $max_a = $item;
                $max_b = $j;
            }
        }
        $probs[] = $max_a;
        $lasts[] = $max_b;
    }

    // Walk the back-pointers from the end of the text to recover the words.
    $words = array();
    $i = $length;
    while (0 < $i) {
        $words[] = substr($text, $lasts[$i], $i - $lasts[$i]);
        $i = $lasts[$i];
    }

    // The probability is returned as a string, as callers have always received it.
    return array(array_reverse($words), (string)$probs[$length]);
}
/**
 * Relative frequency of $word in the loaded dictionary.
 *
 * Reads the globals $dictionary (word => count) and $total (sum of counts).
 *
 * @param string $word Candidate word.
 * @return int|float count / total, or 0 for a word that is not in the
 *                   dictionary; 0.0 when no counts have been loaded.
 */
function word_prob($word) {
    global $dictionary;
    global $total;

    // An empty or unloaded dictionary leaves $total at 0 (or unset); dividing
    // by it raises DivisionByZeroError on PHP 8, so report "impossible" instead.
    if (!is_numeric($total) || $total <= 0) {
        return 0.0;
    }

    $value = isset($dictionary[$word]) ? $dictionary[$word] : 0;
    return $value / $total;
}
/**
 * Find every run of lowercase ASCII letters in $text.
 *
 * @param string $text Text to scan.
 * @return array The match set produced by preg_match_all(): element 0 is the
 *               list of matched letter runs, in order of appearance.
 */
function words($text) {
    $found = array();
    preg_match_all('/[a-z]+/', $text, $found, PREG_PATTERN_ORDER);
    return $found;
}
# CREATE DICTIONARY OF WORDS TO COMPARE TO
# Each usable line of the word list has the form "<word> <count>".
$dictionary = array();
$max_word_length = 0;
$total = 0;
$word_list_path = "InputWordList.txt";
$handle = is_readable($word_list_path) ? fopen($word_list_path, "r") : false;
if ($handle) {
    while (($line = fgets($handle)) !== false) {
        # fgets() keeps the line terminator; trim it so it can never become
        # part of a dictionary key or inflate the maximum word length.
        $w = preg_split('/\s+/', trim($line));
        # Skip blank or malformed lines rather than reading a missing count.
        if (count($w) < 2 || $w[0] === '') {
            continue;
        }
        $value = (int)$w[1];
        $dictionary[$w[0]] = $value;
        $len = strlen($w[0]);
        if ($len > $max_word_length) $max_word_length = $len;
        $total += $value;
    }
    fclose($handle);
}
# SPLIT URL
if ($total > 0) {
    print_r(viterbi_segment('thisisacombinedurl.com'));
} else {
    # Without any word counts every probability is undefined, so say so
    # instead of segmenting against an empty dictionary.
    echo "No word counts could be loaded from " . $word_list_path . "; nothing to segment against.\n";
}
?>
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement