Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
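- Instead of string_match, use one of the two implementations below: the static method string_compare() or the class StringCompareJaroWinkler (via its compare() method).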
/**
 * Scores how much of $str_a can be found inside $str_b as contiguous segments.
 *
 * $str_a is walked one byte at a time (strlen/substr are byte-wise). A segment
 * is grown for as long as it is still a substring of $str_b. Every segment gets
 * a score of "position factor * length factor"; the score stored for a segment
 * is overwritten each time the segment grows, so only its final (longest) form
 * counts. The result is the sum of all segment scores.
 *
 * NOTE(review): the position factor divides by the length of $str_b while the
 * numerator uses the length of $str_a; the original author marked this formula
 * as uncertain. It is reproduced unchanged here.
 *
 * @param string $str_a String whose segments are searched for.
 * @param string $str_b String that is searched in.
 * @return int|float Sum of the segment scores (integer 0 when nothing matches).
 */
static public function string_compare($str_a, $str_b)
{
    $lenA = strlen($str_a);
    $lenB = strlen($str_b);

    $segments = array();   // slot number => array('segment' => ..., 'score' => ...)
    $slot = 0;             // index of the segment currently being grown
    $run = '';             // the segment currently being grown
    $pos = 0;              // read position in $str_a

    while ($pos < $lenA) {
        $byte = substr($str_a, $pos, 1);

        // Byte does not occur in $str_b at all: close the current segment and move on.
        if (strpos($str_b, $byte) === FALSE) {
            $run = '';
            $slot++;
            $pos++;
            continue;
        }

        $candidate = $run . $byte;
        $foundAt = strpos($str_b, $candidate);

        // The grown segment no longer occurs in $str_b: close it and look at the
        // same byte again, this time as the start of a fresh segment.
        if ($foundAt === FALSE) {
            $run = '';
            $slot++;
            continue;
        }

        $run = $candidate;
        $runLength = strlen($run);
        $startInA = $pos - $runLength + 1;
        $offset = abs($startInA - $foundAt);

        $positionWeight = ($lenA - $offset) / $lenB;
        $lengthWeight = $runLength / $lenA;

        $segments[$slot] = array(
            'segment' => $run,
            'score' => ($positionWeight * $lengthWeight),
        );
        $pos++;
    }

    $total = 0;
    foreach ($segments as $info) {
        $total += $info['score'];
    }

    return $total;
}
/**
 * Jaro-Winkler string similarity.
 *
 * Strings are compared character by character (UTF-8 aware, with a byte-wise
 * fallback for input that is not valid UTF-8). A score of 1.0 means the
 * strings are identical, 0 means they share no characters within the match
 * window.
 *
 * Implementation notes:
 *  - The match window is floor(min(len1, len2) / 2), as in the original code.
 *    This differs from the textbook window floor(max(len1, len2) / 2) - 1 and
 *    is kept so that existing scores do not change.
 *  - Matched characters are tracked in arrays. The previous approach of
 *    writing '' into a string offset is a fatal error on PHP >= 7.1, and
 *    mixing mb_strlen() with byte offsets miscounted multibyte characters.
 */
class StringCompareJaroWinkler
{
    /**
     * Returns the Jaro-Winkler similarity of two strings.
     *
     * @param string $str1
     * @param string $str2
     * @param float  $prefixScale Weight given to a common prefix (default 0.1).
     *                            With a prefix of up to 4 characters, values
     *                            above 0.25 can push the score above 1.
     * @return int|float Similarity score.
     */
    public function compare($str1, $str2, $prefixScale = 0.1)
    {
        return $this->JaroWinkler($str1, $str2, $prefixScale);
    }

    /**
     * Splits a string into a list of characters.
     *
     * @param string $string
     * @return array List of single-character strings (empty for '').
     */
    private function splitCharacters($string)
    {
        $string = (string) $string;
        if ($string === '') {
            return array();
        }

        $chars = preg_split('//u', $string, -1, PREG_SPLIT_NO_EMPTY);

        // preg_split() returns false when the input is not valid UTF-8;
        // fall back to comparing raw bytes in that case.
        return $chars === false ? str_split($string) : $chars;
    }

    /**
     * Collects, in order, the characters of $chars1 that also occur in $chars2
     * no further than $allowedDistance positions away. Each character of
     * $chars2 can be matched only once.
     *
     * @param array $chars1
     * @param array $chars2 Received by value; the caller's array is not modified.
     * @param int   $allowedDistance
     * @return array List of matched characters.
     */
    private function getCommonCharacters(array $chars1, array $chars2, $allowedDistance)
    {
        $len1 = count($chars1);
        $len2 = count($chars2);
        $common = array();

        for ($i = 0; $i < $len1; $i++) {
            $from = max(0, $i - $allowedDistance);
            $to = min($i + $allowedDistance + 1, $len2);

            for ($j = $from; $j < $to; $j++) {
                if ($chars2[$j] === $chars1[$i]) {
                    $common[] = $chars1[$i];
                    // Mark the slot as consumed so it cannot match again.
                    $chars2[$j] = null;
                    break;
                }
            }
        }

        return $common;
    }

    /**
     * Jaro similarity of two character lists.
     *
     * @param array $chars1
     * @param array $chars2
     * @return int|float 0 when there are no common characters.
     */
    private function Jaro(array $chars1, array $chars2)
    {
        $len1 = count($chars1);
        $len2 = count($chars2);

        // Match window (see the class comment for how it differs from the textbook one).
        $distance = (int) floor(min($len1, $len2) / 2.0);

        $commons1 = $this->getCommonCharacters($chars1, $chars2, $distance);
        $commons2 = $this->getCommonCharacters($chars2, $chars1, $distance);

        $commons1Len = count($commons1);
        $commons2Len = count($commons2);

        // Also guards every division below: a zero-length input has no common characters.
        if ($commons1Len === 0 || $commons2Len === 0) {
            return 0;
        }

        // Matched characters that appear in a different order count as half a transposition each.
        $transpositions = 0;
        $upperBound = min($commons1Len, $commons2Len);
        for ($i = 0; $i < $upperBound; $i++) {
            if ($commons1[$i] !== $commons2[$i]) {
                $transpositions++;
            }
        }
        $transpositions /= 2.0;

        return ($commons1Len / $len1
            + $commons2Len / $len2
            + ($commons1Len - $transpositions) / $commons1Len) / 3.0;
    }

    /**
     * Length of the common prefix of two character lists, capped at
     * $maxPrefixLength.
     *
     * @param array $chars1
     * @param array $chars2
     * @param int   $maxPrefixLength
     * @return int
     */
    private function getPrefixLength(array $chars1, array $chars2, $maxPrefixLength = 4)
    {
        $n = min($maxPrefixLength, count($chars1), count($chars2));

        for ($i = 0; $i < $n; $i++) {
            if ($chars1[$i] !== $chars2[$i]) {
                // Index of the first differing character.
                return $i;
            }
        }

        // The first $n characters are identical.
        return $n;
    }

    /**
     * Jaro similarity boosted by the length of the common prefix.
     *
     * @param string $string1
     * @param string $string2
     * @param float  $PREFIXSCALE
     * @return int|float
     */
    private function JaroWinkler($string1, $string2, $PREFIXSCALE = 0.1)
    {
        $chars1 = $this->splitCharacters($string1);
        $chars2 = $this->splitCharacters($string2);

        $jaro = $this->Jaro($chars1, $chars2);
        $prefixLength = $this->getPrefixLength($chars1, $chars2);

        return $jaro + $prefixLength * $PREFIXSCALE * (1.0 - $jaro);
    }
}
// Usage example: print the similarity score of two sample strings.
$jw = new StringCompareJaroWinkler;
print $jw->compare("jonas", "asjon");
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement