Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
#!/usr/bin/env bash
####################################
# jaro-winkler bash implementation #
# by atcasanova@gmail.com          #
####################################
#
# Usage: <script> STRING1 STRING2
#
# Compares two strings and prints their Jaro-Winkler similarity
# (0 = nothing in common, 1 = identical). Before comparing, the characters
# '.', ';', '_' and '-' are removed from both strings. The comparison is
# case-sensitive.
#
# Output:
#   stdout  "1" when the cleaned strings are identical; otherwise the line
#           "Jaro-Winkler Distance for STRING1 and STRING2 is SCORE".
#           (The word "Distance" is kept for compatibility with existing
#           callers; SCORE is a similarity, not 1 - similarity.)
#   stderr  match statistics (transpositions, window, matching characters).
#
# All arithmetic is exact integer math done by the shell; no bc/expr/cut/sed
# processes are needed.

set -euo pipefail

#######################################
# Removes the separator characters '.', ';', '_' and '-' from a string.
# Arguments: $1 - text to clean (defaults to empty)
# Outputs:   the cleaned text on stdout, without a trailing newline
#######################################
strip_separators() {
  local text=${1-}
  printf '%s' "${text//[.;_-]/}"
}

#######################################
# Computes the raw Jaro match statistics for two strings.
# The longer string drives the outer loop, so the result does not depend on
# argument order when the lengths differ.
# Arguments: $1, $2 - strings to compare
# Outputs:   "MATCHES TRANSPOSITIONS WINDOW" on stdout
#######################################
jaro_stats() {
  local s1=${1-} s2=${2-}
  local swap
  if (( ${#s2} > ${#s1} )); then
    swap=$s1
    s1=$s2
    s2=$swap
  fi

  local len1=${#s1} len2=${#s2}
  # Matching window: floor(max_len / 2) - 1, never negative.
  local window=$(( len1 / 2 - 1 ))
  if (( window < 0 )); then
    window=0
  fi

  local -a used1=() used2=()
  local matches=0 out_of_order=0
  local i j k lo hi

  # Pass 1: pair each character of s1 with the first unused equal character
  # of s2 that lies inside the window.
  for (( i = 0; i < len1; i++ )); do
    lo=$(( i > window ? i - window : 0 ))
    hi=$(( i + window < len2 - 1 ? i + window : len2 - 1 ))
    for (( j = lo; j <= hi; j++ )); do
      if [[ -z ${used2[j]-} && "${s1:i:1}" == "${s2:j:1}" ]]; then
        used1[i]=1
        used2[j]=1
        matches=$(( matches + 1 ))
        break
      fi
    done
  done

  # Pass 2: walk the matched characters of both strings in order; every
  # position where they disagree is half a transposition. (Merely matching
  # at a different index is NOT a transposition.)
  k=0
  for (( i = 0; i < len1; i++ )); do
    if [[ -z ${used1[i]-} ]]; then
      continue
    fi
    while (( k < len2 )) && [[ -z ${used2[k]-} ]]; do
      k=$(( k + 1 ))
    done
    if [[ "${s1:i:1}" != "${s2:k:1}" ]]; then
      out_of_order=$(( out_of_order + 1 ))
    fi
    k=$(( k + 1 ))
  done

  printf '%d %d %d\n' "$matches" "$(( out_of_order / 2 ))" "$window"
}

#######################################
# Length of the common prefix of two strings, capped at a limit.
# Arguments: $1, $2 - strings; $3 - cap (default 4, the usual Winkler value)
# Outputs:   the prefix length on stdout
#######################################
common_prefix_length() {
  local s1=${1-} s2=${2-} limit=${3:-4}
  local n=0
  while (( n < limit && n < ${#s1} && n < ${#s2} )) \
    && [[ "${s1:n:1}" == "${s2:n:1}" ]]; do
    n=$(( n + 1 ))
  done
  printf '%d\n' "$n"
}

#######################################
# Turns match statistics into a similarity score.
#   jaro    = (m/len1 + m/len2 + (m - t)/m) / 3
#   winkler = jaro + prefix * 0.1 * (1 - jaro)
# The value is kept as an exact fraction and rounded once, to 4 decimals.
# Arguments: $1 - matches (m)       $2 - transpositions (t)
#            $3 - length of str 1   $4 - length of str 2
#            $5 - common prefix length (default 0 = plain Jaro)
# Outputs:   "0" when there are no matches, otherwise e.g. "0.9611"
#######################################
similarity_from_stats() {
  local m=$1 t=$2 len1=$3 len2=$4 prefix=${5:-0}
  local num den scaled

  if (( m == 0 )); then
    printf '0\n'
    return 0
  fi

  # Jaro as the fraction num/den.
  num=$(( m * m * len2 + m * m * len1 + (m - t) * len1 * len2 ))
  den=$(( 3 * len1 * len2 * m ))

  # Winkler prefix boost with scaling factor 1/10, still as a fraction.
  num=$(( 10 * num + prefix * (den - num) ))
  den=$(( 10 * den ))

  scaled=$(( (num * 10000 + den / 2) / den ))
  printf '%d.%04d\n' "$(( scaled / 10000 ))" "$(( scaled % 10000 ))"
}

#######################################
# Plain Jaro similarity of two strings (no separator stripping).
# Arguments: $1, $2 - strings to compare
# Outputs:   "1" for identical strings, otherwise the score on stdout
#######################################
jaro_similarity() {
  local s1=${1-} s2=${2-}
  local m t
  if [[ "$s1" == "$s2" ]]; then
    printf '1\n'
    return 0
  fi
  read -r m t _ <<<"$(jaro_stats "$s1" "$s2")"
  similarity_from_stats "$m" "$t" "${#s1}" "${#s2}" 0
}

#######################################
# Jaro-Winkler similarity of two strings (no separator stripping).
# Arguments: $1, $2 - strings to compare
# Outputs:   "1" for identical strings, otherwise the score on stdout
#######################################
jaro_winkler_similarity() {
  local s1=${1-} s2=${2-}
  local m t prefix
  if [[ "$s1" == "$s2" ]]; then
    printf '1\n'
    return 0
  fi
  read -r m t _ <<<"$(jaro_stats "$s1" "$s2")"
  prefix=$(common_prefix_length "$s1" "$s2")
  similarity_from_stats "$m" "$t" "${#s1}" "${#s2}" "$prefix"
}

#######################################
# Script entry point.
# Arguments: $1, $2 - strings to compare; missing ones count as empty
# Outputs:   result on stdout, statistics and warnings on stderr
#######################################
main() {
  local raw1=${1-} raw2=${2-}
  local s1 s2 m t window prefix score

  if (( $# < 2 )); then
    printf 'warning: expected 2 arguments, got %d; missing ones are treated as empty\n' \
      "$#" >&2
  fi

  s1=$(strip_separators "$raw1")
  s2=$(strip_separators "$raw2")

  if [[ "$s1" == "$s2" ]]; then
    printf '1\n'
    return 0
  fi

  read -r m t window <<<"$(jaro_stats "$s1" "$s2")"
  printf 'Transpositions: %d\n' "$t" >&2
  printf 'matching distance: %d\n' "$window" >&2
  printf 'matching characters: %d\n' "$m" >&2

  prefix=$(common_prefix_length "$s1" "$s2")
  score=$(similarity_from_stats "$m" "$t" "${#s1}" "${#s2}" "$prefix")
  printf 'Jaro-Winkler Distance for %s and %s is %s\n' "$raw1" "$raw2" "$score"
}

main "$@"
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement