Advertisement
Guest User

Untitled

a guest
Jul 29th, 2013
156
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 5.05 KB | None | 0 0
  1. package com.rockymadden.stringmetric.phonetic
  2.  
  3. import com.rockymadden.stringmetric.{StringAlgorithm, StringFilter}
  4. import com.rockymadden.stringmetric.Alphabet.{Alpha, LowercaseVowel}
  5. import scala.annotation.{switch, tailrec}
  6.  
  /**
   * An implementation of the Metaphone algorithm.
   *
   * The input is passed through the mixed-in [[StringFilter]], lower-cased, normalised at its head
   * (`transcodeHead`), stripped of adjacent duplicate letters (`deduplicate`) and then transcoded
   * one character at a time (`transcode`).
   */
  class MetaphoneAlgorithm extends StringAlgorithm[DummyImplicit, String] { this: StringFilter =>
    /**
     * Computes the Metaphone code of `charArray`.
     *
     * @param charArray the characters to encode
     * @return `None` when the filtered input is empty, when its first character is not contained
     *         in `Alpha`, or when transcoding yields no output; `Some(code)` otherwise
     */
    final override def compute(charArray: Array[Char])(implicit di: DummyImplicit): Option[Array[Char]] = {
      val filtered = filter(charArray)

      if (filtered.length == 0 || !(Alpha isSuperset filtered.head)) None
      else {
        // `prepared` is never empty here: `transcodeHead` keeps at least one character of a
        // non-empty input and `deduplicate` always keeps the last character.
        val prepared = deduplicate(transcodeHead(filtered.map(_.toLower)))
        val code = transcode(Array.empty[Char], prepared.head, prepared.tail, Array.empty[Char])

        if (code.length == 0) None else Some(code) // Single Y or W would have 0 length.
      }
    }

    /**
     * String variant of `compute`: encodes `string.toCharArray` and joins the resulting
     * characters back into a `String`.
     */
    final override def compute(string: String)(implicit di: DummyImplicit): Option[String] =
      compute(string.toCharArray).map(_.mkString)

    /**
     * Collapses runs of identical adjacent characters into a single character.
     * A 'c' is exempt: it is always kept, even when followed by another 'c'.
     */
    private[this] def deduplicate(ca: Array[Char]) =
      if (ca.length <= 1) ca
      else {
        // Each window contributes its first character unless it repeats in the second slot;
        // the final character is never the first of a window, so it is appended explicitly.
        val kept = ca.sliding(2).withFilter(w => w(0) == 'c' || w(0) != w(1)).map(w => w(0))

        kept.toArray[Char] :+ ca.last
      }

    /**
     * Transcodes the input one character at a time.
     *
     * @param l characters already consumed (left context)
     * @param c the character under inspection; '\u0000' marks "nothing left"
     * @param r characters still to be processed (right context)
     * @param o the code produced so far
     * @return the complete code once `c` is the end marker and `r` is empty
     */
    @tailrec
    private[this] def transcode(l: Array[Char], c: Char, r: Array[Char], o: Array[Char]): Array[Char] = {
      // '\u0000' replaces the former octal escape '\0' (same character), which newer Scala
      // compilers no longer accept.
      if (c == '\u0000' && r.length == 0) o
      else {
        // Consumes `d` characters (the current one plus `d - 1` from `r`) and yields the
        // arguments of the next step, with `ca` as the new output.
        def shift(d: Int, ca: Array[Char]) = {
          val (skipped, rest) = r.splitAt(d - 1)

          (
            (l :+ c) ++ skipped,
            if (rest.length > 0) rest.head else '\u0000',
            if (rest.length > 1) rest.tail else Array.empty[Char],
            ca
          )
        }

        // True for the letters that soften a preceding 'c' or 'g'.
        def softens(ch: Char) = ch == 'i' || ch == 'e' || ch == 'y'

        val next = {
          (c: @switch) match {
            // Vowels are emitted only when they are the very first character.
            case 'a' | 'e' | 'i' | 'o' | 'u' => if (l.length == 0) shift(1, o :+ c) else shift(1, o)
            case 'f' | 'j' | 'l' | 'm' | 'n' | 'r' => shift(1, o :+ c)
            // A trailing 'b' directly after 'm' is dropped.
            case 'b' => if (l.length >= 1 && l.last == 'm' && r.length == 0) shift(1, o) else shift(1, o :+ 'b')
            case 'c' =>
              if (r.length >= 1 && r.head == 'h' && l.length >= 1 && l.last == 's') shift(1, o :+ 'k')
              else if (r.length >= 2 && r.head == 'i' && r(1) == 'a') shift(3, o :+ 'x')
              // The original also tested "preceded by 's' and followed by 'h'" here; that case
              // is subsumed by "followed by 'h'", so a single check is equivalent.
              else if (r.length >= 1 && r.head == 'h') shift(2, o :+ 'x')
              else if (l.length >= 1 && r.length >= 1 && l.last == 's' && softens(r.head)) shift(1, o)
              else if (r.length >= 1 && softens(r.head)) shift(1, o :+ 's')
              else shift(1, o :+ 'k')
            case 'd' =>
              if (r.length >= 2 && r.head == 'g' && softens(r(1))) shift(1, o :+ 'j')
              else shift(1, o :+ 't')
            case 'g' =>
              if ((r.length > 1 && r.head == 'h')
                || (r.length == 1 && r.head == 'n')
                || (r.length == 3 && r.head == 'n' && r(1) == 'e' && r(2) == 'd')) shift(1, o)
              else if (r.length >= 1 && softens(r.head)) shift(2, o :+ 'j')
              else shift(1, o :+ 'k')
            case 'h' =>
              // NOTE(review): `deduplicate` removes adjacent duplicate 'h's before this method
              // runs, so the `l.last == 'h'` clause looks unreachable. It is kept unchanged so
              // that the produced codes stay exactly as before - confirm the intent.
              if ((l.length >= 1 && (LowercaseVowel isSuperset l.last)
                  && (r.length == 0 || !(LowercaseVowel isSuperset r.head)))
                || (l.length >= 2 && l.last == 'h'
                  && (l(l.length - 2) == 'c' || l(l.length - 2) == 's' || l(l.length - 2) == 'p'
                    || l(l.length - 2) == 't' || l(l.length - 2) == 'g'))) shift(1, o)
              else shift(1, o :+ 'h')
            case 'k' => if (l.length >= 1 && l.last == 'c') shift(1, o) else shift(1, o :+ 'k')
            case 'p' => if (r.length >= 1 && r.head == 'h') shift(2, o :+ 'f') else shift(1, o :+ 'p')
            case 'q' => shift(1, o :+ 'k')
            case 's' =>
              if (r.length >= 2 && r.head == 'i' && (r(1) == 'o' || r(1) == 'a')) shift(3, o :+ 'x')
              else if (r.length >= 1 && r.head == 'h') shift(2, o :+ 'x')
              else shift(1, o :+ 's')
            case 't' =>
              if (r.length >= 2 && r.head == 'i' && (r(1) == 'a' || r(1) == 'o')) shift(3, o :+ 'x')
              // "th" is encoded as the digit character '0'.
              else if (r.length >= 1 && r.head == 'h') shift(2, o :+ '0')
              else if (r.length >= 2 && r.head == 'c' && r(1) == 'h') shift(1, o)
              else shift(1, o :+ 't')
            case 'v' => shift(1, o :+ 'f')
            // 'w' and 'y' are emitted only when a vowel follows.
            case 'w' | 'y' => if (r.length == 0 || !(LowercaseVowel isSuperset r.head)) shift(1, o) else shift(1, o :+ c)
            case 'x' => shift(1, (o :+ 'k') :+ 's')
            case 'z' => shift(1, o :+ 's')
            // Any other character (including the end marker) contributes nothing.
            case _ => shift(1, o)
          }
        }

        transcode(next._1, next._2, next._3, next._4)
      }
    }

    /**
     * Applies the rules that only concern the start of the input: a leading "ae", "gn", "kn",
     * "pn" or "wr" loses its first letter, a leading "wh" becomes "w", and a leading 'x'
     * becomes 's'.
     */
    private[this] def transcodeHead(ca: Array[Char]) = {
      (ca.length: @switch) match {
        case 0 => ca
        case 1 => if (ca.head == 'x') Array('s') else ca
        case _ =>
          (ca.head: @switch) match {
            case 'a' if (ca(1) == 'e') => ca.tail
            case 'g' | 'k' | 'p' if (ca(1) == 'n') => ca.tail
            case 'w' if (ca(1) == 'r') => ca.tail
            case 'w' if (ca(1) == 'h') => 'w' +: ca.drop(2)
            case 'x' => 's' +: ca.tail
            case _ => ca
          }
      }
    }
  }
  113.  
  /** Companion offering a factory and `compute` forwarders backed by one shared instance. */
  object MetaphoneAlgorithm {
    // Created on first use and reused by both `compute` forwarders.
    private lazy val self = apply()

    /** Creates a new `MetaphoneAlgorithm` with `StringFilter` mixed in. */
    def apply(): MetaphoneAlgorithm = new MetaphoneAlgorithm with StringFilter

    /**
     * Encodes `charArray` using the shared instance.
     *
     * @return whatever the instance's `compute(Array[Char])` returns for this input
     */
    def compute(charArray: Array[Char]): Option[Array[Char]] = self.compute(charArray)

    /**
     * Encodes `string` using the shared instance.
     *
     * @return whatever the instance's `compute(String)` returns for this input
     */
    def compute(string: String): Option[String] = self.compute(string)
  }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement