Advertisement
Guest User

123

a guest
Sep 17th, 2019
125
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 5.28 KB | None | 0 0
  1. package com.crypterium.kyc.analyzer
  2.  
  3. import info.debatty.java.stringsimilarity.Damerau
  4. import kotlin.math.max
  5. import kotlin.math.min
  6. import kotlin.system.measureNanoTime
  7.  
  /**
   * Interactive console entry point.
   *
   * Prompts for the KYC first/last name and the back-office first/last name, runs
   * [KycAnalyzer.getSimilarity] with `maxMistake = 2.0` and `maxSumMistake = 3.0`,
   * and prints the resulting score followed by the elapsed time in nanoseconds
   * (the timing covers both the computation and the printing of the score).
   *
   * @throws IllegalStateException if standard input ends before all four values are read.
   */
  fun main() {
      // Prints the label and reads one line; fails with a readable message instead of a
      // bare NullPointerException when stdin is closed (readLine() returns null at EOF).
      fun prompt(label: String): String {
          print(label)
          return readLine() ?: error("Unexpected end of input while reading '${label.trim()}'")
      }

      val kycFN = prompt("KycFirstName: ")
      val kycLN = prompt("KycLastName: ")
      val backFN = prompt("BackFirstName: ")
      val backLN = prompt("backLastName: ")

      val t = measureNanoTime {
          val similarity = KycAnalyzer(kycFN, kycLN, backFN, backLN, maxMistake = 2.0, maxSumMistake = 3.0)
              .getSimilarity()
          print("distance: $similarity ")
      }
      println("time: $t")
  }
  23.  
  /**
   * Normalises a first/last name pair into a list of upper-case name tokens.
   *
   * The two parts are joined with a space, upper-cased, and split on spaces and hyphens.
   * Blank tokens (produced by repeated, leading or trailing separators, or by an empty
   * name part) and the placeholder token `N/A` are dropped.
   *
   * @param firstName first-name part; may contain several space- or hyphen-separated words.
   * @param lastName last-name part; may contain several space- or hyphen-separated words.
   * @return the remaining tokens in their original order; empty if nothing is left.
   */
  internal fun stringModify(firstName: String, lastName: String): List<String> =
      "$firstName $lastName"
          // Locale.ROOT keeps the result independent of the JVM default locale
          // (e.g. under a Turkish locale "i" would otherwise upper-case to "İ").
          .toUpperCase(java.util.Locale.ROOT)
          .split(" ", "-")
          .filter { it.isNotBlank() && it != "N/A" }
  27.  
  /**
   * Compares the name a user supplied during KYC with the name held by the back office
   * and produces a similarity score.
   *
   * Both names are reduced to token lists (by default via [stringModify]); the lists can
   * also be supplied directly through [kycData] / [backData].
   *
   * @param kycFirstName first name from the KYC data; only used to build the default [kycData].
   * @param kycLastName last name from the KYC data; only used to build the default [kycData].
   * @param backFirstName first name from the back office; only used to build the default [backData].
   * @param backLastName last name from the back office; only used to build the default [backData].
   * @param kycData name tokens of the KYC side.
   * @param backData name tokens of the back-office side.
   * @param maxMistake largest per-word edit distance (truncated to an integer) that is tolerated.
   * @param maxSumMistake largest running sum of truncated per-word edit distances that is tolerated.
   */
  class KycAnalyzer(
      kycFirstName: String,
      kycLastName: String,
      backFirstName: String,
      backLastName: String,
      private val kycData: List<String> = stringModify(kycFirstName, kycLastName),
      private val backData: List<String> = stringModify(backFirstName, backLastName),
      private val maxMistake: Double,
      private val maxSumMistake: Double
  ) {

      /**
       * Computes the similarity of the two token lists.
       *
       * @return
       *  - `0.0` when either list is empty, when the word counts or the total letter counts
       *    differ by more than a factor of [MAX_SIZE_RATIO], or when the fuzzy comparison
       *    exceeds [maxMistake] / [maxSumMistake];
       *  - `1.0` when one list contains every token of the other;
       *  - otherwise the score produced by the fuzzy comparison.
       */
      fun getSimilarity(): Double = when {
          // Previously rejected only implicitly through NaN/Infinity ratio checks.
          kycData.isEmpty() || backData.isEmpty() -> 0.0
          !wordQuanChecker(kycData, backData, MAX_SIZE_RATIO) -> 0.0
          !letterQuanChecker(kycData, backData, MAX_SIZE_RATIO) -> 0.0
          fullSimilarity(kycData, backData) -> 1.0
          else -> inaccurateSearch(kycData, backData)
      }

      /**
       * Checks that the total letter counts of the two lists are within a factor of
       * [maxDifference] of each other. A zero letter count on either side yields `false`
       * (the ratio is then 0, Infinity or NaN, none of which lies in the range).
       */
      private fun letterQuanChecker(data1: List<String>, data2: List<String>, maxDifference: Double): Boolean {
          val ratio = data1.sumBy { it.length }.toDouble() / data2.sumBy { it.length }.toDouble()
          return ratio in 1 / maxDifference..maxDifference
      }

      /** Returns `true` when one list contains every token of the other. */
      private fun fullSimilarity(data1: List<String>, data2: List<String>): Boolean =
          data1.containsAll(data2) || data2.containsAll(data1)

      /**
       * Fuzzy comparison based on `Damerau.distance`.
       *
       * Every word of the list with fewer words is paired with its nearest word in the other
       * list. The comparison is abandoned with `0.0` as soon as a single truncated distance
       * exceeds [maxMistake] or the running sum of truncated distances exceeds [maxSumMistake].
       *
       * @return `1.0 - worst / letters`, where `worst` is the largest nearest-word distance and
       * `letters` is the smaller of the two lists' total letter counts.
       */
      private fun inaccurateSearch(data1: List<String>, data2: List<String>): Double {
          val (shorter, longer) = if (data1.size <= data2.size) data1 to data2 else data2 to data1
          val damerau = Damerau()
          // Distances are never negative, so 0.0 is the correct seed for a running maximum
          // (Double.MIN_VALUE is the smallest *positive* double, not the lowest value).
          var worstDistance = 0.0
          var sumMistake = 0

          for (word in shorter) {
              var nearest = Double.MAX_VALUE
              for (candidate in longer) {
                  nearest = min(nearest, damerau.distance(word, candidate))
              }
              sumMistake += nearest.toInt()
              if (nearest.toInt() > maxMistake || sumMistake > maxSumMistake) return 0.0
              worstDistance = max(worstDistance, nearest)
          }

          println("basicDistance: $worstDistance ")
          val letters = min(shorter.sumBy { it.length }, longer.sumBy { it.length })
          return 1.0 - worstDistance / letters
      }

      /**
       * Checks that the word counts of the two lists are within a factor of
       * [maxDifference] of each other.
       */
      private fun wordQuanChecker(data1: List<String>, data2: List<String>, maxDifference: Double): Boolean {
          val ratio = data1.size.toDouble() / data2.size.toDouble()
          return ratio in 1 / maxDifference..maxDifference
      }

      private companion object {
          /** Largest allowed ratio between the word counts / letter counts of the two names. */
          const val MAX_SIZE_RATIO = 2.0
      }
  }
  111.  
  112. /*fun getSimilarity(): Double {
  113. val kycData = stringModify(kfn, kln)
  114. val backData = stringModify(bfn, bln)
  115. return when {
  116. !sizeChecker(kycData, backData, 2.0) -> 1.0
  117. else -> inaccurateSearch(kycData, backData)
  118. }
  119. fun inaccurateSearch(data1: List<String>, data2: List<String>): Double {
  120.  
  121. val (minData, maxData) = if (data1.size <= data2.size) Pair(data1, data2) else Pair(data2, data1)
  122.  
  123. // val minData = if (data1.size <= data2.size) data1 else data2
  124. // val maxData = if (data1.size > data2.size) data1 else data2
  125. var distance = Double.MIN_VALUE
  126. val damerau = Damerau()
  127. minData.forEach { word1 ->
  128. var curDistance = maxData.map { word2 ->
  129. damerau.distance(word1, word2)
  130. }.min() ?: MAX_VALUE
  131.  
  132. // for (word2 in maxData) {
  133. // curDistance = min(curDistance, damerau.distance(word1, word2))
  134. // }
  135. distance = max(distance, curDistance)
  136. }
  137. return distance / minData.sumBy { it.length }
  138. }
  139.  
  140.  
  141.  
  142. fun fullSimilarityChecker(data1: List<String>, data2: List<String>): Boolean {
  143. return data1.containsAll(data2) || data2.containsAll(data1)
  144. }
  145. }
  146. }
  147. //fun t(f: String) = privFun(f = f)
  148.  
  149. //internal fun privFun(f: String): Unit = println(f)
  150. */
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement