Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- package com.crypterium.kyc.analyzer
- import info.debatty.java.stringsimilarity.Damerau
- import kotlin.math.max
- import kotlin.math.min
- import kotlin.system.measureNanoTime
/**
 * Interactive console entry point.
 *
 * Prompts for the KYC first/last name and the "back" first/last name, runs
 * [KycAnalyzer.getSimilarity] with `maxMistake = 2.0` and `maxSumMistake = 3.0`,
 * and prints the resulting score together with the elapsed time in nanoseconds.
 *
 * Note: the value printed after the `distance:` label is the similarity score
 * returned by [KycAnalyzer.getSimilarity], not a raw edit distance.
 *
 * @throws IllegalStateException if standard input ends before all four values are read.
 */
fun main() {
    // Prints "<label>: " and reads one line. Fails with a descriptive message on
    // end of input instead of the bare NullPointerException that `!!` would raise.
    fun ask(label: String): String {
        print("$label: ")
        return readLine() ?: error("Unexpected end of input while reading $label")
    }

    val kycFN = ask("KycFirstName")
    val kycLN = ask("KycLastName")
    val backFN = ask("BackFirstName")
    val backLN = ask("backLastName")

    // The timed section covers both the analysis and the printing of its result.
    val t = measureNanoTime {
        val similarity = KycAnalyzer(
            kycFN, kycLN, backFN, backLN,
            maxMistake = 2.0,
            maxSumMistake = 3.0
        ).getSimilarity()
        print("distance: $similarity ")
    }
    println("time: $t")
}
/**
 * Normalizes a first/last name pair into a list of upper-case name tokens.
 *
 * The two names are joined with a space, upper-cased, and split on spaces and
 * hyphens. Tokens equal to `"N/A"` are dropped, and so are blank tokens, which
 * would otherwise appear for leading/trailing/doubled separators or an empty
 * name and distort the word-count and distance checks performed on the result.
 *
 * Upper-casing uses [String.uppercase], which is locale-invariant, so the result
 * does not depend on the JVM default locale.
 *
 * @param firstName first name as entered; may be empty or `"N/A"` (any case).
 * @param lastName last name as entered; may be empty or `"N/A"` (any case).
 * @return the non-blank, upper-cased tokens in their original order.
 */
internal fun stringModify(firstName: String, lastName: String): List<String> {
    return "$firstName $lastName"
        .uppercase()
        .split(" ", "-")
        .filter { token -> token.isNotBlank() && token != "N/A" }
}
/**
 * Scores how closely a KYC name matches a "back" name.
 *
 * Both names are tokenized with [stringModify] (unless pre-tokenized lists are
 * supplied), after which [getSimilarity] applies cheap size checks, an exact
 * token-containment check and finally a Damerau-distance based fuzzy comparison.
 *
 * @param kycFirstName first name from the KYC side; only used to build [kycData].
 * @param kycLastName last name from the KYC side; only used to build [kycData].
 * @param backFirstName first name from the "back" side; only used to build [backData].
 * @param backLastName last name from the "back" side; only used to build [backData].
 * @param kycData tokenized KYC name; defaults to `stringModify(kycFirstName, kycLastName)`.
 * @param backData tokenized "back" name; defaults to `stringModify(backFirstName, backLastName)`.
 * @param maxMistake largest Damerau distance tolerated for any single word.
 * @param maxSumMistake largest total of per-word Damerau distances tolerated.
 */
class KycAnalyzer(
    kycFirstName: String,
    kycLastName: String,
    backFirstName: String,
    backLastName: String,
    private val kycData: List<String> = stringModify(kycFirstName, kycLastName),
    private val backData: List<String> = stringModify(backFirstName, backLastName),
    private val maxMistake: Double,
    private val maxSumMistake: Double
) {

    /**
     * Computes the similarity score of the two names.
     *
     * @return `0.0` when the word counts or letter counts differ by more than a
     * factor of [MAX_SIZE_RATIO], or when the fuzzy search exceeds the mistake
     * limits; `1.0` when one token list fully contains the other; otherwise the
     * fuzzy score from [inaccurateSearch], which lies in `0.0..1.0`.
     */
    fun getSimilarity(): Double {
        return when {
            !wordQuanChecker(kycData, backData, MAX_SIZE_RATIO) -> 0.0
            !letterQuanChecker(kycData, backData, MAX_SIZE_RATIO) -> 0.0
            fullSimilarity(kycData, backData) -> 1.0
            else -> inaccurateSearch(kycData, backData)
        }
    }

    /** Total number of characters across all words of the list. */
    private fun List<String>.letterCount(): Int = fold(0) { total, word -> total + word.length }

    /**
     * Checks that the total letter counts of the two lists are within a factor of
     * [maxDifference] of each other.
     *
     * If [data2] has no letters the ratio is infinite or NaN; neither lies in the
     * accepted range, so the check fails rather than throwing.
     */
    private fun letterQuanChecker(data1: List<String>, data2: List<String>, maxDifference: Double): Boolean {
        val ratio = data1.letterCount().toDouble() / data2.letterCount().toDouble()
        return ratio in 1 / maxDifference..maxDifference
    }

    /** Returns `true` when every token of one list is present in the other. */
    private fun fullSimilarity(data1: List<String>, data2: List<String>): Boolean {
        return data1.containsAll(data2) || data2.containsAll(data1)
    }

    // Earlier experimental variant, kept disabled for reference.
    /*private fun inaccurateSearch1(data1: List<String>, data2: List<String>): Double {
        val minData = if (data1.size <= data2.size) data1 else data2
        val maxData = if (data1.size > data2.size) data1 else data2
        val damerau = Damerau()
        var distance = Double.MIN_VALUE
        val t = measureNanoTime {
            for (word1 in minData) {
                var curDistance = Double.MAX_VALUE
                for (word2 in maxData) {
                    curDistance = min(curDistance, damerau.distance(word1, word2))
                }
                distance = max(curDistance, distance)
            }
            print("way1: distance: $distance ")
        }
        println("time: $t")
        return distance/min(minData.sumBy { it.length }, maxData.sumBy { it.length })
    }*/

    /**
     * Fuzzy comparison used when neither list fully contains the other.
     *
     * For every word of the list with fewer words, the smallest Damerau distance
     * to any word of the other list is taken. The search gives up with `0.0` as
     * soon as a single word's distance exceeds [maxMistake] or the running total
     * exceeds [maxSumMistake]. Otherwise the worst per-word distance is
     * normalized by the smaller of the two total letter counts.
     *
     * @return a score in `0.0..1.0`; higher means more similar.
     */
    private fun inaccurateSearch(data1: List<String>, data2: List<String>): Double {
        // On equal sizes data1 is treated as the shorter list.
        val (minData, maxData) = if (data1.size <= data2.size) data1 to data2 else data2 to data1
        val damerau = Damerau()

        // Running maximum of per-word distances. Distances are never negative, so
        // 0.0 is the correct seed (Double.MIN_VALUE is the smallest POSITIVE double).
        var distance = 0.0
        var sumMistake = 0

        for (word1 in minData) {
            val curDistance = maxData.fold(Double.MAX_VALUE) { best, word2 ->
                min(best, damerau.distance(word1, word2))
            }
            sumMistake += curDistance.toInt()
            if (curDistance.toInt() > maxMistake || sumMistake > maxSumMistake) {
                return 0.0
            }
            distance = max(distance, curDistance)
        }

        // NOTE(review): debug output on stdout, kept because existing callers may rely on it.
        println("basicDistance: $distance ")

        val shortestLetterCount = min(minData.letterCount(), maxData.letterCount())
        // With permissive limits the worst distance can exceed the letter count,
        // which would make the raw score negative; clamp to keep it in 0.0..1.0.
        return (1.0 - distance / shortestLetterCount).coerceAtLeast(0.0)
    }

    // Earlier experimental variant, kept disabled for reference.
    /*private fun inaccurateSearch3(data1: List<String>, data2: List<String>): Double {
        val minData = if (data1.size <= data2.size) data1 else data2
        val maxData = if (data1.size > data2.size) data1 else data2
        val damerau = Damerau()
        var distance = Double.MAX_VALUE
        val t3 =measureNanoTime{
            minData.map {
                word1->maxData.map{
                    word2-> when(val curDist = damerau.distance(word1, word2)){
                        in 0.5 .. distance ->distance = curDist
                    }
                }
            }
            print("way3: distance: $distance ")
        }
        println("time: $t3")
        return distance/min(minData.sumBy { it.length }, maxData.sumBy { it.length })
    }*/

    /**
     * Checks that the word counts of the two lists are within a factor of
     * [maxDifference] of each other.
     *
     * If [data2] is empty the ratio is infinite or NaN; neither lies in the
     * accepted range, so the check fails rather than throwing.
     */
    private fun wordQuanChecker(data1: List<String>, data2: List<String>, maxDifference: Double): Boolean {
        val ratio = data1.size.toDouble() / data2.size.toDouble()
        return ratio in 1 / maxDifference..maxDifference
    }

    private companion object {
        /** Largest accepted ratio between the word counts / letter counts of the two names. */
        const val MAX_SIZE_RATIO = 2.0
    }
}
- /*fun getSimilarity(): Double {
- val kycData = stringModify(kfn, kln)
- val backData = stringModify(bfn, bln)
- return when {
- !sizeChecker(kycData, backData, 2.0) -> 1.0
- else -> inaccurateSearch(kycData, backData)
- }
- fun inaccurateSearch(data1: List<String>, data2: List<String>): Double {
- val (minData, maxData) = if (data1.size <= data2.size) Pair(data1, data2) else Pair(data2, data1)
- // val minData = if (data1.size <= data2.size) data1 else data2
- // val maxData = if (data1.size > data2.size) data1 else data2
- var distance = Double.MIN_VALUE
- val damerau = Damerau()
- minData.forEach { word1 ->
- var curDistance = maxData.map { word2 ->
- damerau.distance(word1, word2)
- }.min() ?: MAX_VALUE
- // for (word2 in maxData) {
- // curDistance = min(curDistance, damerau.distance(word1, word2))
- // }
- distance = max(distance, curDistance)
- }
- return distance / minData.sumBy { it.length }
- }
- fun fullSimilarityChecker(data1: List<String>, data2: List<String>): Boolean {
- return data1.containsAll(data2) || data2.containsAll(data1)
- }
- }
- }
- //fun t(f: String) = privFun(f = f)
- //internal fun privFun(f: String): Unit = println(f)
- */
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement