Advertisement
Guest User

Untitled

a guest
Oct 17th, 2019
105
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 1.59 KB | None | 0 0
  1. {-# LANGUAGE OverloadedStrings #-}
  2.  
  3. module Text.MFKC ( distance ) where
  4.  
  5. ----------------------------------------------------------------------------
  6. -- |
  7. -- Module : MKFC
  8. -- Author : (C) Andrew Fatkulin
  9. -- License : BSD-3-Clause (see the file LICENSE)
  10. --
  11. --
  12. -- Computing the most frequent k characters distance of two strings
  13. ----------------------------------------------------------------------------
  14.  
  15. import Data.List (sortOn)
  16. import Data.Text (Text)
  17. import Data.HashMap.Strict((!), HashMap)
  18. import qualified Data.Text as Tx
  19. import qualified Data.HashMap.Strict as Hm
  20.  
  21.  
  22. infixr 1 ?
  23. (?) :: Bool -> a -> a -> a
  24. (?) True x _ = x
  25. (?) False _ y = y
  26.  
  27. type MkHash = HashMap Char Int
  28.  
  29. toHash :: Text -> MkHash
  30. toHash text = Tx.foldl fillMap emptyMap text
  31. where
  32. emptyMap = Hm.empty :: MkHash
  33. fillMap = \acc ch ->
  34. let value = (Hm.member ch acc) ? (acc ! ch) + 1 $ 1
  35. in Hm.insert ch value acc
  36.  
  37. takeMostFrequentK :: Int -> MkHash -> MkHash
  38. takeMostFrequentK k hash = Hm.fromList . (take k) . reverse . (sortOn snd) . Hm.toList $ hash
  39.  
  40. calcSimilarity :: (Int, Int, Int) -> Int
  41. calcSimilarity (sim, v1, v2) = v1 == v2 ? (sim + v2) $ (sim + v1 + v2)
  42.  
  43. similarity :: MkHash -> MkHash -> Int
  44. similarity h1 h2 = Hm.foldlWithKey' calc 0 h2
  45. where
  46. calc acc k v = (Hm.member k h1)
  47. ? calcSimilarity (acc, v, ( h1 ! k ))
  48. $ acc
  49.  
  50. distance :: Int -> Int -> Text -> Text -> Int
  51. distance k max text text2 = max - (similarity hash1 hash2)
  52. where
  53. hash1 = ((takeMostFrequentK k) . toHash) text
  54. hash2 = ((takeMostFrequentK k) . toHash) text2
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement