Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- -- | Code for calculating the levenstein distance between two strings
- module Data.Levenstein (distance) where
- import Prelude
- import Data.Array as Array
- import Data.Array ((!!))
- import Data.Function.Memoize (memoize2)
- import Data.Maybe as Maybe
- import Data.Newtype (unwrap)
- import Data.String (CodePoint)
- import Data.String as String
- import Data.Ord.Min (Min(..))
- import Partial.Unsafe (unsafePartial)
- min3 ∷ ∀ a. Ord a ⇒ a → a → a → a
- min3 x y z =
- unwrap $ Min x <> Min y <> Min z
- unsafeIndex ∷ ∀ a. Array a → Int → a
- unsafeIndex arr idx =
- unsafePartial $ Maybe.fromJust $ arr !! idx
- infixl 8 unsafeIndex as !!!
- -- | Calculates the distance from the code points
- distanceCodePoints ∷ Array CodePoint → Array CodePoint → Int
- distanceCodePoints x y =
- d (Array.length x) (Array.length y)
- where
- d = memoize2 $ \m n → dist m n
- dist i 0 = i
- dist 0 j = j
- dist i j =
- let
- a = (d (i - 1) j) + 1
- b = (d i (j - 1)) + 1
- c = (d (i - 1) (j - 1)) + if x !!! (i - 1) == y !!! (j - 1) then 0 else 1
- in
- min3 a b c
- -- | Calculates the levenshtein distance between two strings - this is the number of changes required to get from one string to the other..
- distance ∷ String → String → Int
- distance x y =
- distanceCodePoints (String.toCodePointArray x) (String.toCodePointArray y)
Add Comment
Please, Sign In to add comment