Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- Public Function strSimLookup(str1 As Variant, rRng As Range, Optional returnType) As Variant
- 'Return either the best match or the index of the best match
- 'depending on returnTYype parameter) between str1 and strings in rRng)
- ' returnType = 0 or omitted: returns the best matching string
- ' returnType = 1 : returns the index of the best matching string
- ' returnType = 2 : returns the similarity metric
- Dim sPairs1 As Collection
- Dim sPairs2 As Collection
- Dim metric, bestMetric As Double
- Dim i, iBest As Long
- Const RETURN_STRING As Integer = 0
- Const RETURN_INDEX As Integer = 1
- Const RETURN_METRIC As Integer = 2
- If IsMissing(returnType) Then returnType = RETURN_STRING
- Set sPairs1 = New Collection
- WordLetterPairs CStr(str1), sPairs1
- bestMetric = -1
- iBest = -1
- For i = 1 To rRng.Count
- 'Exit For '' delete this
- Set sPairs2 = New Collection
- WordLetterPairs CStr(rRng(i)), sPairs2
- metric = SimilarityMetric(sPairs1, sPairs2)
- If metric > bestMetric Then
- bestMetric = metric
- iBest = i
- End If
- Set sPairs2 = Nothing
- Next i
- If iBest = -1 Then
- strSimLookup = CVErr(xlErrValue)
- Exit Function
- End If
- Select Case returnType
- Case RETURN_STRING
- strSimLookup = CStr(rRng(iBest))
- Case RETURN_INDEX
- strSimLookup = iBest
- Case Else
- strSimLookup = bestMetric
- End Select
- End Function
- Public Function strSim(str1 As String, str2 As String) As Variant
- Dim ilen, iLen1, ilen2 As Integer
- iLen1 = Len(str1)
- ilen2 = Len(str2)
- If iLen1 >= ilen2 Then ilen = ilen2 Else ilen = iLen1
- strSim = stringSimilarity(Left(str1, ilen), Left(str2, ilen))
- End Function
- Sub WordLetterPairs(str As String, pairColl As Collection)
- 'Tokenize str into words, then add all letter pairs to pairColl
- Dim Words() As String
- Dim word, nPairs, pair As Integer
- Words = Split(str)
- If UBound(Words) < 0 Then
- Set pairColl = Nothing
- Exit Sub
- End If
- For word = 0 To UBound(Words)
- 'Exit For '' delete this
- nPairs = Len(Words(word)) - 1
- If nPairs > 0 Then
- For pair = 1 To nPairs
- pairColl.Add Mid(Words(word), pair, 2)
- Next pair
- End If
- Next word
- End Sub
- Private Function SimilarityMetric(sPairs1 As Collection, sPairs2 As Collection) As Variant
- 'Helper function to calculate similarity metric given two collections of letter pairs.
- 'This function is designed to allow the pair collections to be set up separately as needed.
- 'NOTE: sPairs2 collection will be altered as pairs are removed; copy the collection
- 'if this is not the desired behavior.
- 'Also assumes that collections will be deallocated somewhere else
- Dim Intersect As Double
- Dim Union As Double
- Dim i, j As Long
- If sPairs1.Count = 0 Or sPairs2.Count = 0 Then
- SimilarityMetric = CVErr(xlErrNA)
- Exit Function
- End If
- Union = sPairs1.Count + sPairs2.Count
- Intersect = 0
- For i = 1 To sPairs1.Count
- 'Exit For '' delete this
- For j = 1 To sPairs2.Count
- 'Exit For '' delete this
- If StrComp(sPairs1(i), sPairs2(j)) = 0 Then
- Intersect = Intersect + 1
- sPairs2.Remove j
- Exit For
- End If
- Next j
- Next i
- SimilarityMetric = (2 * Intersect) / Union
- End Function
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement