alphaslut

FuzzyLogic

Dec 19th, 2020
1,182
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
VBScript 18.20 KB | None | 0 0
  1. Option Explicit
  2. Type RankInfo
  3.     Offset As Integer
  4.     percentage As Single
  5. End Type
  6.  
  7. Function FuzzyPercent(ByVal String1 As String, _
  8.                       ByVal String2 As String, _
  9.                       Optional Algorithm As Integer = 3, _
  10.                       Optional Normalised As Boolean = False) As Single
  11. '*************************************
  12. '** Return a % match on two strings **
  13. '*************************************
  14. Dim intLen1 As Integer, intLen2 As Integer
  15. Dim intCurLen As Integer
  16. Dim intTo As Integer
  17. Dim intPos As Integer
  18. Dim intPtr As Integer
  19. Dim intScore As Integer
  20. Dim intTotScore As Integer
  21. Dim intStartPos As Integer
  22. Dim strWork As String
  23.  
  24. '-------------------------------------------------------
  25. '-- If strings havent been normalised, normalise them --
  26. '-------------------------------------------------------
  27. If Normalised = False Then
  28.     String1 = LCase(Trim(String1))
  29.     String2 = LCase(Trim(String2))
  30. End If
  31.  
  32. '----------------------------------------------
  33. '-- Give 100% match if strings exactly equal --
  34. '----------------------------------------------
  35. If String1 = String2 Then
  36.     FuzzyPercent = 1
  37.     Exit Function
  38. End If
  39.  
  40. intLen1 = Len(String1)
  41. intLen2 = Len(String2)
  42.  
  43. '----------------------------------------
  44. '-- Give 0% match if string length < 2 --
  45. '----------------------------------------
  46. If intLen1 < 2 Then
  47.     FuzzyPercent = 0
  48.     Exit Function
  49. End If
  50.  
  51. intTotScore = 0                   'initialise total possible score
  52. intScore = 0                      'initialise current score
  53.  
  54. '--------------------------------------------------------
  55. '-- If Algorithm = 1 or 3, Search for single characters --
  56. '--------------------------------------------------------
  57. If (Algorithm And 1) <> 0 Then
  58.     FuzzyAlg1 String1, String2, intScore, intTotScore
  59.     If intLen1 < intLen2 Then FuzzyAlg1 String2, String1, intScore, intTotScore
  60. End If
  61.  
  62. '-----------------------------------------------------------
  63. '-- If Algorithm = 2 or 3, Search for pairs, triplets etc. --
  64. '-----------------------------------------------------------
  65. If (Algorithm And 2) <> 0 Then
  66.     FuzzyAlg2 String1, String2, intScore, intTotScore
  67.     If intLen1 < intLen2 Then FuzzyAlg2 String2, String1, intScore, intTotScore
  68. End If
  69.  
  70. FuzzyPercent = intScore / intTotScore
  71.  
  72. End Function
  73. Private Sub FuzzyAlg1(ByVal String1 As String, _
  74.                       ByVal String2 As String, _
  75.                       ByRef score As Integer, _
  76.                       ByRef TotScore As Integer)
  77. Dim intLen1 As Integer, intPos As Integer, intPtr As Integer, intStartPos As Integer
  78.  
  79. intLen1 = Len(String1)
  80. TotScore = TotScore + intLen1              'update total possible score
  81. intPos = 0
  82. For intPtr = 1 To intLen1
  83.     intStartPos = intPos + 1
  84.     intPos = InStr(intStartPos, String2, Mid$(String1, intPtr, 1))
  85.     If intPos > 0 Then
  86.         If intPos > intStartPos + 3 Then     'No match if char is > 3 bytes away
  87.            intPos = intStartPos
  88.         Else
  89.             score = score + 1          'Update current score
  90.        End If
  91.     Else
  92.         intPos = intStartPos
  93.     End If
  94. Next intPtr
  95. End Sub
  96. Private Sub FuzzyAlg2(ByVal String1 As String, _
  97.                         ByVal String2 As String, _
  98.                         ByRef score As Integer, _
  99.                         ByRef TotScore As Integer)
  100. Dim intCurLen As Integer, intLen1 As Integer, intTo As Integer, intPtr As Integer, intPos As Integer
  101. Dim strWork As String
  102.  
  103. intLen1 = Len(String1)
  104. For intCurLen = 2 To intLen1
  105.     strWork = String2                          'Get a copy of String2
  106.    intTo = intLen1 - intCurLen + 1
  107.     TotScore = TotScore + Int(intLen1 / intCurLen)  'Update total possible score
  108.    For intPtr = 1 To intTo Step intCurLen
  109.         intPos = InStr(strWork, Mid$(String1, intPtr, intCurLen))
  110.         If intPos > 0 Then
  111.             Mid$(strWork, intPos, intCurLen) = String$(intCurLen, &H0) 'corrupt found string
  112.            score = score + 1     'Update current score
  113.        End If
  114.     Next intPtr
  115. Next intCurLen
  116.  
  117. End Sub
  118.  
  119. Function FuzzyVLookup(ByVal LookupValue As String, _
  120.                       ByVal TableArray As Range, _
  121.                       ByVal IndexNum As Integer, _
  122.                       Optional NFPercent As Single = 0.05, _
  123.                       Optional Rank As Integer = 1, _
  124.                       Optional Algorithm As Integer = 3, _
  125.                       Optional AdditionalCols As Integer = 0) As Variant
  126. '********************************************************************************
  127. '** Function to Fuzzy match LookupValue with entries in                        **
  128. '** column 1 of table specified by TableArray.                                 **
  129. '** TableArray must specify the top left cell of the range to be searched      **
  130. '** The function stops scanning the table when an empty cell in column 1       **
  131. '** is found.                                                                  **
  132. '** For each entry in column 1 of the table, FuzzyPercent is called to match   **
  133. '** LookupValue with the Table entry.                                          **
  134. '** 'Rank' is an optional parameter which may take any value > 0               **
  135. '**        (default 1) and causes the function to return the 'nth' best        **
  136. '**         match (where 'n' is defined by 'Rank' parameter)                   **
  137. '** If the 'Rank' match percentage < NFPercent (Default 5%), #N/A is returned. **
  138. '** IndexNum is the column number of the entry in TableArray required to be    **
  139. '** returned, as follows:                                                      **
  140. '** If IndexNum > 0 and the 'Rank' percentage match is >= NFPercent            **
  141. '**                 (Default 5%) the column entry indicated by IndexNum is     **
  142. '**                 returned.                                                  **
  143. '** if IndexNum = 0 and the 'Rank' percentage match is >= NFPercent            **
  144. '**                 (Default 5%) the offset row (starting at 1) is returned.   **
  145. '**                 This value can be used directly in the 'Index' function.   **
  146. '**                                                                            **
  147. '** Algorithm can take one of the following values:                            **
  148. '** Algorithm = 1:                                                             **
  149. '**     This algorithm is best suited for matching mis-spellings.              **
  150. '**     For each character in 'String1', a search is performed on 'String2'.   **
  151. '**     The search is deemed successful if a character is found in 'String2'   **
  152. '**     within 3 characters of the current position.                           **
  153. '**     A score is kept of matching characters which is returned as a          **
  154. '**     percentage of the total possible score.                                **
  155. '** Algorithm = 2:                                                             **
  156. '**     This algorithm is best suited for matching sentences, or               **
  157. '**     'firstname lastname' compared with 'lastname firstname' combinations   **
  158. '**     A count of matching pairs, triplets, quadruplets etc. in 'String1' and **
  159. '**     'String2' is returned as a percentage of the total possible.           **
  160. '** Algorithm = 3: Both Algorithms 1 and 2 are performed.                      **
  161. '********************************************************************************
  162. Dim r As Range
  163.  
  164. Dim strListString As String
  165. Dim strWork As String
  166.  
  167. Dim sngMinPercent As Single
  168. Dim sngWork As Single
  169. Dim sngCurPercent  As Single
  170. Dim intBestMatchPtr As Integer
  171. Dim intRankPtr As Integer
  172. Dim intRankPtr1 As Integer
  173. Dim i As Integer
  174.  
  175. Dim lEndRow As Long
  176.  
  177. Dim udRankData() As RankInfo
  178.  
  179. Dim vCurValue As Variant
  180.  
  181. '--------------------------------------------------------------
  182. '--    Validation                                            --
  183. '--------------------------------------------------------------
  184.  
  185. LookupValue = LCase$(Application.Trim(LookupValue))
  186.  
  187. If IsMissing(NFPercent) Then
  188.     sngMinPercent = 0.05
  189. Else
  190.     If (NFPercent <= 0) Or (NFPercent > 1) Then
  191.         FuzzyVLookup = "*** 'NFPercent' must be a percentage > zero ***"
  192.         Exit Function
  193.     End If
  194.     sngMinPercent = NFPercent
  195. End If
  196.  
  197. If Rank < 1 Then
  198.     FuzzyVLookup = "*** 'Rank' must be an integer > 0 ***"
  199.     Exit Function
  200. End If
  201.  
  202. ReDim udRankData(1 To Rank)
  203.  
  204. lEndRow = TableArray.Rows.Count
  205. If VarType(TableArray.Cells(lEndRow, 1).Value) = vbEmpty Then
  206.     lEndRow = TableArray.Cells(lEndRow, 1).End(xlUp).row
  207. End If
  208.  
  209. '---------------
  210. '-- Main loop --
  211. '---------------
  212. For Each r In Range(TableArray.Cells(1, 1), TableArray.Cells(lEndRow, 1))
  213.     vCurValue = ""
  214.     For i = 0 To AdditionalCols
  215.         vCurValue = vCurValue & r.Offset(0, i).Text
  216.     Next i
  217.     If VarType(vCurValue) = vbString Then
  218.         strListString = LCase$(Application.Trim(vCurValue))
  219.        
  220.         '------------------------------------------------
  221.        '-- Fuzzy match strings & get percentage match --
  222.        '------------------------------------------------
  223.        sngCurPercent = FuzzyPercent(String1:=LookupValue, _
  224.                                      String2:=strListString, _
  225.                                      Algorithm:=Algorithm, _
  226.                                      Normalised:=True)
  227.        
  228.         If sngCurPercent >= sngMinPercent Then
  229.             '---------------------------
  230.            '-- Store in ranked array --
  231.            '---------------------------
  232.            For intRankPtr = 1 To Rank
  233.                 If sngCurPercent > udRankData(intRankPtr).percentage Then
  234.                     For intRankPtr1 = Rank To intRankPtr + 1 Step -1
  235.                         With udRankData(intRankPtr1)
  236.                             .Offset = udRankData(intRankPtr1 - 1).Offset
  237.                             .percentage = udRankData(intRankPtr1 - 1).percentage
  238.                         End With
  239.                     Next intRankPtr1
  240.                     With udRankData(intRankPtr)
  241.                         .Offset = r.row
  242.                         .percentage = sngCurPercent
  243.                     End With
  244.                     Exit For
  245.                 End If
  246.             Next intRankPtr
  247.         End If
  248.        
  249.     End If
  250. Next r
  251.  
  252. If udRankData(Rank).percentage < sngMinPercent Then
  253.     '--------------------------------------
  254.    '-- Return '#N/A' if below NFPercent --
  255.    '--------------------------------------
  256.    FuzzyVLookup = CVErr(xlErrNA)
  257. Else
  258.     intBestMatchPtr = udRankData(Rank).Offset - TableArray.Cells(1, 1).row + 1
  259.     If IndexNum > 0 Then
  260.         '-----------------------------------
  261.        '-- Return column entry specified --
  262.        '-----------------------------------
  263.        FuzzyVLookup = TableArray.Cells(intBestMatchPtr, IndexNum)
  264.     Else
  265.         '-----------------------
  266.        '-- Return offset row --
  267.        '-----------------------
  268.        FuzzyVLookup = intBestMatchPtr
  269.     End If
  270. End If
  271. End Function
  272. Function FuzzyHLookup(ByVal LookupValue As String, _
  273.                       ByVal TableArray As Range, _
  274.                       ByVal IndexNum As Integer, _
  275.                       Optional NFPercent As Single = 0.05, _
  276.                       Optional Rank As Integer = 1, _
  277.                       Optional Algorithm As Integer = 3) As Variant
  278. '********************************************************************************
  279. '** Function to Fuzzy match LookupValue with entries in                        **
  280. '** row 1 of table specified by TableArray.                                    **
  281. '** TableArray must specify the top left cell of the range to be searched      **
  282. '** The function stops scanning the table when an empty cell in row 1          **
  283. '** is found.                                                                  **
  284. '** For each entry in row 1 of the table, FuzzyPercent is called to match      **
  285. '** LookupValue with the Table entry.                                          **
  286. '** 'Rank' is an optional parameter which may take any value > 0               **
  287. '**        (default 1) and causes the function to return the 'nth' best        **
  288. '**         match (where 'n' is defined by 'Rank' parameter)                   **
  289. '** If the 'Rank' match percentage < NFPercent (Default 5%), #N/A is returned. **
  290. '** IndexNum is the row number of the entry in TableArray required to be       **
  291. '** returned, as follows:                                                      **
  292. '** If IndexNum > 0 and the 'Rank' percentage match is >= NFPercent            **
  293. '**                 (Default 5%) the row entry indicated by IndexNum is        **
  294. '**                 returned.                                                  **
  295. '** if IndexNum = 0 and the 'Rank' percentage match is >= NFPercent            **
  296. '**                 (Default 5%) the offset col (starting at 0) is returned.   **
  297. '**                 This value can be used directly in the 'OffSet' function.  **
  298. '**                                                                            **
  299. '** Algorithm can take one of the following values:                            **
  300. '** Algorithm = 1:                                                             **
  301. '**     For each character in 'String1', a search is performed on 'String2'.   **
  302. '**     The search is deemed successful if a character is found in 'String2'   **
  303. '**     within 3 characters of the current position.                           **
  304. '**     A score is kept of matching characters which is returned as a          **
  305. '**     percentage of the total possible score.                                **
  306. '** Algorithm = 2:                                                             **
  307. '**     A count of matching pairs, triplets, quadruplets etc. in 'String1' and **
  308. '**     'String2' is returned as a percentage of the total possible.           **
  309. '** Algorithm = 3: Both Algorithms 1 and 2 are performed.                      **
  310. '********************************************************************************
  311. Dim r As Range
  312.  
  313. Dim strListString As String
  314. Dim strWork As String
  315.  
  316. Dim sngMinPercent As Single
  317. Dim sngWork As Single
  318. Dim sngCurPercent  As Single
  319.  
  320. Dim intBestMatchPtr As Integer
  321. Dim intPtr As Integer
  322. Dim intRankPtr As Integer
  323. Dim intRankPtr1 As Integer
  324.  
  325. Dim iEndCol As Integer
  326.  
  327. Dim udRankData() As RankInfo
  328.  
  329. Dim vCurValue As Variant
  330. '--------------------------------------------------------------
  331. '--    Validation                                            --
  332. '--------------------------------------------------------------
  333. LookupValue = LCase$(Application.Trim(LookupValue))
  334.  
  335. If IsMissing(NFPercent) Then
  336.     sngMinPercent = 0.05
  337. Else
  338.     If (NFPercent <= 0) Or (NFPercent > 1) Then
  339.         FuzzyHLookup = "*** 'NFPercent' must be a percentage > zero ***"
  340.         Exit Function
  341.     End If
  342.     sngMinPercent = NFPercent
  343. End If
  344.  
  345. If Rank < 1 Then
  346.     FuzzyHLookup = "*** 'Rank' must be an integer > 0 ***"
  347.     Exit Function
  348. End If
  349.  
  350. ReDim udRankData(1 To Rank)
  351. '**************************
  352. iEndCol = TableArray.Columns.Count
  353. If VarType(TableArray.Cells(1, iEndCol).Value) = vbEmpty Then
  354.     iEndCol = TableArray.Cells(1, iEndCol).End(xlToLeft).Column
  355. End If
  356.  
  357. '---------------
  358. '-- Main loop --
  359. '---------------
  360. For Each r In Range(TableArray.Cells(1, 1), TableArray.Cells(1, iEndCol))
  361.     vCurValue = r.Value
  362.     If VarType(vCurValue) = vbString Then
  363.         strListString = LCase$(Application.Trim(vCurValue))
  364.        
  365.         '------------------------------------------------
  366.        '-- Fuzzy match strings & get percentage match --
  367.        '------------------------------------------------
  368.        sngCurPercent = FuzzyPercent(String1:=LookupValue, _
  369.                                      String2:=strListString, _
  370.                                      Algorithm:=Algorithm, _
  371.                                      Normalised:=True)
  372.        
  373.         If sngCurPercent >= sngMinPercent Then
  374.             '---------------------------
  375.            '-- Store in ranked array --
  376.            '---------------------------
  377.            For intRankPtr = 1 To Rank
  378.                 If sngCurPercent > udRankData(intRankPtr).percentage Then
  379.                     For intRankPtr1 = Rank To intRankPtr + 1 Step -1
  380.                         With udRankData(intRankPtr1)
  381.                             .Offset = udRankData(intRankPtr1 - 1).Offset
  382.                             .percentage = udRankData(intRankPtr1 - 1).percentage
  383.                         End With
  384.                     Next intRankPtr1
  385.                     With udRankData(intRankPtr)
  386.                         .Offset = r.Column
  387.                         .percentage = sngCurPercent
  388.                     End With
  389.                     Exit For
  390.                 End If
  391.             Next intRankPtr
  392.         End If
  393.        
  394.     End If
  395. Next r
  396.  
  397. If udRankData(Rank).percentage < sngMinPercent Then
  398.     '--------------------------------------
  399.    '-- Return '#N/A' if below NFPercent --
  400.    '--------------------------------------
  401.    FuzzyHLookup = CVErr(xlErrNA)
  402. Else
  403.     intBestMatchPtr = udRankData(Rank).Offset - TableArray.Cells(1, 1).Column + 1
  404.     If IndexNum > 0 Then
  405.         '-----------------------------------
  406.        '-- Return row entry specified --
  407.        '-----------------------------------
  408.        FuzzyHLookup = TableArray.Cells(IndexNum, intBestMatchPtr)
  409.     Else
  410.         '-----------------------
  411.        '-- Return offset col --
  412.        '-----------------------
  413.        FuzzyHLookup = intBestMatchPtr
  414.     End If
  415. End If
  416. End Function
  417.  
  418.  
  419.  
  420. Public Function Levenshtein(s1 As String, s2 As String)
  421.  
  422. Dim i As Integer
  423. Dim j As Integer
  424. Dim L1 As Integer
  425. Dim L2 As Integer
  426. Dim D() As Integer
  427. Dim min1 As Integer
  428. Dim min2 As Integer
  429.  
  430. L1 = Len(s1)
  431. L2 = Len(s2)
  432. ReDim D(L1, L2)
  433. For i = 0 To L1
  434.     D(i, 0) = i
  435. Next
  436. For j = 0 To L2
  437.     D(0, j) = j
  438. Next
  439. For i = 1 To L1
  440.     For j = 1 To L2
  441.         If Mid(s1, i, 1) = Mid(s2, j, 1) Then
  442.             D(i, j) = D(i - 1, j - 1)
  443.         Else
  444.             min1 = D(i - 1, j) + 1
  445.             min2 = D(i, j - 1) + 1
  446.             If min2 < min1 Then
  447.                 min1 = min2
  448.             End If
  449.             min2 = D(i - 1, j - 1) + 1
  450.             If min2 < min1 Then
  451.                 min1 = min2
  452.             End If
  453.             D(i, j) = min1
  454.         End If
  455.     Next
  456. Next
  457. Levenshtein = D(L1, L2)
  458. End Function
  459.  
  460.  
Add Comment
Please, Sign In to add comment