Advertisement
Guest User

Untitled

a guest
Oct 25th, 2016
58
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 7.64 KB | None | 0 0
  1. Public Function HTMLDecode(ByRef sourceText As String) As String
  2.  
  3. Const PROC_NAME As String = "HTMLDecode"
  4.  
  5. Const VB_HEX_PREFIX As String = "&h"
  6. Const DEC_CHARACTER_REFERENCE_PREFIX As String = "#"
  7. Const HEX_CHARACTER_REFERENCE_PREFIX As String = "#x"
  8.  
  9. Dim characterReferences As VBScript_RegExp_55.MatchCollection
  10. Set characterReferences = ExtractCharacterReferences(sourceText)
  11.  
  12. Static namedEntities As Scripting.Dictionary
  13. If namedEntities Is Nothing Then
  14. Set namedEntities = CreateEntityDictionary
  15. End If
  16.  
  17. Dim currentPosition As Long
  18. Dim nextPosition As Long
  19. Dim characterReference As Match
  20.  
  21. currentPosition = 1
  22. Dim refCounter As Long
  23.  
  24. ReDim resultParts(characterReferences.Count * 2) As String
  25. For Each characterReference In characterReferences
  26. nextPosition = characterReference.FirstIndex
  27. Dim character As String
  28. Dim refText As String
  29. refText = characterReference.SubMatches.Item(1)
  30. If namedEntities.Exists(refText) Then
  31. 'characterReference is a named entity
  32. character = ChrW$(namedEntities.Item(refText))
  33. Else
  34. 'characterReference is probably a numeric entity
  35. Select Case characterReference.SubMatches.Item(0)
  36. Case DEC_CHARACTER_REFERENCE_PREFIX
  37. If IsNumeric(refText) Then
  38. 'entity is a decimal codepoint
  39. character = ChrW$(CLng(refText))
  40. Else
  41. Err.Raise ERROR_UNEXPECTED_ENTITY_FORMAT, PROC_NAME, "Unexpected decimal entity value '" & characterReference.Value & "'"
  42. End If
  43. Case HEX_CHARACTER_REFERENCE_PREFIX
  44. Dim hexRefText As String
  45. hexRefText = VB_HEX_PREFIX & refText
  46. If IsNumeric(hexRefText) Then
  47. 'entity is a hex codepoint
  48. character = ChrW$(CLng(hexRefText))
  49. Else
  50. Err.Raise ERROR_UNEXPECTED_ENTITY_FORMAT, PROC_NAME, "Unexpected hex entity value '" & characterReference.Value & "'"
  51. End If
  52. Case Else
  53. 'Unexpected entity text
  54. Err.Raise ERROR_UNEXPECTED_ENTITY_FORMAT, PROC_NAME, "Unexpected entity name '" & characterReference.Value & "'"
  55. End Select
  56. End If
  57. resultParts(refCounter) = Mid(sourceText, currentPosition, nextPosition - currentPosition + 1)
  58. resultParts(refCounter + 1) = character
  59. currentPosition = nextPosition + characterReference.Length + 1
  60. refCounter = refCounter + 2
  61. Next characterReference
  62.  
  63. 'Catch any text trailing the last entity
  64. If currentPosition <= Len(sourceText) Then
  65. resultParts(refCounter) = Mid$(sourceText, currentPosition)
  66. End If
  67. HTMLDecode = Join(resultParts, vbNullString)
  68.  
  69. End Function
  70.  
  71. Option Explicit
  72.  
  73. Private Const ERROR_UNEXPECTED_ENTITY_COUNT As Long = vbObjectError Or 1
  74. Private Const ERROR_UNEXPECTED_ENTITY_DUPLICATE As Long = vbObjectError Or 2
  75. Private Const ERROR_UNEXPECTED_ENTITY_VALUE As Long = vbObjectError Or 3
  76. Private Const ERROR_UNEXPECTED_ENTITY_FORMAT As Long = vbObjectError Or 4
  77.  
  78. Private Function CreateEntityDictionary() As Scripting.Dictionary
  79.  
  80. Const PROC_NAME As String = "getEntityDictionary"
  81.  
  82. Const ENTITY_NAMES = _
  83. "quot,amp,apos,lt,gt,nbsp,iexcl,cent,pound,curren,yen,brvbar,sect,uml,copy,ordf," & _
  84. "laquo,not,shy,reg,macr,deg,plusmn,sup2,sup3,acute,micro,para,middot,cedil,sup1,ordm," & _
  85. "raquo,frac14,frac12,frac34,iquest,Agrave,Aacute,Acirc,Atilde,Auml,Aring,AElig,Ccedil,Egrave,Eacute,Ecirc," & _
  86. "Euml,Igrave,Iacute,Icirc,Iuml,ETH,Ntilde,Ograve,Oacute,Ocirc,Otilde,Ouml,times,Oslash,Ugrave,Uacute," & _
  87. "Ucirc,Uuml,Yacute,THORN,szlig,agrave,aacute,acirc,atilde,auml,aring,aelig,ccedil,egrave,eacute,ecirc," & _
  88. "euml,igrave,iacute,icirc,iuml,eth,ntilde,ograve,oacute,ocirc,otilde,ouml,divide,oslash,ugrave,uacute," & _
  89. "ucirc,uuml,yacute,thorn,yuml,OElig,oelig,Scaron,scaron,Yuml,fnof,circ,tilde,Alpha,Beta,Gamma," & _
  90. "Delta,Epsilon,Zeta,Eta,Theta,Iota,Kappa,Lambda,Mu,Nu,Xi,Omicron,Pi,Rho,Sigma,Tau," & _
  91. "Upsilon,Phi,Chi,Psi,Omega,alpha,beta,gamma,delta,epsilon,zeta,eta,theta,iota,kappa,lambda," & _
  92. "mu,nu,xi,omicron,pi,rho,sigmaf,sigma,tau,upsilon,phi,chi,psi,omega,thetasym,upsih," & _
  93. "piv,ensp,emsp,thinsp,zwnj,zwj,lrm,rlm,ndash,mdash,lsquo,rsquo,sbquo,ldquo,rdquo,bdquo," & _
  94. "dagger,Dagger,bull,hellip,permil,prime,Prime,lsaquo,rsaquo,oline,frasl,euro,image,weierp,real,trade," & _
  95. "alefsym,larr,uarr,rarr,darr,harr,crarr,lArr,uArr,rArr,dArr,hArr,forall,part,exist,empty," & _
  96. "nabla,isin,notin,ni,prod,sum,minus,lowast,radic,prop,infin,ang,and,or,cap,cup," & _
  97. "int,there4,sim,cong,asymp,ne,equiv,le,ge,sub,sup,nsub,sube,supe,oplus,otimes," & _
  98. "perp,sdot,lceil,rceil,lfloor,rfloor,lang,rang,loz,spades,clubs,hearts,diams"
  99.  
  100. Const ENTITY_VALUES = _
  101. "34,38,39,60,62,160,161,162,163,164,165,166,167,168,169,170," & _
  102. "171,172,173,174,175,176,177,178,179,180,181,182,183,184,185,186," & _
  103. "187,188,189,190,191,192,193,194,195,196,197,198,199,200,201,202," & _
  104. "203,204,205,206,207,208,209,210,211,212,213,214,215,216,217,218," & _
  105. "219,220,221,222,223,224,225,226,227,228,229,230,231,232,233,234," & _
  106. "235,236,237,238,239,240,241,242,243,244,245,246,247,248,249,250," & _
  107. "251,252,253,254,255,338,339,352,353,376,402,710,732,913,914,915," & _
  108. "916,917,918,919,920,921,922,923,924,925,926,927,928,929,931,932," & _
  109. "933,934,935,936,937,945,946,947,948,949,950,951,952,953,954,955," & _
  110. "956,957,958,959,960,961,962,963,964,965,966,967,968,969,977,978," & _
  111. "982,8194,8195,8201,8204,8205,8206,8207,8211,8212,8216,8217,8218,8220,8221,8222," & _
  112. "8224,8225,8226,8230,8240,8242,8243,8249,8250,8254,8260,8364,8465,8472,8476,8482," & _
  113. "8501,8592,8593,8594,8595,8596,8629,8656,8657,8658,8659,8660,8704,8706,8707,8709," & _
  114. "8711,8712,8713,8715,8719,8721,8722,8727,8730,8733,8734,8736,8743,8744,8745,8746," & _
  115. "8747,8756,8764,8773,8776,8800,8801,8804,8805,8834,8835,8836,8838,8839,8853,8855," & _
  116. "8869,8901,8968,8969,8970,8971,9001,9002,9674,9824,9827,9829,9830"
  117.  
  118. Const ENTITY_DELIMITER As String = ","
  119.  
  120. Set CreateEntityDictionary = New Scripting.Dictionary
  121. 'Entity names must be case sensitive
  122. CreateEntityDictionary.CompareMode = BinaryCompare
  123.  
  124. Dim entityNames() As String
  125. entityNames = Split(ENTITY_NAMES, ENTITY_DELIMITER)
  126.  
  127. Dim entityValues() As String
  128. entityValues = Split(ENTITY_VALUES, ENTITY_DELIMITER)
  129.  
  130. If UBound(entityNames) = UBound(entityValues) Then
  131. With CreateEntityDictionary
  132. Dim entityCounter As Long
  133. For entityCounter = LBound(entityNames) To UBound(entityNames)
  134. If Not .Exists(entityNames(entityCounter)) Then
  135. If IsNumeric(entityValues(entityCounter)) Then
  136. .Add entityNames(entityCounter), CLng(entityValues(entityCounter))
  137. Else
  138. Err.Raise ERROR_UNEXPECTED_ENTITY_VALUE, PROC_NAME, "Unexpected entity value: " & entityValues(entityCounter)
  139. End If
  140. Else
  141. Err.Raise ERROR_UNEXPECTED_ENTITY_DUPLICATE, PROC_NAME, "Unexpected duplicate entity name: " & entityNames(entityCounter)
  142. End If
  143. Next entityCounter
  144. End With
  145. Else
  146. Err.Raise ERROR_UNEXPECTED_ENTITY_COUNT, PROC_NAME, "Unexpected number of entity names/values"
  147. End If
  148.  
  149. End Function
  150.  
  151. Private Function ExtractCharacterReferences(ByRef sourceText As String) As VBScript_RegExp_55.MatchCollection
  152.  
  153. Const REFERENCE_PATTERN As String = "&(#{0,1}[x]{0,1})(w{1,8});"
  154.  
  155. With New VBScript_RegExp_55.RegExp
  156. .Global = True
  157. .Pattern = REFERENCE_PATTERN
  158. Set ExtractCharacterReferences = .Execute(sourceText)
  159. End With
  160.  
  161. End Function
  162.  
  163. ?htmlhelpers.HTMLDecode("",&,&apos;,<,>,&nbsp;,&iexcl;,&cent;,&pound;,&curren;")
  164. ",&,',<,>, ,¡,¢,£,¤
  165.  
  166. ?htmlhelpers.HTMLDecode("Bill&#xa0;&&#x000000A0;Ted&#apos;s&#160;Excellent&#00000160;Adventure")
  167. Bill & Ted's Excellent Adventure
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement