Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- Public Function HTMLDecode(ByRef sourceText As String) As String
- Const PROC_NAME As String = "HTMLDecode"
- Const VB_HEX_PREFIX As String = "&h"
- Const DEC_CHARACTER_REFERENCE_PREFIX As String = "#"
- Const HEX_CHARACTER_REFERENCE_PREFIX As String = "#x"
- Dim characterReferences As VBScript_RegExp_55.MatchCollection
- Set characterReferences = ExtractCharacterReferences(sourceText)
- Static namedEntities As Scripting.Dictionary
- If namedEntities Is Nothing Then
- Set namedEntities = CreateEntityDictionary
- End If
- Dim currentPosition As Long
- Dim nextPosition As Long
- Dim characterReference As Match
- currentPosition = 1
- Dim refCounter As Long
- ReDim resultParts(characterReferences.Count * 2) As String
- For Each characterReference In characterReferences
- nextPosition = characterReference.FirstIndex
- Dim character As String
- Dim refText As String
- refText = characterReference.SubMatches.Item(1)
- If namedEntities.Exists(refText) Then
- 'characterReference is a named entity
- character = ChrW$(namedEntities.Item(refText))
- Else
- 'characterReference is probably a numeric entity
- Select Case characterReference.SubMatches.Item(0)
- Case DEC_CHARACTER_REFERENCE_PREFIX
- If IsNumeric(refText) Then
- 'entity is a decimal codepoint
- character = ChrW$(CLng(refText))
- Else
- Err.Raise ERROR_UNEXPECTED_ENTITY_FORMAT, PROC_NAME, "Unexpected decimal entity value '" & characterReference.Value & "'"
- End If
- Case HEX_CHARACTER_REFERENCE_PREFIX
- Dim hexRefText As String
- hexRefText = VB_HEX_PREFIX & refText
- If IsNumeric(hexRefText) Then
- 'entity is a hex codepoint
- character = ChrW$(CLng(hexRefText))
- Else
- Err.Raise ERROR_UNEXPECTED_ENTITY_FORMAT, PROC_NAME, "Unexpected hex entity value '" & characterReference.Value & "'"
- End If
- Case Else
- 'Unexpected entity text
- Err.Raise ERROR_UNEXPECTED_ENTITY_FORMAT, PROC_NAME, "Unexpected entity name '" & characterReference.Value & "'"
- End Select
- End If
- resultParts(refCounter) = Mid(sourceText, currentPosition, nextPosition - currentPosition + 1)
- resultParts(refCounter + 1) = character
- currentPosition = nextPosition + characterReference.Length + 1
- refCounter = refCounter + 2
- Next characterReference
- 'Catch any text trailing the last entity
- If currentPosition <= Len(sourceText) Then
- resultParts(refCounter) = Mid$(sourceText, currentPosition)
- End If
- HTMLDecode = Join(resultParts, vbNullString)
- End Function
- Option Explicit
- Private Const ERROR_UNEXPECTED_ENTITY_COUNT As Long = vbObjectError Or 1
- Private Const ERROR_UNEXPECTED_ENTITY_DUPLICATE As Long = vbObjectError Or 2
- Private Const ERROR_UNEXPECTED_ENTITY_VALUE As Long = vbObjectError Or 3
- Private Const ERROR_UNEXPECTED_ENTITY_FORMAT As Long = vbObjectError Or 4
- Private Function CreateEntityDictionary() As Scripting.Dictionary
- Const PROC_NAME As String = "getEntityDictionary"
- Const ENTITY_NAMES = _
- "quot,amp,apos,lt,gt,nbsp,iexcl,cent,pound,curren,yen,brvbar,sect,uml,copy,ordf," & _
- "laquo,not,shy,reg,macr,deg,plusmn,sup2,sup3,acute,micro,para,middot,cedil,sup1,ordm," & _
- "raquo,frac14,frac12,frac34,iquest,Agrave,Aacute,Acirc,Atilde,Auml,Aring,AElig,Ccedil,Egrave,Eacute,Ecirc," & _
- "Euml,Igrave,Iacute,Icirc,Iuml,ETH,Ntilde,Ograve,Oacute,Ocirc,Otilde,Ouml,times,Oslash,Ugrave,Uacute," & _
- "Ucirc,Uuml,Yacute,THORN,szlig,agrave,aacute,acirc,atilde,auml,aring,aelig,ccedil,egrave,eacute,ecirc," & _
- "euml,igrave,iacute,icirc,iuml,eth,ntilde,ograve,oacute,ocirc,otilde,ouml,divide,oslash,ugrave,uacute," & _
- "ucirc,uuml,yacute,thorn,yuml,OElig,oelig,Scaron,scaron,Yuml,fnof,circ,tilde,Alpha,Beta,Gamma," & _
- "Delta,Epsilon,Zeta,Eta,Theta,Iota,Kappa,Lambda,Mu,Nu,Xi,Omicron,Pi,Rho,Sigma,Tau," & _
- "Upsilon,Phi,Chi,Psi,Omega,alpha,beta,gamma,delta,epsilon,zeta,eta,theta,iota,kappa,lambda," & _
- "mu,nu,xi,omicron,pi,rho,sigmaf,sigma,tau,upsilon,phi,chi,psi,omega,thetasym,upsih," & _
- "piv,ensp,emsp,thinsp,zwnj,zwj,lrm,rlm,ndash,mdash,lsquo,rsquo,sbquo,ldquo,rdquo,bdquo," & _
- "dagger,Dagger,bull,hellip,permil,prime,Prime,lsaquo,rsaquo,oline,frasl,euro,image,weierp,real,trade," & _
- "alefsym,larr,uarr,rarr,darr,harr,crarr,lArr,uArr,rArr,dArr,hArr,forall,part,exist,empty," & _
- "nabla,isin,notin,ni,prod,sum,minus,lowast,radic,prop,infin,ang,and,or,cap,cup," & _
- "int,there4,sim,cong,asymp,ne,equiv,le,ge,sub,sup,nsub,sube,supe,oplus,otimes," & _
- "perp,sdot,lceil,rceil,lfloor,rfloor,lang,rang,loz,spades,clubs,hearts,diams"
- Const ENTITY_VALUES = _
- "34,38,39,60,62,160,161,162,163,164,165,166,167,168,169,170," & _
- "171,172,173,174,175,176,177,178,179,180,181,182,183,184,185,186," & _
- "187,188,189,190,191,192,193,194,195,196,197,198,199,200,201,202," & _
- "203,204,205,206,207,208,209,210,211,212,213,214,215,216,217,218," & _
- "219,220,221,222,223,224,225,226,227,228,229,230,231,232,233,234," & _
- "235,236,237,238,239,240,241,242,243,244,245,246,247,248,249,250," & _
- "251,252,253,254,255,338,339,352,353,376,402,710,732,913,914,915," & _
- "916,917,918,919,920,921,922,923,924,925,926,927,928,929,931,932," & _
- "933,934,935,936,937,945,946,947,948,949,950,951,952,953,954,955," & _
- "956,957,958,959,960,961,962,963,964,965,966,967,968,969,977,978," & _
- "982,8194,8195,8201,8204,8205,8206,8207,8211,8212,8216,8217,8218,8220,8221,8222," & _
- "8224,8225,8226,8230,8240,8242,8243,8249,8250,8254,8260,8364,8465,8472,8476,8482," & _
- "8501,8592,8593,8594,8595,8596,8629,8656,8657,8658,8659,8660,8704,8706,8707,8709," & _
- "8711,8712,8713,8715,8719,8721,8722,8727,8730,8733,8734,8736,8743,8744,8745,8746," & _
- "8747,8756,8764,8773,8776,8800,8801,8804,8805,8834,8835,8836,8838,8839,8853,8855," & _
- "8869,8901,8968,8969,8970,8971,9001,9002,9674,9824,9827,9829,9830"
- Const ENTITY_DELIMITER As String = ","
- Set CreateEntityDictionary = New Scripting.Dictionary
- 'Entity names must be case sensitive
- CreateEntityDictionary.CompareMode = BinaryCompare
- Dim entityNames() As String
- entityNames = Split(ENTITY_NAMES, ENTITY_DELIMITER)
- Dim entityValues() As String
- entityValues = Split(ENTITY_VALUES, ENTITY_DELIMITER)
- If UBound(entityNames) = UBound(entityValues) Then
- With CreateEntityDictionary
- Dim entityCounter As Long
- For entityCounter = LBound(entityNames) To UBound(entityNames)
- If Not .Exists(entityNames(entityCounter)) Then
- If IsNumeric(entityValues(entityCounter)) Then
- .Add entityNames(entityCounter), CLng(entityValues(entityCounter))
- Else
- Err.Raise ERROR_UNEXPECTED_ENTITY_VALUE, PROC_NAME, "Unexpected entity value: " & entityValues(entityCounter)
- End If
- Else
- Err.Raise ERROR_UNEXPECTED_ENTITY_DUPLICATE, PROC_NAME, "Unexpected duplicate entity name: " & entityNames(entityCounter)
- End If
- Next entityCounter
- End With
- Else
- Err.Raise ERROR_UNEXPECTED_ENTITY_COUNT, PROC_NAME, "Unexpected number of entity names/values"
- End If
- End Function
- Private Function ExtractCharacterReferences(ByRef sourceText As String) As VBScript_RegExp_55.MatchCollection
- Const REFERENCE_PATTERN As String = "&(#{0,1}[x]{0,1})(w{1,8});"
- With New VBScript_RegExp_55.RegExp
- .Global = True
- .Pattern = REFERENCE_PATTERN
- Set ExtractCharacterReferences = .Execute(sourceText)
- End With
- End Function
- ?htmlhelpers.HTMLDecode("",&,',<,>, ,¡,¢,£,¤")
- ",&,',<,>, ,¡,¢,£,¤
- ?htmlhelpers.HTMLDecode("Bill & Ted&#apos;s Excellent Adventure")
- Bill & Ted's Excellent Adventure
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement