Guest User

Untitled

a guest
Jun 19th, 2018
103
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 2.54 KB | None | 0 0
  ''' <summary>
  ''' Removes a fixed set of HTML opening tags (links, images, spans, paragraphs, ...)
  ''' from <paramref name="str"/> so the text can be used in title tags and descriptions.
  ''' </summary>
  ''' <param name="str">The text to clean. May be Nothing or empty.</param>
  ''' <returns>
  ''' The text with every occurrence of the listed tags removed. Nothing or an empty
  ''' string is returned unchanged.
  ''' </returns>
  ''' <remarks>
  ''' Matching is by prefix, so "&lt;p" also removes tags such as "&lt;pre", and "&lt;a"
  ''' also removes tags such as "&lt;abbr". Closing tags ("&lt;/a&gt;") are not in the
  ''' list and are left in place.
  ''' </remarks>
  Public Function CleanText(ByVal str As String) As String
      If String.IsNullOrEmpty(str) Then
          Return str
      End If

      'mini db of tag prefixes to get rid of
      Dim indexChars() As String = {"<a", "<img", "<input type=""hidden"" name=""tax""", "<input type=""hidden"" name=""handling""", "<span", "<p", "<ul", "<div", "<embed", "<object", "<param"}

      For Each tagPrefix As String In indexChars
          Do
              'always rescan from the start: removing a tag can bring a new match together
              Dim tagStart As Integer = str.IndexOf(tagPrefix, StringComparison.Ordinal)
              If tagStart = -1 Then
                  Exit Do
              End If

              'search for the NEXT "<" strictly after the tag's own "<"; starting at
              'tagStart would find the tag itself and nothing would ever be removed
              Dim nextOpenIndex As Integer = str.IndexOf("<", tagStart + 1, StringComparison.Ordinal)
              Dim closeIndex As Integer = str.IndexOf(">", tagStart, StringComparison.Ordinal)

              Dim removeEnd As Integer 'exclusive end of the span to delete
              If closeIndex <> -1 AndAlso (nextOpenIndex = -1 OrElse closeIndex < nextOpenIndex) Then
                  'normal case: the tag is closed before any other tag starts
                  removeEnd = closeIndex + 1
              ElseIf nextOpenIndex <> -1 Then
                  'the right bracket was left off: drop everything up to the next tag
                  removeEnd = nextOpenIndex
              Else
                  'unterminated tag at the end of the text: drop the remainder
                  removeEnd = str.Length
              End If

              'removeEnd is always greater than tagStart, so the string shrinks on
              'every pass and the loop is guaranteed to finish
              str = str.Remove(tagStart, removeEnd - tagStart)
          Loop
      Next

      Return str
  End Function
  30.  
  /// <summary>
  /// Removes everything that looks like a markup tag ("&lt;" up to the next "&gt;")
  /// from <paramref name="text"/>.
  /// </summary>
  /// <param name="text">The text to strip. May be null or empty.</param>
  /// <returns>
  /// The text without tags. Null or an empty string is returned unchanged.
  /// </returns>
  public string StripHTMLTags(string text)
  {
      if (string.IsNullOrEmpty(text))
      {
          return text;
      }

      // [^>]* also matches line breaks, so tags that span several lines are removed.
      // The previous pattern "(.|n)*?" could not cross a newline.
      return Regex.Replace(text, @"<[^>]*>", string.Empty);
  }
  35.  
  36. @"<(?:.|\n)*?>"
  37.  
  38. Dim indexNextLeftBracket As Integer = str.IndexOf("<", indexOfInput) + 1
  39.  
  40. Dim indexNextLeftBracket As Integer = str.IndexOf("<", indexOfInput+1) + 1
  41.  
  // Matches one opening, closing or self-closing tag and captures its bare name
  // (no leading "/" and no trailing "/") in the TagName group. Built once and
  // reused, because constructing a Compiled regex on every call is expensive.
  private static readonly Regex TagPattern = new Regex(
      @"</?(?<TagName>[a-z][a-z0-9:-]*)[^<>]*>",
      RegexOptions.Compiled | RegexOptions.IgnoreCase );

  /// <summary>
  /// Removes every markup tag from <paramref name="html"/> except those whose
  /// name appears in <paramref name="allowList"/>.
  /// </summary>
  /// <param name="html">The markup to filter. May be null.</param>
  /// <param name="allowList">
  /// Tag names to keep, e.g. "b", "br", "h1". Names are compared without regard
  /// to case. An allowed name keeps the opening, closing and self-closing forms
  /// of the tag. Leading or trailing "/" characters in an entry are ignored, so
  /// older lists that spell out "/b" keep working. Null is treated as empty.
  /// </param>
  /// <returns>
  /// The filtered markup, or null when <paramref name="html"/> is null.
  /// </returns>
  public static string RemoveTags( string html, params string[] allowList )
  {
      if( html == null )
      {
          return null;
      }

      TagMatchEvaluator evaluator = new TagMatchEvaluator( allowList );
      return TagPattern.Replace( html, new MatchEvaluator( evaluator.Replace ) );
  }

  /// <summary>
  /// Decides, for a single tag match, whether the tag is kept or removed.
  /// </summary>
  private class TagMatchEvaluator
  {
      // Normalized allowed tag names (trimmed, without "/" decorations).
      private readonly string[] _allowed;

      /// <summary>Creates an evaluator for the given allowed tag names.</summary>
      /// <param name="allowList">Allowed tag names; null and blank entries are ignored.</param>
      public TagMatchEvaluator( string[] allowList )
      {
          ArrayList names = new ArrayList();
          if( allowList != null )
          {
              foreach( string entry in allowList )
              {
                  if( entry == null )
                  {
                      continue;
                  }

                  string name = entry.Trim().Trim( '/' );
                  if( name.Length > 0 )
                  {
                      names.Add( name );
                  }
              }
          }
          _allowed = (string[])names.ToArray( typeof( string ) );
      }

      /// <summary>
      /// Returns the matched tag unchanged when its name is allowed, otherwise
      /// an empty string so that the tag is removed.
      /// </summary>
      /// <param name="match">A match that provides a "TagName" group.</param>
      public string Replace( Match match )
      {
          string tagName = match.Groups[ "TagName" ].Value;
          foreach( string allowed in _allowed )
          {
              // The tag pattern ignores case, so the name comparison must too.
              if( string.Equals( allowed, tagName, System.StringComparison.OrdinalIgnoreCase ) )
              {
                  return match.Value;
              }
          }
          return "";
      }
  }
Add Comment
Please, Sign In to add comment