# Build the regexes used to recognise whitelisted HTML tags.
#
# Three compiled patterns are produced:
#   $standard_tags_re - opening or closing form of the ordinary tags
#   $single_tags_re   - void tags (br, hr), with an optional trailing slash
#   $whitelist_re     - alternation of the two above plus the 'a'/'img' cases
#
# None of the patterns is anchored and matching is case-sensitive.

# Construct a regex fragment for handling normal tags. These can
# be either a start or an end tag.
#
# 'sup' is the HTML superscript element. 'super' is not an HTML element,
# but it is kept so that input accepted before is still accepted.
my $standard_tags_fragment = join '|', qw{
    h1  h2  h3  ul  ol  li
    b   i   strong  em
    s   sub sup super   strike
    p   pre code    blockquote
};

# Define a regex to match standard, open or close tags.
# Names that are prefixes of other names (p/pre, s/sub/sup) are safe in the
# alternation: the closing '>' must follow the name immediately, so the
# engine backtracks to the longer alternative when needed.
my $standard_tags_re = qr{
    <                               # Open tag
        /?                          # Optional leading slash
        (?:$standard_tags_fragment) # Regex fragment to match tagnames
    >                               # Close tag
}xms;

# Construct a regex fragment for handling single, self-closing tags
my $single_tags_fragment = join '|', qw{ br hr };

# The /x flag is required here: the pattern is written in extended layout,
# and a qr object keeps its own flags when interpolated into another regex.
# Without /x the indentation and comments below would be matched literally.
my $single_tags_re = qr{
    <                               # Open tag
        (?:$single_tags_fragment)   # Regex fragment to match single tags
        \s*/?                       # Optional self-closing slash, with optional space
    >                               # Close tag
}xms;

# Construct the final whitelist regexp.
#
# NOTE(review): the 'a' and 'img' alternatives accept arbitrary attributes
# (event handlers, javascript: URLs, ...). If this whitelist is applied to
# untrusted HTML, the attributes need separate filtering - confirm with callers.
my $whitelist_re = qr{
        $single_tags_re         # Match a self-closing single tag
    |   $standard_tags_re       # Match an open or close tag

    # Special cases. Whitespace is required right after the tag name so that
    # longer names such as abbr, applet, audio or imgfoo are not let through.
    |   <a\s[^>]*>              # Match 'a' tags regardless of attributes
    |   <img\s[^>]*>            # Match 'img' regardless of attributes
    |   </a>                    # Special case to catch close 'a' tags
}xms;