# Names of the tags that are whitelisted without attributes, as either a
# start tag (<b>) or an end tag (</b>). They are joined with '|' so the
# list can be dropped straight into a regex alternation.
#
# 'sup' is the real HTML superscript element. The list previously carried
# only 'super', which is not an HTML tag, so <sup> was never accepted.
# 'super' is retained so that input which matched before still matches.
my $standard_tags_fragment = join '|', qw{
    h1 h2 h3 ul ol li
    b i strong em
    s sub sup super strike
    p pre code blockquote
};

# Regex matching exactly one standard tag, open or close, with no
# attributes and no whitespace inside the angle brackets.
my $standard_tags_re = qr{
    <                               # Open angle bracket
    /?                              # Optional leading slash (end tag)
    (?:$standard_tags_fragment)     # One of the whitelisted tag names
    >                               # Close angle bracket
}xms;
# Construct a regex fragment for handling single, self-closing tags
my $single_tags_fragment = join '|', qw{ br hr };

# Regex matching one single tag in any of the forms <br>, <br/> or <br />.
#
# The /x flag is essential here: the pattern is laid out over several
# lines with trailing comments. Without /x all of that layout is literal
# text to be matched, and a compiled qr// keeps its own flags when it is
# interpolated into another pattern, so an outer /x would not help.
my $single_tags_re = qr{
    <                           # Open angle bracket
    (?:$single_tags_fragment)   # One of the single tag names
    \s*/?                       # Optional self-closing slash, with optional space
    >                           # Close angle bracket
}xms;
# Construct the final whitelist regexp: it matches any one permitted tag.
#
# The 'a' and 'img' alternatives require whitespace directly after the tag
# name. Without it the patterns also accepted any element whose name merely
# starts with the same letters (abbr, applet, audio, article and so on).
#
# NOTE(review): attributes on 'a' and 'img' are not inspected at all, so
# event-handler attributes and script URLs pass through this pattern.
# Confirm that callers sanitise attribute content separately.
my $whitelist_re = qr{
      $single_tags_re       # Match a self-closing single tag
    | $standard_tags_re     # Match an open or close tag
    # Special cases
    | <a   \s [^>]* >       # Match 'a' tags regardless of attributes
    | <img \s [^>]* >       # Match 'img' regardless of attributes
    | </a>                  # Special case to catch close 'a' tags
}xms;