
# Dan
# By: a guest on
# Jun 27th, 2008 | syntax:
# Perl | size: 1.41 KB | hits: 397 | expires: Never
# Alternation of the tag names that are handled as ordinary tags; each
# name may appear as either a start tag or an end tag.
my $standard_tags_fragment = join(
    '|',
    qw( h1 h2 h3 ul ol li ),
    qw( b i strong em ),
    qw( p pre code blockquote ),
);

# Compiled pattern for one standard tag, opening or closing, with no
# attributes (the tag name must be followed directly by '>').
my $standard_tags_re = qr{
< # Open tag
/? # Optional leading slash
(?:$standard_tags_fragment) # Regex fragment to match tagnames
> # Close tag
}xms;
# Construct a regex fragment for handling single, self-closing tags
my $single_tags_fragment = join '|', qw{ br hr };

# Define a regex to match one single tag, written either plainly (<br>)
# or in self-closing form (<br/> or <br />). No attributes and no leading
# slash are accepted. The /x flag is required so that the whitespace and
# comments inside the pattern are ignored rather than matched literally.
my $single_tags_re = qr{
    <                            # Open tag
    (?:$single_tags_fragment)    # Regex fragment to match single tags
    \s*/?                        # Optional self-closing slash, with optional space
    >                            # Close tag
}xms;
# Construct the final regexp: any of the single or standard tag patterns,
# plus a few special cases for tags that carry attributes.
# NOTE(review): the line that declared this regex was missing from the
# file, so the variable name below is a placeholder -- confirm it against
# wherever the pattern is used.
# The word boundary after 'a' and 'img' stops those alternatives from
# matching longer tag names that merely begin with the same letters.
my $final_tags_re = qr{
      $single_tags_re      # Match a self-closing single tag
    | $standard_tags_re    # Match an open or close tag
    # Special cases
    | <a\b[^>]+>           # Match 'a' tags regardless of attributes
    | <img\b[^>]+>         # Match 'img' regardless of attributes
    | </a>                 # Special case to catch close 'a' tags
}xms;