
Mike
By: a guest on Nov 10th, 2009 | syntax: Perl | size: 1.70 KB | hits: 151 | expires: Never
#!/usr/bin/perl
use warnings;
use strict;
use HTML::TreeBuilder;
use CSS::DOM::Style;
# Sample markup to convert: inline CSS on <p>/<span> elements that the code
# below turns into the equivalent presentational tags.
my $html = <<'HTML';
<p style="text-align:center"><span style="font-weight:bold;font-style:italic;">Here's some text here.</span></p>
<p><span style="text-decoration:underline;">And some more.</span> Yet even more!</p>
HTML

# Conversion table. Each entry says: when CSS `property` has exactly `value`,
# drop that declaration and wrap the element's content in a `replacement` tag.
# Entries are tried in this order for every element.
my @replacements = map {
    my ($css_property, $css_value, $tag_name) = @{$_};
    +{
        property    => $css_property,
        value       => $css_value,
        replacement => $tag_name,
    };
} (
    [ 'font-style',      'italic',    'em'     ],
    [ 'font-weight',     'bold',      'strong' ],
    [ 'text-align',      'center',    'center' ],
    [ 'text-decoration', 'underline', 'u'      ],
);

# Parse the sample markup into an HTML::TreeBuilder element tree.
my $tb = HTML::TreeBuilder->new_from_content($html);
# Gather every <p> and <span> element in the tree. Widen the tag pattern (or
# use a callback that always returns true) to inspect more element types.
my @nodes = $tb->look_down(sub { $_[0]->tag =~ /^(p|span)$/ });

NODE:
for my $node (@nodes) {
    # Elements without a (true-valued) style attribute are left untouched.
    my $inline_css = $node->attr('style');
    next NODE unless $inline_css;

    # Parse the attribute text into a style declaration object.
    my $decl = CSS::DOM::Style::parse($inline_css);
    next NODE unless $decl;

    for my $rule (@replacements) {
        my ($css_property, $css_value, $tag_name)
            = @{$rule}{qw(property value replacement)};

        next unless $decl->getPropertyValue($css_property) eq $css_value;

        # The declaration is being replaced by a tag, so remove it from the
        # style and move all current children under a new wrapper element.
        # A later match wraps the wrapper created by an earlier one, so
        # several matching rules nest.
        $decl->removeProperty($css_property);
        my $wrapper = HTML::Element->new($tag_name);
        $wrapper->push_content($node->detach_content);
        $node->push_content($wrapper);
    }

    # Write back whatever declarations are left; passing undef removes the
    # style attribute when nothing remains.
    my $remaining_css = $decl->cssText;
    $node->attr('style', $remaining_css ? $remaining_css : undef);
}
# Serialize the transformed tree back to HTML text.
#   undef -> use as_HTML's default entity encoding
#   "\t"  -> indent the output with tabs
#   {}    -> empty "optional end tags" map; it was added because as_HTML
#            was otherwise cutting off the ending </p>.
my $src = $tb->as_HTML(undef, "\t", {});

# Reduce every opening <p ...> tag to a bare <p>.
# The pattern has to stay on one physical line: without the /x modifier a
# line break inside the pattern is a literal newline that must be matched, so
# a pattern split after "<p" never matches a real tag. The \b keeps tags that
# merely start with "p" (<pre>, <param>) from being rewritten, and [^>]*
# stops at the end of the tag without needing /s.
$src =~ s{<p\b[^>]*>}{<p>}g;

# Drop <div>/<span> opening and closing tags while keeping their contents.
# Same single-line and word-boundary reasoning as above; the group is
# non-capturing because the tag name is not used.
$src =~ s{</?(?:div|span)\b[^>]*>}{}g;