Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
use strict;
use warnings;

use XML::LibXML;
use Encode;
#use utf8;
use File::Slurp;
use HTML::Entities;

# Purpose: read an XML file from /tmp/xmls, rewrite the HTML named entities
# listed in %entity2char as numeric character references, and parse the
# result with XML::LibXML.
#
# Usage: script.pl <file-name-under-/tmp/xmls> [label]
#   $ARGV[0]  file name, appended to /tmp/xmls/
#   $ARGV[1]  optional value echoed to STDOUT followed by "<<<"

# HTML entity name => decimal code point.  The value is emitted as "&#NNN;".
my %entity2char = (
    'Auml'   => '196',
    'Ouml'   => '214',
    'Uuml'   => '220',
    'szlig'  => '223',
    'auml'   => '228',
    'ouml'   => '246',
    'uuml'   => '252',
    'quot'   => '34',
    'amp'    => '38',
    'lt'     => '60',
    'gt'     => '62',
    'lsqb'   => '91',
    'rsqb'   => '93',
    'lcub'   => '123',
    'rcub'   => '125',
    'nbsp'   => '160',
    'iexcl'  => '161',
    'cent'   => '162',
    'pound'  => '163',
    'curren' => '164',
    'yen'    => '165',
    'brvbar' => '166',
    'sect'   => '167',
    'uml'    => '168',
    'copy'   => '169',
    'ordf'   => '170',
    'laquo'  => '171',
    'not'    => '172',
    'shy'    => '173',
    'reg'    => '174',
    'macr'   => '175',
    'deg'    => '176',
    'plusmn' => '177',
    'sup2'   => '178',
    'sup3'   => '179',
    'acute'  => '180',
    'micro'  => '181',
    'para'   => '182',
    'middot' => '183',
    'cedil'  => '184',
    'sup1'   => '185',
    'ordm'   => '186',
    'raquo'  => '187',
    'frac14' => '188',
    'frac12' => '189',
    'frac34' => '190',
    'iquest' => '191',
    'Agrave' => '192',
    'Aacute' => '193',
    'Acirc'  => '194',
    'Atilde' => '195',
    'Aring'  => '197',
    'AElig'  => '198',
);

# replace_named_entities($text, \%code_for)
#
# Returns a copy of $text in which every "&name;" whose name is a key of
# %code_for is replaced by the numeric reference "&#<code>;".  Entity names
# not present in the map, and bare words that merely look like an entity
# name (e.g. the "lt" in "result"), are left untouched.
sub replace_named_entities {
    my ($text, $code_for) = @_;

    return $text unless defined $text && %{$code_for};

    # Longest names first so a short name can never shadow a longer one;
    # quotemeta keeps any regex metacharacter in a key literal.
    my $names = join '|',
        map  { quotemeta }
        sort { length($b) <=> length($a) or $a cmp $b }
        keys %{$code_for};

    # /e builds the replacement as an expression, which avoids any
    # ambiguity around "#" and "$" inside an interpolated string.
    $text =~ s{&($names);}{ '&#' . $code_for->{$1} . ';' }ge;

    return $text;
}

# Debug echo of the optional second argument (prints just "<<<" if absent).
my $label = defined $ARGV[1] ? $ARGV[1] : '';
print "$label<<<";

die "usage: $0 <file-name-under-/tmp/xmls> [label]\n"
    unless defined $ARGV[0] && length $ARGV[0];

my $parser = XML::LibXML->new;

# NOTE(review): $ARGV[0] is interpolated into the path unchecked, so a value
# containing "../" escapes /tmp/xmls.  Validate it if callers are untrusted.
my $file = read_file("/tmp/xmls/$ARGV[0]");
#my $dec = HTML::Entities::decode_entities($file);

$file = replace_named_entities($file, \%entity2char);

#print utf8::encode($file);
my $stingData = $parser->parse_string($file);
Add Comment
Please, Sign In to add comment