SHARE
TWEET

Entity Cleaner

sweenig Jan 3rd, 2014 1,633 Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
  #!/usr/bin/perl
  #
  # Entity Cleaner
  #
  # Usage: entity_cleaner.pl FILE
  #
  # Rewrites FILE in place, replacing the HTML entities &lt;, &gt; and
  # &quot; with the literal characters <, > and ". Every other entity
  # (for example &amp;) passes through unchanged.
  #
  # The whole file is buffered in memory before it is rewritten, because
  # the output path is the same as the input path.
  use strict;
  use warnings;

  # $ARGV[0] is the scalar element; @ARGV[0] would be a one-element slice.
  my $inputfile = $ARGV[0];
  die "Usage: $0 FILE\n" unless defined $inputfile && length $inputfile;

  print "Cleaning HTML entities from $inputfile...\n";

  # Three-argument open: the file name comes from the command line, so it
  # must never be interpreted as a mode or a pipe specification.
  open(my $in_fh, '<', $inputfile)
      or die "Cannot open $inputfile: $!\n";

  my @out;
  while (my $line = <$in_fh>) {
      chomp $line;
      $line =~ s/&lt;/</g;
      $line =~ s/&gt;/>/g;
      $line =~ s/&quot;/"/g;
      push @out, $line;
  }
  close($in_fh)
      or die "Cannot close $inputfile after reading: $!\n";

  # Opening for write truncates the file; by now its full contents are
  # held in @out. If this open fails the original file is left untouched.
  open(my $out_fh, '>', $inputfile)
      or die "Cannot open $inputfile for writing: $!\n";

  for my $line (@out) {
      print {$out_fh} "$line\n"
          or die "Cannot write to $inputfile: $!\n";
  }

  # Buffered write errors only surface at close, so check it before
  # reporting success.
  close($out_fh)
      or die "Cannot close $inputfile after writing: $!\n";

  print "Cleaned $inputfile.\n";
RAW Paste Data
Top