Advertisement
sweenig

Entity Cleaner

Jan 3rd, 2014
1,877
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Perl 0.63 KB | None | 0 0
  #!/usr/bin/perl
  # Entity Cleaner: rewrites a file in place, decoding the HTML entities
  # &lt;, &gt; and &quot; into the literal characters <, > and ".
  #
  # Usage: perl <this script> FILE
  #
  # All lines are read into memory first because the output file is the
  # input file: it is truncated and rewritten only after reading finishes.
  # Every output line is terminated with "\n", so a final line that had no
  # trailing newline gains one. Only the three entities above are decoded;
  # any other entity (for example &amp;) is left exactly as found.
  #
  # Exits non-zero with a message on STDERR if FILE is missing, cannot be
  # read, or cannot be written.
  use strict;
  use warnings;

  # Scalar element access; @ARGV[0] would be a one-element slice.
  my $inputfile = $ARGV[0];
  die "Usage: $0 FILE\n"
      unless defined $inputfile && length $inputfile;

  print "Cleaning HTML entities from $inputfile...\n";

  # Three-argument open with a lexical handle, so characters such as a
  # leading '>' or a trailing '|' in the file name are never treated as an
  # open mode or a command to run.
  open(my $in_fh, '<', $inputfile)
      or die "Cannot open $inputfile: $!\n";

  my @out;
  while (my $line = <$in_fh>) {
      chomp $line;
      $line =~ s/&lt;/</g;
      $line =~ s/&gt;/>/g;
      $line =~ s/&quot;/"/g;
      push @out, $line;
  }
  close($in_fh)
      or die "Cannot close $inputfile: $!\n";

  # The cleaned text replaces the original file.
  my $outputfile = $inputfile;
  open(my $out_fh, '>', $outputfile)
      or die "Cannot open $outputfile for writing: $!\n";
  for my $line (@out) {
      print {$out_fh} "$line\n"
          or die "Cannot write to $outputfile: $!\n";
  }

  # Buffered write errors (e.g. a full disk) surface at close, so check it
  # before reporting success.
  close($out_fh)
      or die "Cannot finish writing $outputfile: $!\n";

  print "Cleaned $inputfile.\n";
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement