Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- #!/bin/perl
- use strict;
- use POSIX;
- #use utf8;
- use List::Util qw(min max);
- use URI::Encode;
- my $uri = URI::Encode->new( { encode_reserved => 0 } );
# Encode a search pattern the way text is stored inside an MHT body
# (quoted-printable, RFC 2045), so that it can be compared against the raw
# lines of the file with index().
#
# Parameters:
#   $s - the pattern, either as UTF-8 octets (as received from @ARGV) or as
#        an already decoded character string.
# Returns:
#   The pattern as quoted-printable text: printable ASCII, space and tab are
#   kept literally, '=' becomes "=3D", and every other octet becomes "=XX"
#   with uppercase hex digits. Returns nothing (undef in scalar context)
#   when $s is undefined.
sub mht_encode {
    my ($s) = @_;
    return unless defined $s;

    # Normalise to UTF-8 octets. decode() turns valid UTF-8 octets into
    # characters (and leaves anything else untouched); encode() then yields
    # the octet form in every case.
    utf8::decode($s);
    utf8::encode($s);

    # Escape only what quoted-printable requires. Space, '%', quotes and the
    # like stay literal, because that is how they appear in the file; a
    # literal '=' must be escaped, because '=' introduces an escape.
    $s =~ s/([^\t\x20-\x3C\x3E-\x7E])/sprintf('=%02X', ord $1)/ge;

    return $s;
}
# Decode the quoted-printable escapes in a piece of MHT text.
#
# Parameters:
#   $s - text taken from an MHT body, with soft line breaks already joined.
# Returns:
#   The text with every "=XX" escape (two hex digits, either case) replaced
#   by the single character with that code. Everything else is returned
#   unchanged, including '%' sequences and any '=' that is not followed by
#   two hex digits. Returns nothing (undef in scalar context) when $s is
#   undefined.
sub mht_decode {
    my ($s) = @_;
    return unless defined $s;

    # Single pass, so the output of one escape is never re-read as the start
    # of another ("=3D41" decodes to "=41", not to "A").
    $s =~ s/=([0-9A-Fa-f]{2})/chr(hex($1))/ge;

    return $s;
}
# Manual smoke test for the two helpers; takes no arguments and prints two
# lines to STDOUT:
#   1. a fixed quoted-printable sample run through mht_decode()
#   2. the same word, written as a literal, run through mht_encode()
# The sample on the first line is the escaped form of the literal on the
# second, so the second line is expected to reproduce the sample.
sub test_unicode {
    my $decoded = mht_decode('=D0=BC=D0=B8=D1=80=D0=BE=D0=BC');
    print $decoded . "\n";

    # Go through mht_encode() rather than repeating its steps here, so this
    # check exercises exactly what the search code uses.
    my $encoded = mht_encode('миром');
    print $encoded . "\n";

    return;
}
# Join quoted-printable soft line breaks.
#
# A physical line that ends in '=' continues on the next physical line; the
# '=' and the line ending are dropped and the two lines are concatenated.
#
# Parameters:
#   @_ - physical lines, with or without their trailing CR/LF. The caller's
#        values are not modified.
# Returns:
#   The list of logical lines, without line endings. Blank lines are kept.
#   An empty input gives an empty list. If the last line ends in '=', that
#   '=' is left in place because there is nothing to join it with.
sub unwrap_mht {
    my @lines = @_;

    my @unwrapped;
    my $pending;    # logical line being assembled; undef before the first line

    for my $line (@lines) {
        (my $text = $line) =~ s/[\r\n]+\z//;

        if (defined $pending && $pending =~ s/=\z//) {
            # Previous line ended in a soft break: keep appending to it.
            $pending .= $text;
        }
        else {
            push @unwrapped, $pending if defined $pending;
            $pending = $text;
        }
    }
    push @unwrapped, $pending if defined $pending;

    return @unwrapped;
}
# Main program.
#
# Usage: <script> PATTERN FILE...
#
# Searches each FILE for PATTERN in its quoted-printable form and, for every
# hit, prints "FILE:" followed by a decoded block of surrounding lines.
my ($pattern, @files) = @ARGV;

# Without this check an undefined pattern is treated as the empty string by
# index(), which matches every line.
die "Usage: $0 PATTERN FILE...\n" unless defined $pattern;

$pattern = mht_encode($pattern);

# Number of physical lines shown per match (about half before the hit).
my $sample = 10;

for my $file (@files) {
    my $fh;
    unless (open $fh, '<:utf8', $file) {
        warn "$0: cannot open $file: $!\n";
        next;
    }
    my @lines = <$fh>;
    close $fh;

    my @matched = ();
    my $i = 0;
    while ($i < scalar(@lines)) {
        if (index($lines[$i], $pattern) > -1) {
            # Window of $sample lines starting a little before the hit,
            # clipped to the file.
            my $first = max(0, $i - int($sample / 2));
            my $last  = min($#lines, $first + $sample - 1);

            my $text = '';
            for my $line (@lines[$first .. $last]) {
                (my $s = $line) =~ s/\r?\n//;
                if ($text =~ s/=$//) {
                    # Soft line break: continue on the same logical line.
                    $text .= $s;
                }
                else {
                    $text .= "\n" . $s;
                }
            }
            push(@matched, $text);

            # Resume after the window so its lines are not reported twice.
            $i = $first + $sample - 1;
        }
        $i = $i + 1;
    }

    print $file . ":" . mht_decode($_) . "\n" for @matched;
}
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement