Advertisement
overloop

search_mht.pl

Nov 6th, 2014
222
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Perl 1.77 KB | None | 0 0
  1. #!/bin/perl
  2.  
  3. use strict;
  4. use POSIX;
  5. #use utf8;
  6.  
  7. use List::Util qw(min max);
  8. use URI::Encode;
  9. my $uri = URI::Encode->new( { encode_reserved => 0 } );
  10.  
  # Convert a search string into the "=XX" escaped form that the script
  # looks for in the lines of an MHT file.
  #
  # The argument is decoded in place from UTF-8 octets (utf8::decode),
  # percent-encoded with the file-level URI::Encode object $uri, and then
  # every '%' is replaced by '='.
  #
  # Returns the escaped string.
  sub mht_encode {
      my $text = shift;
      utf8::decode($text);
      my $escaped = $uri->encode($text);
      $escaped =~ s/%/=/g;
      return $escaped;
  }
  18.  
  # Turn "=XX" escaped text back into readable text: every '=' is replaced
  # by '%' and the result is passed to the decode method of the file-level
  # URI::Encode object $uri.
  #
  # Returns the decoded string.
  sub mht_decode {
      my $escaped = shift;
      $escaped =~ s/=/%/g;
      my $decoded = $uri->decode($escaped);
      return $decoded;
  }
  25.  
  # Ad-hoc manual check of both conversion directions on a Cyrillic sample.
  # Takes no arguments; prints two lines to STDOUT:
  #   1. the "=XX" sample after '=' -> '%' and $uri->decode
  #   2. the literal word after $uri->encode and '%' -> '='
  # The conversions are done inline here rather than through mht_decode /
  # mht_encode (in particular, no utf8::decode is applied before encoding).
  sub test_unicode {
      # Direction 1: escaped bytes -> text.
      my $escaped = '=D0=BC=D0=B8=D1=80=D0=BE=D0=BC';
      $escaped =~ s/=/%/g;
      my $decoded = $uri->decode($escaped);
      print "$decoded\n";

      # Direction 2: text -> escaped bytes.
      my $word = 'миром';
      #utf8::encode($word);
      my $encoded = $uri->encode($word);
      $encoded =~ s/%/=/g;
      print "$encoded\n";
  }
  37.  
  # Join physically wrapped lines back into logical lines.
  #
  # A line whose last character is '=' (the quoted-printable soft line
  # break used in MHT bodies) is continued by the following line: the '='
  # is dropped and the next line is appended directly.
  #
  # Parameters: the list of raw lines, with or without trailing CR/LF.
  # Returns:    the list of unwrapped lines, without line terminators.
  #             An empty input list yields an empty result list.
  #
  # Notes:
  #   * The input is copied, so the caller's strings are not modified.
  #   * Blank lines are preserved; "no pending line yet" is tracked with
  #     undef rather than the empty string.
  #   * A trailing '=' on the very last line is kept, since there is no
  #     following line to join it with.
  sub unwrap_mht {
      my @lines = @_;
      my @unwrapped;
      my $pending;    # logical line being assembled; undef before the first line

      for my $line (@lines) {
          $line =~ s/[\n\r]*$//;

          # Soft line break on the pending line: drop the '=' and glue on.
          if (defined $pending && $pending =~ s/=$//) {
              $pending .= $line;
              next;
          }

          push @unwrapped, $pending if defined $pending;
          $pending = $line;
      }

      push @unwrapped, $pending if defined $pending;
      return @unwrapped;
  }
  56.  
  # Command line: the first argument is the text to search for, every
  # remaining argument is a file to scan.
  my ($pattern, @files) = @ARGV;

  # Without a pattern there is nothing to search for; say so instead of
  # passing undef on to the encoder.
  die "usage: $0 PATTERN FILE...\n" unless defined $pattern;

  # Convert the pattern to the "=XX" form matched against the file lines.
  # The existing variable is reused: a second "my $pattern" in the same
  # scope would mask the declaration above.
  $pattern = mht_encode($pattern);

  #print $pattern . "\n";

  # Number of consecutive lines shown for each match.
  my $sample = 10;
  64.  
  # Scan each file named on the command line. For every line containing the
  # encoded pattern, collect a window of $sample lines around it, then print
  # each window decoded and prefixed with "<file>:".
  for my $file (@files) {
      # Lexical handle and checked open: a file that cannot be read is
      # reported on STDERR and skipped rather than silently treated as empty.
      my $input;
      unless (open $input, "<:utf8", $file) {
          warn "$file: cannot open: $!\n";
          next;
      }
      my @lines = <$input>;
      close $input;
      #print $file . "\n";

      my @matched = ();
      my $i = 0;
      while ($i < scalar(@lines)) {
          if (index($lines[$i], $pattern) > -1) {
              # The window starts int($sample/2) lines before the hit,
              # clamped to the first line of the file.
              my $i1 = max(0, $i - int($sample / 2));
              my $text = '';
              for my $j (0 .. $sample - 1) {
                  # Stop at end of file; the window may be shorter there.
                  last if $i1 + $j >= scalar(@lines);

                  my $s = $lines[$i1 + $j];
                  $s =~ s/\r?\n//;
                  if ($text =~ s/=$//) {
                      # Text so far ended in '=': drop it and join directly.
                      $text = $text . $s;
                  } else {
                      $text = $text . "\n" . $s;
                  }
              }
              push(@matched, $text);
              # Continue after the window so its lines are not matched again.
              $i = $i1 + $sample - 1;
          }
          $i = $i + 1;
      }
      print $file . ":" . mht_decode($_) . "\n" for @matched;
  }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement