theanonym

alterchan.pl

Sep 14th, 2012
201
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Perl 4.66 KB | None | 0 0
  1. #!/usr/bin/perl
  2. #----------------------------------------
  3. #
  4. # http://alterchan.net/b/res/31574.html
  5. #
  6. #----------------------------------------
  7.  
  8. use v5.10;
  9. use strict;
  10. use warnings;
  11. use autodie;
  12. use threads;
  13. use Thread::Semaphore;
  14. use LWP;
  15.  
  #----------------------------------------
  # Settings

  my $password = ""; # Password of any account; that account's blacklist will be cleared

  my $board    = "b"; # Board to scan
  my $pages    = 3;   # Number of index pages to scan, starting from page 0
  my $thread   = 0;   # If non-zero, only this single thread is scanned

  my $target   = 0;           # Target post number
  my $outfile  = "posts.txt"; # File the found posts are saved to

  #----------------------------------------

  # HTTP client used for every request. The empty hash enables an in-memory
  # cookie jar (presumably so the login session persists between requests).
  # Direct Class->new calls replace the indirect "new Class" syntax, which
  # is ambiguous to parse and disabled by newer feature bundles.
  my $lwp = LWP::UserAgent->new;
  $lwp->agent( "Opera/9.80 (X11; Linux i686; U; en) Presto/2.10.289 Version/12.02" );
  $lwp->cookie_jar( {} );

  # Limits how many worker threads may be downloading at the same time.
  my $sem = Thread::Semaphore->new( 15 );
  35.  
  # Log in to alterchan.net with the given account password.
  #
  # The form fields are handed to LWP as a key/value list so that the
  # password gets URL-encoded. Interpolating it into the request body by
  # hand corrupted the request whenever the password contained characters
  # such as '&', '=', '+', '%' or a space.
  #
  # Arguments: $pass - account password.
  # Dies with the response body unless the reply contains "Ist Gut".
  # The ($) prototype is kept so existing call sites parse exactly as before.
  sub login($) {
     my( $pass ) = @_;
     say "Login";
     my $res = $lwp->post( "http://alterchan.net/uid.php",
        [ action => "login", pass1 => $pass ],
     );
     unless( $res->content =~ /Ist Gut/ ) {
        die "Can't login:\n" . $res->content;
     }
  }
  47.  
  # Add one post to the blacklist of the logged-in account.
  #
  # The form fields are handed to LWP as a key/value list so that both
  # values are URL-encoded instead of being pasted raw into the body.
  #
  # Arguments: $board - board name, $post - post number.
  # Dies with the full response if the request is not successful.
  # The ($$) prototype is kept so existing call sites parse exactly as before.
  sub add_to_blacklist($$) {
     my( $board, $post ) = @_;
     my $res = $lwp->post( "http://alterchan.net/uid.php",
        [ action => "blacklist", number => $post, board => $board ],
     );
     unless( $res->is_success ) {
        die "Can't hide post '$board/$post':\n" . $res->as_string;
     }
  }
  58.  
  # Erase the whole blacklist of the logged-in account.
  #
  # Takes no arguments. Dies with the full response if the request is not
  # successful.
  sub clear_blacklist() {
     my @request = (
        Content_Type => "application/x-www-form-urlencoded",
        Content      => "action=erase",
     );
     my $res = $lwp->post( "http://alterchan.net/uid.php", @request );
     $res->is_success
        or die "Can't erase blacklist:\n" . $res->as_string;
  }
  68.  
  # Download one index page of a board and return its raw content.
  #
  # Page 0 is the board root URL; any other page N is "N.html" below it.
  #
  # Arguments: $board - board name, $page - page number.
  # Dies with the full response if the download fails.
  sub get_page($$) {
     my( $board, $page ) = @_;
     say "Get page '$board/$page'";
     my $suffix = "";
     $suffix = "$page.html" if $page;
     my $res = $lwp->get( "http://alterchan.net/$board/" . $suffix );
     die "Can't download page '$board/$page':\n" . $res->as_string
        unless $res->is_success;
     return $res->content;
  }
  80.  
  # Extract the thread ids from the HTML of a board index page.
  #
  # A thread is recognised by a line starting with <div id="threadNNN...">.
  # The id attribute may carry a suffix after the digits, and the tag may
  # carry further attributes. The pattern no longer demands at least one
  # character after the digits: with that requirement an id with no suffix
  # lost its last digit to backtracking (thread456 became 45) and a
  # single-digit id did not match at all.
  #
  # Arguments: $html - page content.
  # Returns:   list of thread ids, in page order.
  # Dies with "No threads found" when the page contains no thread.
  sub parse_threads($) {
     my( $html ) = @_;
     # [^"\n] and [^>\n] keep the match on one line, as "." did before.
     my @threads = $html =~ /^<div id="thread(\d+)[^"\n]*"[^>\n]*>/gm;
     if( @threads ) {
        return @threads;
     } else {
        die "No threads found";
     }
  }
  90.  
  # Download one thread and return its raw content.
  #
  # Arguments: $board - board name, $thread - thread number.
  # Dies with the full response if the download fails.
  sub get_thread($$) {
     my( $board, $thread ) = @_;
     say "Get thread '$board/$thread'";
     my $url = "http://alterchan.net/$board/res/$thread.html";
     my $res = $lwp->get( $url );
     die "Can't download thread '$board/$thread':\n" . $res->as_string
        unless $res->is_success;
     return $res->content;
  }
  101.  
  # Extract the replies of a thread from its HTML.
  #
  # Each reply is a <td class="reply" id="replyNNN"> line followed later by
  # a <blockquote> ... </blockquote> pair, each starting its own line.
  #
  # Arguments: $html - thread page content.
  # Returns:   hashref { post number => plain text of the post }.
  #            The hash is empty when nothing matches.
  sub parse_posts($) {
     my( $html ) = @_;
     my %posts = $html =~ m/^<td class="reply" id="reply(\d+)">.*?^<blockquote>(.*?)^<\/blockquote>/gms;
     for( values %posts ) {
        # Strip markup first (/s so a tag broken across lines goes too),
        # then trim, so whitespace exposed by removed tags is dropped as well.
        s/<.*?>//gs;
        s/^\s+|\s+$//g;
        # Decode the basic entities. &amp; goes last so that text such as
        # "&amp;lt;" turns into "&lt;" and is not decoded a second time.
        s/&gt;/>/g;
        s/&lt;/</g;
        s/&quot;/"/g;
        s/&#0*39;|&apos;/'/g;
        s/&amp;/&/g;
     }
     return \%posts;
  }
  113.  
  # Download and parse all the given threads of $board concurrently.
  #
  # One worker thread is started per board thread; $sem limits how many of
  # them are downloading at the same time.
  #
  # Arguments: list of thread ids.
  # Returns:   hashref { thread id => hashref returned by parse_posts }.
  #            A thread whose worker failed is reported with a warning and
  #            left out of the result.
  sub parse_all_posts(@) {
     my( @threads ) = @_;
     my @workers = map {
        my $id = $_; # private copy, so the worker does not depend on the global $_
        threads->create( { context => "list" }, sub {
           $sem->down;
           # Trap failures so the semaphore slot is always given back.
           # Otherwise each failed download leaks one slot, and once all
           # slots are gone the remaining workers block forever.
           my $posts = eval { parse_posts( get_thread( $board, $id ) ) };
           my $error = $@;
           $sem->up;
           die $error unless $posts;
           return ( $id, $posts );
        } );
     } @threads;
     my %threads;
     for my $worker ( @workers ) {
        # A worker that died returns nothing from join.
        my( $id, $posts ) = $worker->join;
        unless( defined $id ) {
           warn "A worker thread failed, its thread is skipped\n";
           next;
        }
        $threads{$id} = $posts;
     }
     return \%threads;
  }
  131.  
  # Write $data to the file $fname, replacing any previous content.
  #
  # Arguments: $fname - path of the file, $data - content to write. Undefined
  #            data is treated as an empty string, so an empty result just
  #            produces an empty file instead of warnings.
  # Warns "File '...' written with errors" if not all data could be written.
  # open and close failures raise an exception through the file-wide autodie.
  sub write_file($$) {
     my( $fname, $data ) = @_;
     $data //= "";
     open my $fh, ">", $fname;
     my $total   = length $data;
     my $written = 0;
     # syswrite may write less than requested; continue from where it stopped.
     while( $written < $total ) {
        my $count = syswrite( $fh, $data, $total - $written, $written );
        last unless $count; # no progress: give up and report below
        $written += $count;
     }
     if( $written != $total ) {
        warn "File '$fname' written with errors";
     }
     # Close explicitly so that a failure at close time is not lost.
     close $fh;
  }
  139.  
  140. #----------------------------------------
  141.  
  # Log in and start from an empty blacklist.
  login( $password );
  clear_blacklist();

  # Collect the ids of the threads to scan: either the single configured
  # thread, or every thread listed on the first $pages index pages.
  my @all;
  if( $thread ) {
     push @all, $thread;
  } else {
     my @workers = map {
        my $page = $_; # private copy, so the worker does not depend on the global $_
        threads->create( { context => "list" }, sub {
           $sem->down;
           # Trap failures (for example a page without threads) so that the
           # semaphore slot is always given back.
           my @ids = eval { parse_threads( get_page( $board, $page ) ) };
           my $error = $@;
           $sem->up;
           die $error if $error;
           return @ids;
        } );
     } ( 0 .. $pages - 1 );
     push @all, $_->join for @workers;
  }

  # First scan: every post, taken with an empty blacklist.
  my $threads = parse_all_posts( @all );

  # Second scan, taken after blacklisting the target post. The post numbers
  # go into a hash so each membership test below is a single lookup; this
  # replaces the deprecated smartmatch operator and its scan of the whole list.
  add_to_blacklist( $board, $target );
  my %seen_after = map { $_ => 1 } map { keys %$_ } values %{ parse_all_posts( @all ) };

  clear_blacklist();

  # Report every post that was present in the first scan but is missing
  # from the second one. $text starts as an empty string so that an empty
  # result is still written without warnings.
  my $text = "";
  for my $thread_id ( sort { $a <=> $b } keys %$threads ) {
     for my $post ( sort { $a <=> $b } keys %{ $threads->{$thread_id} } ) {
        next if $seen_after{$post};
        $text .= "http://alterchan.net/$board/res/$thread_id.html\n";
        $text .= "Тред $thread_id, пост $post:\n-----\n$threads->{$thread_id}->{$post}\n-----\n";
     }
  }

  write_file( $outfile, $text );
Advertisement
Add Comment
Please, Sign In to add comment