Advertisement
theanonym

alterchan2.pl

Oct 19th, 2012
74
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
  1. #!/usr/bin/perl
  2. #----------------------------------------
  3. #
  4. # http://alterchan.net/b/res/31574.html
  5. #
  6. #----------------------------------------
  7.  
  8. use v5.10;
  9. use strict;
  10. use warnings;
  11. use autodie;
  12. use Coro;
  13. use Coro::LWP;
  14. use Coro::Semaphore;
  15. use LWP;
  16.  
  #----------------------------------------
  # Settings

  my $password = ""; # Password of any account; that account's blacklist will be cleared

  my $board    = "b"; # Board to scan
  my $pages    = 3;   # Number of index pages to scan, starting from page 0
  my $thread   = 0;   # If set, only this single thread is scanned

  my $target   = 0;           # Target post number
  my $outfile  = "posts.txt"; # Where to save the posts that were found

  #----------------------------------------

  # Shared user agent; the empty hashref enables an in-memory cookie jar so
  # that cookies set by one request are sent with the following ones.
  # Direct Class->new calls are used instead of indirect object syntax
  # ("new Class"), which parses ambiguously.
  my $lwp = LWP::UserAgent->new(
     agent      => "Opera/9.80 (X11; Linux i686; U; en) Presto/2.10.289 Version/12.02",
     cookie_jar => {},
  );

  # Counting semaphore shared by all download workers (initial count 15).
  my $sem = Coro::Semaphore->new( 15 );
  36.  
  # Log in to the site with an account password.
  #
  # Arguments: $pass - account password, sent as the "pass1" form field.
  # Returns:   nothing.
  # Dies:      when the response body does not contain "Ist Gut"; the body
  #            is included in the error message.
  sub login($) {
     my( $pass ) = @_;
     say "Login";
     # Hand the fields to LWP as a list so they are form-encoded for us.
     # Interpolating the password into a hand-built body breaks the request
     # as soon as it contains "&", "=", "%" or "+".
     my $res = $lwp->post( "http://alterchan.net/uid.php",
        [ action => "login", pass1 => $pass ],
     );
     unless( $res->content =~ /Ist Gut/ ) {
        die "Can't login:\n" . $res->content;
     }
     return;
  }
  48.  
  # Send the "blacklist" action for one post to uid.php.
  #
  # Arguments: $board - board name, $post - post number.
  # Dies:      when the HTTP response is not a success; the full response
  #            is included in the error message.
  sub add_to_blacklist($$) {
     my( $board, $post ) = @_;
     my $body     = "action=blacklist&number=$post&board=$board";
     my $response = $lwp->post(
        "http://alterchan.net/uid.php",
        Content_Type => "application/x-www-form-urlencoded",
        Content      => $body,
     );
     $response->is_success
        or die "Can't hide post '$board/$post':\n" . $response->as_string;
  }
  59.  
  # Send the "erase" action to uid.php (used by this script to reset the
  # blacklist of the logged-in account).
  #
  # Dies: when the HTTP response is not a success; the full response is
  #       included in the error message.
  sub clear_blacklist() {
     my $response = $lwp->post(
        "http://alterchan.net/uid.php",
        Content_Type => "application/x-www-form-urlencoded",
        Content      => "action=erase",
     );
     $response->is_success
        or die "Can't erase blacklist:\n" . $response->as_string;
  }
  69.  
  # Download one index page of a board and return its raw content.
  #
  # Arguments: $board - board name, $page - page number; a false value
  #            (0) requests the board root instead of "<page>.html".
  # Returns:   the response content.
  # Dies:      when the HTTP response is not a success.
  sub get_page($$) {
     my( $board, $page ) = @_;
     say "Get page '$board/$page'";
     my $suffix   = $page ? "$page.html" : "";
     my $response = $lwp->get( "http://alterchan.net/$board/" . $suffix );
     die "Can't download page '$board/$page':\n" . $response->as_string
        unless $response->is_success;
     return $response->content;
  }
  81.  
  # Extract thread numbers from the HTML of a board index page.
  #
  # A thread is recognised by a line starting with <div id="threadNNN...">.
  #
  # Arguments: $html - page source.
  # Returns:   the list of thread numbers, in document order.
  # Dies:      with "No threads found" when nothing matches.
  sub parse_threads($) {
     my( $html ) = @_;
     # The possessive \d++ never gives digits back, and .* (rather than .+)
     # allows the id attribute to end right after the number. With
     # "(\d+).+" an id such as "thread45" was captured as "4".
     my @threads = $html =~ /^<div id="thread(\d++).*">/gm;
     die "No threads found" unless @threads;
     return @threads;
  }
  91.  
  # Download a single thread page and return its raw content.
  #
  # Arguments: $board - board name, $thread - thread number.
  # Returns:   the response content.
  # Dies:      when the HTTP response is not a success.
  sub get_thread($$) {
     my( $board, $thread ) = @_;
     say "Get thread '$board/$thread'";
     my $url      = "http://alterchan.net/$board/res/$thread.html";
     my $response = $lwp->get( $url );
     die "Can't download thread '$board/$thread':\n" . $response->as_string
        unless $response->is_success;
     return $response->content;
  }
  102.  
  # Extract the replies of a thread page as plain text.
  #
  # A reply is a line starting with <td class="reply" id="replyNNN">; its
  # text is whatever sits between the next line-leading <blockquote> and
  # </blockquote>. Only replies are matched by this pattern.
  #
  # Arguments: $html - thread page source.
  # Returns:   hashref mapping post number => cleaned-up post text.
  sub parse_posts($) {
     my( $html ) = @_;
     my %posts = $html =~ m/^<td class="reply" id="reply(\d+)">.*?^<blockquote>(.*?)^<\/blockquote>/gms;
     for my $text ( values %posts ) {
        # Strip markup first ([^>] also crosses newlines), so that the
        # whitespace left behind by removed tags is trimmed at the end.
        $text =~ s/<[^>]*>//g;
        $text =~ s/&lt;/</g;
        $text =~ s/&gt;/>/g;
        $text =~ s/&quot;/"/g;
        # &amp; goes last so that "&amp;lt;" ends up as the literal "&lt;".
        $text =~ s/&amp;/&/g;
        $text =~ s/\A\s+|\s+\z//g;
     }
     return \%posts;
  }
  114.  
  # Download and parse several threads of $board concurrently.
  #
  # One coroutine is started per thread; each takes the shared semaphore
  # around its download and parse step and releases it afterwards.
  #
  # Arguments: @threads - thread numbers.
  # Returns:   hashref mapping thread number => hashref of posts as
  #            produced by parse_posts.
  sub parse_all_posts(@) {
     my( @threads ) = @_;
     my @jobs = map {
        my $id = $_;
        async {
           $sem->down;
           my $posts = parse_posts( get_thread( $board, $id ) );
           $sem->up;
           return ( $id, $posts );
        };
     } @threads;
     my %posts_of;
     for my $job ( @jobs ) {
        my( $id, $posts ) = $job->join;
        $posts_of{$id} = $posts;
     }
     return \%posts_of;
  }
  133.  
  # Write a string to a file, replacing any previous content.
  #
  # Arguments: $fname - path of the file to create or truncate,
  #            $data  - bytes to write; undef is treated as an empty string.
  # Returns:   nothing.
  # Dies:      when the file cannot be opened, written or closed.
  sub write_file($$) {
     my( $fname, $data ) = @_;
     $data //= "";
     open my $fh, ">", $fname or die "Can't open '$fname' for writing: $!";
     my $total  = length $data;
     my $offset = 0;
     # syswrite may write fewer bytes than requested; keep going until the
     # whole buffer is on disk instead of leaving a truncated file.
     while( $offset < $total ) {
        my $written = syswrite( $fh, $data, $total - $offset, $offset );
        die "Can't write to '$fname': $!" unless defined $written;
        $offset += $written;
     }
     close $fh or die "Can't close '$fname': $!";
     return;
  }
  141.  
  142. #----------------------------------------
  143.  
  # Main flow: snapshot all posts, blacklist the target post, scan again
  # and report every post that is no longer returned by the second scan.
  # NOTE(review): presumably the site hides every post by the blacklisted
  # author, so the vanished posts share the target's author - confirm.

  login( $password );
  clear_blacklist();

  # Collect the thread numbers to scan: either the single configured
  # thread or every thread listed on the first $pages index pages.
  my @all;
  if( $thread ) {
     push @all, $thread;
  } else {
     my @workers;
     for my $page ( 0 .. $pages - 1 ) {
        push @workers, async {
           $sem->down;
           my @ret = parse_threads( get_page( $board, $page ) );
           $sem->up;
           return @ret;
        };
     }
     push @all, $_->join for @workers;
  }

  # First scan, taken with an empty blacklist.
  my $threads = parse_all_posts( @all );

  # Second scan, taken with the target post blacklisted. A hash is used as
  # a set: the deprecated smartmatch operator rescanned the whole list for
  # every post.
  add_to_blacklist( $board, $target );
  my %still_visible = map { $_ => 1 }
                      map { keys %$_ }
                      values %{ parse_all_posts( @all ) };

  clear_blacklist();

  # Start from an empty string so an empty result still writes cleanly.
  my $text = "";
  for my $thread ( sort { $a <=> $b } keys %$threads ) {
     for my $post ( sort { $a <=> $b } keys %{ $threads->{$thread} } ) {
        next if $still_visible{$post};
        $text .= "http://alterchan.net/$board/res/$thread.html\n";
        $text .= "Тред $thread, пост $post:\n-----\n$threads->{$thread}->{$post}\n-----\n";
     }
  }

  write_file( $outfile, $text );
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement