Advertisement
Qaltilon

RMS Bot

Feb 11th, 2012
3,442
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Perl 8.19 KB | None | 0 0
  1. #!/usr/bin/perl
  2. #
  3. #       interjection.pl
  4. #
  5. #   Run in a loop, automating the following sequence of actions:
  6. #
  7. #   1.  Scan front page of selected boards, collect list of threads.
  8. #   2.  Scan thread for erroneous usage of "Linux", in the context of
  9. #       describing a complete operating system.
  10. #   3.  Interject with random Stallman picture and apt pasta, then sleep.
  11. #   4.  At the end of each sweep, sleep for a few minutes before repeating
  12. #       again, ad nauseam.
  13.  
  14. use warnings;
  15. use strict;
  16.  
  17. use LWP::UserAgent;
  18. use HTML::Form;
  19. use Data::Dumper;
  20. use DateTime;
  21. use Captcha::reCAPTCHA;
  22.  
  23. my @threads;
  24. my $output;
  25. my $iteration = 0;
  26. my %boards = ( g => 'zip' );                        # Hash containing boards to sweep.
  27. my $log_file = "$ENV{HOME}/log_interjection";
  28. my @ns_headers = (
  29.     'User-Agent' => 'Mozilla/5.0 (X11; U; Linux x86_64; en-US; rv:1.9.0.9) Gecko/2009050519 Iceweasel/3.0.9 (Debian-3.0.9-1)',
  30.     'Accept-Charset' => 'iso-8859-1,*,utf-8',
  31.     'Accept-Language' => 'en-US',
  32. );
  33.  
  34. our $logging_enabled = 1;
  35. our $pic_path = "/home/derp/rms/";            # Directory holding delcious Stallman pictures
  36. our $scan_interval = 240;                           # Interval between each sweep of all boards
  37. our $min_post_interval = 30;                        # Minimum delay after each individual interjection
  38. our $post_interval_variation = 15;                  # Upper threshold of random additional delay after interjecting
  39.  
  40. our $total_posts = 0;
  41. our @handsome_rms_pics = <$pic_path*>;
  42. our @interjected;                                   # Track posts already responded to.
  43. our $browser = LWP::UserAgent->new;
  44. our $rms_pasta =<<FIN;
  45. I would like to interject for a moment. What you're refering to as Linux, is in fact, GNU/Linux, or as I've recently taken to calling it, GNU plus Linux. Linux is not an operating system unto itself, but rather another free component of a fully functioning GNU system made useful by the GNU corelibs, shell utilities and vital system components comprising a full OS as defined by POSIX.
  46.  
  47. Many computer users run a modified version of the GNU system every day, without realizing it. Through a peculiar turn of events, the version of GNU which is widely used today is often called "Linux", and many of its users are not aware that it is basically the GNU system, developed by the GNU Project.
  48.  
  49. There really is a Linux, and these people are using it, but it is just a part of the system they use. Linux is the kernel: the program in the system that allocates the machine's resources to the other programs that you run. The kernel is an essential part of an operating system, but useless by itself; it can only function in the context of a complete operating system. Linux is normally used in combination with the GNU operating system: the whole system is basically GNU with Linux added, or GNU/Linux. All the so-called "Linux" distributions are really distributions of GNU/Linux.
  50. FIN
  51.  
  52. open LOGGING, ">", $log_file or die $!;   # Log file location
  53. print LOGGING "...logging to $log_file\n";
  54.  
  55.  
  56.  
  57. &log_msg("### ------------ interjection.pl ------------ ###");
  58. &log_msg("###");
  59. &log_msg("### \$pic_path:\t\t\t$pic_path");
  60. &log_msg("### \$scan_interval:\t\t$scan_interval");
  61. &log_msg("### \$min_post_interval:\t\t$min_post_interval");
  62. &log_msg("### \$post_interval_variation:\t$post_interval_variation");
  63. &log_msg("###");
  64. &log_msg("### ----------------------------------------- ###");
  65. &log_msg("Entering main loop...");
  66.  
  67. while (1) {
  68.     &log_msg("Iteration $iteration");
  69.     for (sort keys %boards) {
  70. #       Aggregate listing of threads on front page of board,
  71. #       pass each thread to &scan_posts to read.
  72.  
  73.         my ($srvr, $board) = ($boards{$_}, $_);
  74.         my $board_url = "http://$boards{$board}.4chan.org/$board/imgboard.html";
  75.         my $page = ($browser->get($board_url, @ns_headers))->content;
  76.         $@ and print STDERR "$!\n";
  77.         push @threads, $page =~ /<span id="nothread(\d+)">/g;
  78.  
  79.         &scan_posts("http://$srvr.4chan.org/$board/res/$_.html") for @threads;
  80.     }
  81.  
  82.     &log_msg("Ending iteration $iteration. Will resume in $scan_interval seconds.\n");
  83.     sleep($scan_interval);  # long pause between sweeps.
  84.     $iteration++;
  85. }
  86. sub random_string(;$)
  87. {
  88.     my $length = shift || 8;
  89.     my @char = ('a' .. 'z', 'A' .. 'Z', 0 .. 9);
  90.  
  91.     my $string;
  92.     $string .= $char[rand @char] while ($length--);
  93.  
  94.     return $string;
  95. }
  96. sub invoke_curl($)
  97. {
  98.     my ($options) = shift;
  99.  
  100.     my $command = "curl $options --progress-bar -f ";
  101.     $output = `$command`;
  102.     print "\n";
  103.  
  104.     return $?;
  105. }
  106. sub scan_posts {
  107.     my $thread_url = shift;
  108.     my %posts;
  109.     my $page = ($browser->get($thread_url, @ns_headers))->content;
  110.  
  111.  
  112. #   'name' attribute holds post number, post body is inside blockquote tags.
  113.     %posts = $page =~
  114.         /<a name="(\d+)"><\/a>.*?<blockquote>(.*?)<\/blockquote>/gs;
  115.  
  116.  
  117.     for my $no (sort keys %posts) {
  118.         $_ = $posts{$no};
  119.                
  120. #       Strip any remaining tags in post body.
  121.         s/<.*?>.*?<\/.*?>//g;
  122.         s/<.*?>//g;
  123.  
  124.  
  125. #       If post contains 'Linux' or some obvious variant, not follwed
  126. #       by 'kernel' **AND** no mention of GNU/Linux or GNU plus Linux,
  127. #       then respond.
  128.  
  129.         if (/L\s*                       # (L)inux
  130.                 (
  131.                     i\W*n\W*u\W*     |  # L(inu)x
  132.                     u\W*n\W*i\W*     |  # L(uni)x
  133.                     o\W*o\W*n\W*i\W*    # L(ooni)x
  134.                 )
  135.             x                           # Linu(x)
  136.             (?!\s+kernel)/ix
  137.                 && ! /GNU\s*(\/|plus|with|and|\+)\s*(Linux|Lunix)/i) {
  138.  
  139.             my $transpose = $1 =~ /u\s*n\s*i\s*/;
  140.             next if grep {$_ == $no} @interjected;
  141.  
  142.             &log_msg("URL: $thread_url post: $no");
  143.             &log_msg("POST: $_");
  144.             &log_msg("* Transposed! *") if $transpose;
  145.  
  146.             &interject($thread_url, $no, $page, $transpose);
  147.             push @interjected, $no;
  148.             $total_posts++;
  149.             &log_msg("Interjection to post $no successful. Freedom delivered! Total posts: $total_posts");
  150.         }
  151.     }
  152. }
  153.  
  154. sub interject {
  155. #   Prepare pasta, fill form fields, find submit
  156. #   button and click it, then sleep for a semi-
  157. #   random amount of time.
  158.     chomp(my $os = `uname -s`);
  159.     return if (invoke_curl("http://www.google.com/recaptcha/api/challenge?k=6Ldp2bsSAAAAAAJ5uyx_lx34lJeEpTLVkP5k04qc"));
  160.    
  161.     my ($challenge) = $output =~ m/challenge : '([A-z0-9-]+)',/;
  162.     my $outfile = random_string() . ".jpg";
  163.     return if (invoke_curl("http://www.google.com/recaptcha/api/image?c=$challenge -o $outfile"));
  164.  
  165.     my $vericode;
  166.  
  167.         if ($os) {
  168.             print "Enter the CAPTCHA here:\n";
  169.             if ($os eq "Darwin") {
  170.                 system "qlmanage -p $outfile &> /dev/null &"; # Haven't tested this myself.
  171.             } elsif ($os eq "Linux") {
  172.                 system "display $outfile &> /dev/null &";
  173.             }
  174.         } else {
  175.             print "Open $outfile to see the CAPTCHA, then enter it here:\n";
  176.         }
  177.  
  178.         $vericode = <>; # Wait for input
  179.  
  180.         if ($os) {
  181.             system "pkill -f $outfile"; # Kills the program displaying the image
  182.         }
  183.  
  184.     # Reset the referrer and delete the image
  185.     unlink $outfile;
  186.  
  187.     my ($url, $post_no, $page, $transpose) = @_;
  188.     my ($form, $interjection, $submit_button, $pic);
  189.  
  190.     $interjection = ">>$post_no\n\n" . $rms_pasta;
  191.     $interjection =~ s/Linux/Lunix/g if $transpose;
  192.     $pic = &select_pic;
  193.     &log_msg("attached pic: $pic");  
  194.  
  195.     $form = HTML::Form->parse($page, $url);
  196.     $form->value('com', $interjection);
  197.     $form->value('recaptcha_challenge_field', $challenge);
  198.     $form->value('recaptcha_response_field', $vericode);
  199.     $form->value('upfile', $pic);
  200.     $submit_button = (grep {$_->type eq 'submit'} $form->inputs)[0];
  201.     $browser->request($submit_button->click($form));
  202.  
  203.     sleep($min_post_interval + rand($post_interval_variation));
  204. }
  205.  
  206. sub log_msg {
  207.     my $msg = shift;
  208.     exit if ! $logging_enabled;
  209.     my $now = DateTime->now;
  210.     syswrite LOGGING, $now->ymd . " " . $now->hms . ": $msg\n" or die $!;
  211. }
  212.  
  213. sub select_pic {
  214. #   Select a file from the array and remove its entry.
  215.  
  216.     log "No more sexy RMS pictures left... ;_;\n" && exit if ! @handsome_rms_pics;
  217.     return splice @handsome_rms_pics, int(rand(@handsome_rms_pics)), 1;
  218. }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement