Guest User

iichan pic downloader

a guest
Oct 21st, 2015
242
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Perl 4.00 KB | None | 0 0
  1. #!/usr/bin/env perl
  2. #===============================================================================
  3. #         FILE: img_br_ddownloader.pl
  4. #        USAGE: ./img_br_ddownloader.pl  
  5. #      CREATED: 10/19/2015 20:55:55
  6. #      modules: sudo cpan LWP
  7. #               sudo cpan Getopt
  8. #      example: ./img_br_ddownloader.pl --board vg
  9. #        notes: please not change sleep time between http request,
  10. #       to lower time and accelerate download pics, you can get ban
  11. #       from mod-chan(because its can generate load on server).
  12. #       thank you.
  13. #     Perl-ver: 5.18
  14. #===============================================================================
  15.  
  16. use strict;
  17. use warnings;
  18. use utf8;
  19. use LWP;
  20. use v5.18;
  21. use HTTP::Cookies;
  22. use List::MoreUtils qw(uniq);
  23. use Getopt::Long qw(GetOptions);
  24.  
  25. my $board = "b";
  26.  
  27. GetOptions('board=s' => $board) or die "usage: $0 --board b \n Default: b";
  28.  
  29.  
  30.  
  31. my @downloaded_pics = ();
  32. my $err_log_file = "./errlog.txt";
  33. my $dir_to_save = './img_save' . "_" . $board;
  34. my $agent = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10; rv:33.0) Gecko/20100101 Firefox/33.0";
  35.  
  36. my $main_url = "http://iichan.hk/" . $board . "/";
  37. my $host_url = "http://iichan.hk";
  38. my %headers = (
  39.                 "Accept" => "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
  40.                 "Accept-Encoding"   => "gzip, deflate",
  41.                 "Accept-Language"   => "ru-RU,ru;q=0.8,en-US;q=0.5,en;q=0.3",
  42.                 "Connection" => "keep-alive",
  43.               );
  44.  
  45. sub write_to_error_log {
  46.     my ($message) = @_;
  47.     my $time = time();
  48.     open (ERRLOG, ">>", $err_log_file) or die "can't write to error log $!";
  49.     print ERRLOG "$time: ERR: $message \n";
  50.     print "$time: ERR: $message \n";
  51. }
  52.  
  53.  
  54.  
  55. my $ua = LWP::UserAgent->new;
  56. $ua->timeout(10);
  57. $ua->agent($agent);
  58. $ua->cookie_jar;
  59. foreach my $key (keys %headers){
  60.     $ua->default_header( $key => $headers{$key})
  61. }
  62.  
  63. my @tmp_tread_url;
  64.  
  65. my $page = 0;
  66. while(1){
  67.     my $tmp_page = $page != 0 ? "$page.html" : "";
  68.     my $response = $ua->get($main_url . $tmp_page);
  69.     say "#### $main_url$tmp_page";
  70.     if($response->is_success) {
  71.         my $content = $response->decoded_content;
  72.         my @tmp_urls = $content =~ /\/$board\/res\/\d+.html/g;
  73.         @tmp_urls = uniq @tmp_urls;
  74.         push @tmp_tread_url, @tmp_urls;
  75.         say scalar @tmp_urls;
  76.         $page++
  77.     }
  78.     else {
  79.         say "exit: $tmp_page";
  80.         write_to_error_log($tmp_page);
  81.         last;
  82.     }
  83.     sleep 1;
  84. }
  85.  
  86. @tmp_tread_url = uniq @tmp_tread_url;
  87. say scalar @tmp_tread_url;
  88.  
  89.  
  90. my @url_pics;
  91.  
  92. foreach my $url (@tmp_tread_url){
  93.     my $response = $ua->get($host_url . $url);
  94.     say "####: $host_url . $url";
  95.     if($response->is_success){
  96.         my $content = $response->decoded_content;
  97.         my @tmp_pics_urls = $content =~ /\/$board\/src\/\d+.(?:jpg|png|gif)/g;
  98.         @tmp_pics_urls = uniq @tmp_pics_urls;
  99.         say scalar @tmp_pics_urls;
  100.         push @url_pics, @tmp_pics_urls;
  101.     }
  102.     else{
  103.         say "ERR: $url";
  104.         write_to_error_log("ERR GET TREAD $url");
  105.     }
  106.     sleep 1;
  107. }
  108.  
  109. say scalar @url_pics;
  110.  
  111. unless (-d $dir_to_save){
  112.     mkdir $dir_to_save;
  113. }
  114.  
  115. foreach my $url (@url_pics){
  116.     my $file_name = $url;
  117.     $file_name =~ s/\/$board\/src\///;
  118.     my $save_path = $dir_to_save . "/" . $file_name;
  119.     unless (-e $save_path){
  120.         my $response = $ua->get($host_url . $url);
  121.         say "#####: get $host_url$url";
  122.         if($response->is_success){
  123.             my $content = $response->decoded_content;
  124.             open(FILE, ">", $save_path) or die "can't open or create file $!";
  125.             print FILE $content;
  126.             push @downloaded_pics, $file_name;
  127.             sleep 1;
  128.         }
  129.         else {
  130.             say "CAN'T get $host_url$url";
  131.             write_to_error_log("CAN'T get $host_url$url");
  132.         }
  133.      }
  134.      else{
  135.          say "file exist, $save_path";
  136.      }
  137.     }
  138.  
  139. my $totaly_pics = scalar @downloaded_pics;
  140. write_to_error_log("TOTALY SAVED: $totaly_pics pics");
Add Comment
Please, Sign In to add comment