Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- #!/usr/bin/env perl
- #===============================================================================
- # FILE: img_br_ddownloader.pl
- # USAGE: ./img_br_ddownloader.pl
- # CREATED: 10/19/2015 20:55:55
- # modules: sudo cpan LWP
- # sudo cpan Getopt
- # example: ./img_br_ddownloader.pl --board vg
- # notes: please not change sleep time between http request,
- # to lower time and accelerate download pics, you can get ban
- # from mod-chan(because its can generate load on server).
- # thank you.
- # Perl-ver: 5.18
- #===============================================================================
- use strict;
- use warnings;
- use utf8;
- use LWP;
- use v5.18;
- use HTTP::Cookies;
- use List::MoreUtils qw(uniq);
- use Getopt::Long qw(GetOptions);
- my $board = "b";
- GetOptions('board=s' => $board) or die "usage: $0 --board b \n Default: b";
- my @downloaded_pics = ();
- my $err_log_file = "./errlog.txt";
- my $dir_to_save = './img_save' . "_" . $board;
- my $agent = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10; rv:33.0) Gecko/20100101 Firefox/33.0";
- my $main_url = "http://iichan.hk/" . $board . "/";
- my $host_url = "http://iichan.hk";
- my %headers = (
- "Accept" => "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
- "Accept-Encoding" => "gzip, deflate",
- "Accept-Language" => "ru-RU,ru;q=0.8,en-US;q=0.5,en;q=0.3",
- "Connection" => "keep-alive",
- );
- sub write_to_error_log {
- my ($message) = @_;
- my $time = time();
- open (ERRLOG, ">>", $err_log_file) or die "can't write to error log $!";
- print ERRLOG "$time: ERR: $message \n";
- print "$time: ERR: $message \n";
- }
- my $ua = LWP::UserAgent->new;
- $ua->timeout(10);
- $ua->agent($agent);
- $ua->cookie_jar;
- foreach my $key (keys %headers){
- $ua->default_header( $key => $headers{$key})
- }
- my @tmp_tread_url;
- my $page = 0;
- while(1){
- my $tmp_page = $page != 0 ? "$page.html" : "";
- my $response = $ua->get($main_url . $tmp_page);
- say "#### $main_url$tmp_page";
- if($response->is_success) {
- my $content = $response->decoded_content;
- my @tmp_urls = $content =~ /\/$board\/res\/\d+.html/g;
- @tmp_urls = uniq @tmp_urls;
- push @tmp_tread_url, @tmp_urls;
- say scalar @tmp_urls;
- $page++
- }
- else {
- say "exit: $tmp_page";
- write_to_error_log($tmp_page);
- last;
- }
- sleep 1;
- }
- @tmp_tread_url = uniq @tmp_tread_url;
- say scalar @tmp_tread_url;
- my @url_pics;
- foreach my $url (@tmp_tread_url){
- my $response = $ua->get($host_url . $url);
- say "####: $host_url . $url";
- if($response->is_success){
- my $content = $response->decoded_content;
- my @tmp_pics_urls = $content =~ /\/$board\/src\/\d+.(?:jpg|png|gif)/g;
- @tmp_pics_urls = uniq @tmp_pics_urls;
- say scalar @tmp_pics_urls;
- push @url_pics, @tmp_pics_urls;
- }
- else{
- say "ERR: $url";
- write_to_error_log("ERR GET TREAD $url");
- }
- sleep 1;
- }
- say scalar @url_pics;
- unless (-d $dir_to_save){
- mkdir $dir_to_save;
- }
- foreach my $url (@url_pics){
- my $file_name = $url;
- $file_name =~ s/\/$board\/src\///;
- my $save_path = $dir_to_save . "/" . $file_name;
- unless (-e $save_path){
- my $response = $ua->get($host_url . $url);
- say "#####: get $host_url$url";
- if($response->is_success){
- my $content = $response->decoded_content;
- open(FILE, ">", $save_path) or die "can't open or create file $!";
- print FILE $content;
- push @downloaded_pics, $file_name;
- sleep 1;
- }
- else {
- say "CAN'T get $host_url$url";
- write_to_error_log("CAN'T get $host_url$url");
- }
- }
- else{
- say "file exist, $save_path";
- }
- }
- my $totaly_pics = scalar @downloaded_pics;
- write_to_error_log("TOTALY SAVED: $totaly_pics pics");
Add Comment
Please, Sign In to add comment