Yunga

ggimgs.pl

Feb 19th, 2015
331
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Perl 8.83 KB | None | 0 0
  1. #!/usr/bin/perl
  2. use Mojolicious::Lite;
  3. use Mojo::Util qw(url_escape url_unescape);
  4. use File::Path qw(make_path);
  5. use Getopt::Std;
  6.  
  7.  
  8. ##### Available colors and sizes on google image search ###
  9. my %color = (
  10.     full => "ic:color", bw => "ic:gray", any => "",
  11.     black   => "ic:specific,isc:black",  blue   => "ic:specific,isc:blue",
  12.     brown   => "ic:specific,isc:brown",  green  => "ic:specific,isc:green",
  13.     grey    => "ic:specific,isc:grey",   orange => "ic:specific,isc:orange",
  14.     pink    => "ic:specific,isc:pink",   purple => "ic:specific,isc:purple",
  15.     red     => "ic:specific,isc:red",    teal   => "ic:specific,isc:teal",
  16.     white   => "ic:specific,isc:white",  yellow => "ic:specific,isc:yellow",
  17. );
  18.  
  19. my %size = (
  20.     icon  => "isz:i", medium => "isz:m", large => "isz:l", any => "",
  21.     qsvga => "isz:lt,islt:qsvga",        vga   => "isz:lt,islt:vga",
  22.     svga  => "isz:lt,islt:svga",         xga   => "isz:lt,islt:xga",
  23.     "2mp" => "isz:lt,islt:2mp",          "4mp" => "isz:lt,islt:4mp",
  24. );
  25.  
  26. # Get screen size
  27. my %screen = ( w => 1920, h => 1080 );
  28. @screen{"w", "h"} = ($1, $2) if `xrandr 2>&1` =~ /, current (\d+) x (\d+),/;
  29.  
  30.  
  31. ##### Get command line options (%opt) and set Search options (@tbs) ###
  32. # Help, Verbose, Urls-Only, Directory, Number, Parallel, Filetype, Size, Color
  33. my (%opt, @tbs);
  34. getopts('hvud:n:p:f:s:c:', \%opt);
  35.  
  36. # Display help
  37. sub help {
  38.     say "\n$_[0]" if defined $_[0];
  39.     say qq!
  40. USAGE: $0 [options] search terms
  41.  
  42.   Get Google Images search results. Version 20150303
  43.     -h   Help, displays this message.
  44.     -v   Verbose output. Actually it's more like debug infos.
  45.    -u   Urls-only, displays the urls found and exits, don't download.
  46.     -d   Directory where the images will be saved.
  47.     -n   Number of urls to retrieve, between 1 and 100, defaults to 16.
  48.     -p   Number of parallel downloads, defaults to 16.
  49.     -f   Filetypes separated by commas, for example: png,gif
  50.     -s   Size, locally defaults to $screen{w}x$screen{h}. Can also be:
  51.              icon  medium  large any
  52.              qsvga (>480x300)    vga   (>640x480)   svga  (>800x600)
  53.              xga   (>1024x768)   2mp   (>1600x1200) 4mp   (>2272x1704)
  54.     -c   Color selected in:
  55.              black   blue     brown   green   grey    orange
  56.              pink    purple   red     teal    white   yellow
  57.              color -> for full color images
  58.              bw    -> for black and white images
  59.              any   -> don't search colors (Default)
  60.  
  61.  The script is silent by default.
  62.  The exit code is the number of images downloaded.
  63.  
  64. EXAMPLES:
  65.  $0 milkyway
  66.      Save 16 $screen{w}x$screen{h} milkyway images in the current directory.
  67.  $0 -n 50 -c bw -d "anime wall" bad apple
  68.      Save 50 black&white images about bad apple in the "./anime wall" folder.
  69.  $0 -vs 4mp macrophoto insects
  70.      Verbosely save 10 images larger than 2272x1704 in the current folder.
  71.  $0 -f gif animated
  72.      Search animated wallpapers
  73.  
  74.                                                 Have fun :)
  75.  
  76.  Based on the idea of Tyrell Rutledge, perl stuff by Yunga Palatino.
  77.    See: http://reddit.com/r/commandline/2vog7b/
  78.  
  79.  All right reversed. Feel free to copy/modify/redistribute/print/eat/sell.
  80.  PS: Use at your own risks. Computer may catch fire.
  81.     !;
  82.     exit -1;
  83. }
  84.  
  85. help if $opt{h} or !(scalar(keys %opt) + $#ARGV + 1);
  86.  
  87. # Color names
  88. if (defined $opt{c}) {
  89.     $opt{c} = lc $opt{c};
  90.     help("$0: Unknown color '$opt{c}'") unless defined $color{$opt{c}};
  91.     push @tbs, $color{$opt{c}} if $color{$opt{c}};
  92. }
  93.  
  94. # Size
  95. if (defined $opt{s}) {
  96.     $opt{s} = lc $opt{s};
  97.  
  98.     if (defined $size{$opt{s}}) {                         # Named size option
  99.         push @tbs, $size{$opt{s}} if $size{$opt{s}};
  100.     } elsif ($opt{s} =~ /^(\d+)x(\d+)$/) {                # WIDTHxHEIGHT
  101.         push @tbs, "isz:ex,iszw:$1,iszh:$2";
  102.     } else {                                              # Huh?
  103.         help("$0: Unknown size '$opt{s}'");
  104.     }
  105. } else {
  106.     push @tbs, "isz:ex,iszw:$screen{w},iszh:$screen{h}";  # Default
  107. }
  108.  
  109. # Filetypes -- xxx: error checking against predefined filetypes?
  110. my $filetypes = "";
  111. $filetypes .= "%20" . join " ", map { "filetype:$_" } split /,/, $opt{f} if ($opt{f});
  112.  
  113. # Number of image to download (!= to the number of url fetched later)
  114. my $numdown = 16 - 1;
  115. if (defined $opt{n}) {
  116.     if ($opt{n} > 0) {
  117.         $numdown = $opt{n} - 1;
  118.     } else {
  119.         exit -1;
  120.     }
  121. }
  122.  
  123. # Number of parallel downloads
  124. my $parallel = $opt{p} // 16;
  125.  
  126. # Directory
  127. my $dir = ".";
  128. if (defined $opt{d}) {
  129.     if (-d $opt{d} or make_path($opt{d})) {
  130.         $dir = $opt{d};
  131.     } else {
  132.         say "$0: There was a problem creating '$opt{d}' directory.\n$!";
  133.         exit -1;
  134.     }
  135. }
  136.  
  137.  
  138. ##### Search images ###
  139. my $ua = Mojo::UserAgent->new(
  140.     max_redirects      => 10,
  141.     inactivity_timeout => 30
  142. );
  143.  
  144. # Make the search query-ckroll ;)
  145. my $q = "https://www.google.com/search?tbm=isch&"
  146.      . "q=" . ($#ARGV == -1 ? "%52%69%63%6b%20%41%73%74%6c%65%79" : url_escape(join " ", @ARGV) )
  147.      . $filetypes
  148.      . "&tbs=" . join(",", @tbs);
  149.  
  150. # Fetch the page, select all elements of class rg_l and capture the image url -- xxx: it doesn't seem to contain duplicates
  151. my @imgurl = $ua->get($q, {
  152.     "User-Agent"  => "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.17 Chrome/24.0.1312.57"
  153. } )->res->dom(".rg_l")->map(sub { m!href="http://www.google.com/imgres\?imgurl=(.*?)&! and $1 })->each;
  154.  
  155. # Print out some data, this should be called debug instead of verbose...
  156. if ($opt{v}) {
  157.     say "Query: $q";
  158.  
  159.     # Check the number of image found
  160.     if ($#imgurl == -1) {
  161.         say "No result found.";
  162.         exit 0;
  163.     } elsif ($#imgurl == 0) {
  164.         say "1 url found! We've got a GoogleWhack!!!";
  165.     } else {
  166.         say "Found $#imgurl urls.";
  167.     }
  168. }
  169.  
  170. $numdown = $#imgurl if $#imgurl < $numdown;
  171.  
  172. # Display the urls and exits
  173. if (defined $opt{u}) {
  174.     say (join "\n", @imgurl[0..$numdown]);
  175.     exit $numdown;
  176. }
  177.  
  178.  
  179. ##### Fetch images in non-blocking way ###
  180. my $count = 0;
  181. my $downloader;
  182. $downloader = sub {
  183.     my $id = shift;
  184.     return if !(my $url = shift @imgurl) or $count > $numdown;
  185.     $url = url_unescape($url) while $url ne url_unescape($url); # because it's escaped in the google link...
  186.  
  187.     $ua->get( $url, {
  188.         "User-Agent"  => "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.17 Chrome/24.0.1312.57",
  189.         "Referer"     => $url,  # you cannot link to our images, yeah, yeah, yeah xxx: make some white/blacklist of sites?
  190.     } => sub {
  191.         my ($ua, $tx) = @_;
  192.         my $url = $tx->req->url;
  193.  
  194.         if ($numdown >= $count) {
  195.             if (my $res = $tx->success) {
  196.                 # Try to get extension from mime-type
  197.                 my $mime;
  198.                 if (defined $res->headers->content_type) {
  199.                     $mime = $res->headers->content_type =~ m!image/(\w+);?\s*!i ? "$1" : "unknown";
  200.                 } else {
  201.                     $mime = "unknown";
  202.                 }
  203.  
  204.                 # "Sanitize" url path, we may use it for the filename
  205.                 my $url_file = $url->path;
  206.                 $url_file = url_unescape($url_file) while $url_file ne url_unescape($url_file);
  207.                 $url_file =~ tr!a-zA-Z0-9_\-./!!cd; # we only keep those chars for filenames
  208.                 $url_file =~ s!\s+! !g; $url_file =~ s!^ !!; $url_file =~ s! $!!; # squeeze & trim whitespace
  209.                 $url_file =~ s!([_\-./])+!$1!g;     # squeeze these too
  210.  
  211.                 my $file;
  212.                 if ($url_file =~ m!^.*?/?([a-z0-9_.\-]+\.\w+)/?$!i) { # hostname - file.ext
  213.                     $file = $url->host . " - $1";
  214.                 } elsif ($url_file =~ m!^.*?/?([a-z0-9_\-.]+)/?$!i) { # hostname - file.mimetype
  215.                     $file = $url->host . " - $1.$mime";
  216.                 } else {                                              # hostname - search query.mimetype
  217.                     $file = $url->host . " - @ARGV.$mime";
  218.                 }
  219.  
  220.                 # Check if file exists, and search for index number the previous filename
  221.                 my ($duplicate, $index, $fullname) = (0, 0, "$dir/$file");
  222.                 while (-f $fullname) {
  223.                     if ( -s _ == $res->content->asset->size ) {
  224.                         $duplicate = 1;
  225.                         say "Got $url\n  Skipped duplicate: $fullname (" . $res->content->asset->size . " bytes)" if $opt{v};
  226.                         last; # Same server, same name, same size, probably the same file, we skip.
  227.                     }
  228.  
  229.                     if ($fullname =~ m!\.(\d+)\.(?:\w+)$!) {
  230.                         $index = $1 + 1;
  231.                         $fullname =~ s!\.(?:\d+)\.(\w+)$!.$index.$1!;
  232.                     } else {
  233.                         $index++;
  234.                         $fullname =~ s!\.(\w+)$!.$index.$1!;
  235.                     }
  236.                 }
  237.  
  238.                 # Save file
  239.                 if (!$duplicate) {
  240.                     $count++;
  241.                     $res->content->asset->move_to("$fullname");
  242.                     say "Got $url\n  Saved " . $res->content->asset->size . " bytes as $fullname" if $opt{v};
  243.                 }
  244.  
  245.             } elsif (defined $opt{v}) {
  246.                 my $err = $tx->error;
  247.                 say "Error: " . ($err->{code} // "") . " $err->{message} - $url";
  248.             }
  249.  
  250.             if ($count > $numdown or !scalar @imgurl) {
  251.                 say "Downloaded $count image", $count > 1 ? "s" : "" if $opt{v};
  252.                 Mojo::IOLoop->stop;
  253.                 #Mojo::IOLoop->stop_gracefully;
  254.                 #Mojo::IOLoop->reset;
  255.                 #exit $count;
  256.             } else {
  257.                 $downloader->($id) if @imgurl;
  258.             }
  259.         }
  260.     });
  261. };
  262.  
  263. $downloader->($_) for 1 .. $parallel;
  264.  
  265. Mojo::IOLoop->start unless Mojo::IOLoop->is_running;
  266.  
  267. exit $count;
Add Comment
Please, Sign In to add comment