Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
#!/usr/bin/perl
use Mojolicious::Lite;
use Mojo::IOLoop;
use Mojo::UserAgent;
use Mojo::Util qw(url_escape url_unescape);
use File::Path qw(make_path);
use Getopt::Std;
##### Available colors and sizes on google image search ###
# Each table maps a command line value to the fragment that is added to the
# "tbs" query parameter. An empty string means "add no filter".
my %color = (
    full  => "ic:color",
    color => "ic:color",    # synonym for "full"
    bw    => "ic:gray",
    any   => "",
    # Specific colors all share the same fragment shape.
    ( map { ( $_ => "ic:specific,isc:$_" ) }
        qw(black blue brown green grey orange pink purple red teal white yellow) ),
);
my %size = (
    icon   => "isz:i",
    medium => "isz:m",
    large  => "isz:l",
    any    => "",
    # "Larger than" presets all share the same fragment shape.
    ( map { ( $_ => "isz:lt,islt:$_" ) } qw(qsvga vga svga xga 2mp 4mp) ),
);
# Get screen size
# Start from a 1920x1080 fallback and overwrite it with the resolution that
# xrandr reports as "current", when its output contains one.
my %screen = ( w => 1920, h => 1080 );
if ( my ($width, $height) = `xrandr 2>&1` =~ /, current (\d+) x (\d+),/ ) {
    @screen{qw(w h)} = ($width, $height);
}
##### Get command line options (%opt) and set Search options (@tbs) ###
# Help, Verbose, Urls-Only, Directory, Number, Parallel, Filetype, Size, Color
my (%opt, @tbs);
# getopts returns false on an unknown switch or a missing argument; stop with
# the usage text instead of searching with half-parsed options.
getopts('hvud:n:p:f:s:c:', \%opt)
    or help("$0: Invalid command line options");
# Display help
# Prints the usage text and ends the script with exit -1.
# An optional first argument (e.g. an option error) is printed before the
# usage text.
sub help {
    my ($message) = @_;
    say "\n$message" if defined $message;
    say qq!
USAGE: $0 [options] search terms
Get Google Images search results. Version 20150303
-h Help, displays this message.
-v Verbose output. Actually it's more like debug infos.
-u Urls-only, displays the urls found and exits, don't download.
-d Directory where the images will be saved.
-n Number of urls to retrieve, between 1 and 100, defaults to 16.
-p Number of parallel downloads, defaults to 16.
-f Filetypes separated by commas, for example: png,gif
-s Size, locally defaults to $screen{w}x$screen{h}. Can also be:
icon medium large any
qsvga (>480x300) vga (>640x480) svga (>800x600)
xga (>1024x768) 2mp (>1600x1200) 4mp (>2272x1704)
-c Color selected in:
black blue brown green grey orange
pink purple red teal white yellow
full -> for full color images
bw -> for black and white images
any -> don't search colors (Default)
The script is silent by default.
The exit code is the number of images downloaded.
EXAMPLES:
$0 milkyway
Save 16 $screen{w}x$screen{h} milkyway images in the current directory.
$0 -n 50 -c bw -d "anime wall" bad apple
Save 50 black&white images about bad apple in the "./anime wall" folder.
$0 -vs 4mp macrophoto insects
Verbosely save 16 images larger than 2272x1704 in the current folder.
$0 -f gif animated
Search animated wallpapers
Have fun :)
Based on the idea of Tyrell Rutledge, perl stuff by Yunga Palatino.
See: http://reddit.com/r/commandline/2vog7b/
All right reversed. Feel free to copy/modify/redistribute/print/eat/sell.
PS: Use at your own risks. Computer may catch fire.
!;
    exit -1;
}
# Show the usage text when -h is given, or when the script was called with
# neither options nor search terms.
help() if $opt{h} || !( keys(%opt) || @ARGV );
# Color names
# -c is matched case-insensitively against %color; an unknown name ends the
# script through help(), and an empty fragment adds nothing to @tbs.
if (defined $opt{c}) {
    $opt{c} = lc $opt{c};
    my $color_filter = $color{ $opt{c} };
    help("$0: Unknown color '$opt{c}'") unless defined $color_filter;
    push @tbs, $color_filter if $color_filter;
}
# Size
# Without -s the detected screen size is requested as an exact size.
# With -s the value is either a key of %size or an explicit WIDTHxHEIGHT;
# anything else ends the script through help().
if (!defined $opt{s}) {
    push @tbs, "isz:ex,iszw:$screen{w},iszh:$screen{h}";    # Default
}
else {
    $opt{s} = lc $opt{s};
    my $named_size = $size{ $opt{s} };
    if (defined $named_size) {                              # Named size option
        push @tbs, $named_size if $named_size;
    }
    elsif (my ($width, $height) = $opt{s} =~ /^(\d+)x(\d+)$/) {    # WIDTHxHEIGHT
        push @tbs, "isz:ex,iszw:$width,iszh:$height";
    }
    else {                                                  # Huh?
        help("$0: Unknown size '$opt{s}'");
    }
}
# Filetypes -- xxx: error checking against predefined filetypes?
# Builds the already-escaped text that is appended to the "q" parameter.
# Search terms separated by a space are ANDed by Google, so several filetypes
# are joined with OR to mean "any of these". Empty items ("png,,gif") are
# dropped and each item is escaped because it comes from the command line.
my $filetypes = "";
if ($opt{f}) {
    my @types = grep { length($_) } split /,/, $opt{f};
    $filetypes = "%20" . join("%20OR%20", map { "filetype:$_" } map { url_escape($_) } @types)
        if @types;
}
# Number of image to download (!= to the number of url fetched later)
# $numdown holds the index of the last wanted image, i.e. the count minus one.
my $numdown = 16 - 1;
if (defined $opt{n}) {
    # Only positive whole numbers make sense; report anything else the same
    # way the other option errors are reported.
    help("$0: Invalid number of images '$opt{n}'")
        unless $opt{n} =~ /^\d+$/ && $opt{n} > 0;
    $numdown = $opt{n} - 1;
}
# Number of parallel downloads
my $parallel = $opt{p} // 16;
# The value is used as the upper bound of "1 .. $parallel"; zero or a
# non-number would start no download at all, so reject it up front.
help("$0: Invalid number of parallel downloads '$parallel'")
    unless $parallel =~ /^\d+$/ && $parallel > 0;
# Directory
# $dir is where the images are saved: the current directory, or the -d
# directory, which is created when it does not exist yet.
my $dir = ".";
if (defined $opt{d}) {
    unless (-d $opt{d}) {
        # Collect failures through the "error" option so that a failure is
        # reported below instead of being thrown by make_path.
        make_path($opt{d}, { error => \my $mkdir_errors });
        unless (-d $opt{d}) {
            # Each recorded error is a one-pair hash: path => message.
            my ($reason) = map { values %$_ } @{ $mkdir_errors || [] };
            $reason //= "Unknown error";
            say "$0: There was a problem creating '$opt{d}' directory.\n$reason";
            exit -1;
        }
    }
    $dir = $opt{d};
}
##### Search images ###
my $ua = Mojo::UserAgent->new(
    max_redirects      => 10,
    inactivity_timeout => 30
);
# Make the search query-ckroll ;)
# Without search terms the query falls back to a fixed, pre-escaped one.
my $q = "https://www.google.com/search?tbm=isch&"
    . "q=" . ($#ARGV == -1 ? "%52%69%63%6b%20%41%73%74%6c%65%79" : url_escape(join " ", @ARGV) )
    . $filetypes
    . "&tbs=" . join(",", @tbs);
# Fetch the page, select all elements of class rg_l and capture the image url -- xxx: it doesn't seem to contain duplicates
# Elements whose markup does not contain an imgres link yield an empty value;
# those are dropped so that @imgurl only holds usable urls.
my @imgurl = grep { defined($_) && length($_) }
    $ua->get($q, {
        "User-Agent" => "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.17 Chrome/24.0.1312.57"
    } )->res->dom(".rg_l")
       ->map(sub { m!href="http://www.google.com/imgres\?imgurl=(.*?)&! and $1 })
       ->each;
# Print out some data, this should be called debug instead of verbose...
my $found_urls = scalar @imgurl;
say "Query: $q" if $opt{v};
# Nothing to print or download: leave with "0 images", verbose or not.
if ($found_urls == 0) {
    say "No result found." if $opt{v};
    exit 0;
}
if ($opt{v}) {
    say $found_urls == 1
        ? "1 url found! We've got a GoogleWhack!!!"
        : "Found $found_urls urls.";
}
# $numdown is the index of the last wanted url; never point past the results.
$numdown = $#imgurl if $#imgurl < $numdown;
# Display the urls and exits
# The exit code is the number of urls printed.
if (defined $opt{u}) {
    say join "\n", @imgurl[0 .. $numdown];
    exit $numdown + 1;
}
##### Fetch images in non-blocking way ###
my $count  = 0;    # images saved so far
my $active = 0;    # requests currently in flight
my $downloader;

# Worker: takes the next url from @imgurl, fetches it without blocking and
# saves the response body under $dir. When its request completes, the worker
# calls itself again, so every initial call keeps one download slot busy.
# The only argument is the slot number; it is just passed along.
$downloader = sub {
    my $id = shift;
    return if $count > $numdown;

    # Next usable url; empty entries are skipped instead of ending the worker.
    my $url = shift @imgurl;
    $url = shift @imgurl while @imgurl and !(defined $url and length $url);
    return unless defined $url and length $url;

    $url = url_unescape($url) while $url ne url_unescape($url);    # because it's escaped in the google link...

    $active++;
    $ua->get( $url, {
        "User-Agent" => "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.17 Chrome/24.0.1312.57",
        "Referer"    => $url,    # you cannot link to our images, yeah, yeah, yeah xxx: make some white/blacklist of sites?
    } => sub {
        my ($ua, $tx) = @_;
        $active--;

        # Enough images were saved while this request was in flight.
        return if $count > $numdown;

        my $url = $tx->req->url;
        if (my $err = $tx->error) {
            say "Error: " . ($err->{code} // "") . " $err->{message} - $url" if defined $opt{v};
        }
        else {
            my $res   = $tx->res;
            my $asset = $res->content->asset;
            my $bytes = $asset->size;

            # Try to get extension from mime-type
            my $content_type = $res->headers->content_type;
            my $mime = (defined $content_type && $content_type =~ m!image/(\w+)!i) ? "$1" : "unknown";

            # "Sanitize" url path, we may use it for the filename
            my $url_file = $url->path->to_string;
            $url_file = url_unescape($url_file) while $url_file ne url_unescape($url_file);
            $url_file =~ tr!a-zA-Z0-9_\-./!!cd;      # we only keep those chars for filenames
            $url_file =~ s!([_\-./])+!$1!g;          # squeeze runs of separators

            my $file;
            if ($url_file =~ m!^.*?/?([a-z0-9_.\-]+\.\w+)/?$!i) {       # hostname - file.ext
                $file = $url->host . " - $1";
            }
            elsif ($url_file =~ m!^.*?/?([a-z0-9_\-.]+)/?$!i) {         # hostname - file.mimetype
                $file = $url->host . " - $1.$mime";
            }
            else {                                                      # hostname - search query.mimetype
                my $terms = "@ARGV";
                $terms =~ tr!/!_!;    # a slash would turn the name into a path
                $file = $url->host . " - $terms.$mime";
            }

            # Check if file exists, and search for index number the previous filename
            my ($duplicate, $index, $fullname) = (0, 0, "$dir/$file");
            while (-f $fullname) {
                if ( (-s _) == $bytes ) {
                    $duplicate = 1;
                    say "Got $url\n Skipped duplicate: $fullname ($bytes bytes)" if $opt{v};
                    last;    # Same server, same name, same size, probably the same file, we skip.
                }
                if ($fullname =~ m!\.(\d+)\.(?:\w+)$!) {
                    $index = $1 + 1;
                    $fullname =~ s!\.(?:\d+)\.(\w+)$!.$index.$1!;
                }
                else {
                    $index++;
                    $fullname =~ s!\.(\w+)$!.$index.$1!;
                }
            }

            # Save file; an image only counts once it is really on disk.
            if (!$duplicate) {
                if (eval { $asset->move_to($fullname); 1 }) {
                    $count++;
                    say "Got $url\n Saved $bytes bytes as $fullname" if $opt{v};
                }
                elsif ($opt{v}) {
                    my $reason = $@;
                    chomp $reason;
                    say "Error: could not save $fullname - $reason";
                }
            }
        }

        # Keep this slot busy while more images are wanted.
        $downloader->($id) if $count <= $numdown;

        # Stop once enough images are saved, or once nothing is in flight and
        # nothing could be started. An empty queue alone is not enough: other
        # slots may still be downloading.
        if ($count > $numdown or !$active) {
            say "Downloaded $count image", $count > 1 ? "s" : "" if $opt{v};
            Mojo::IOLoop->stop;
        }
    });
};

$downloader->($_) for 1 .. $parallel;
# Only enter the loop when at least one request was actually started.
Mojo::IOLoop->start if $active and !Mojo::IOLoop->is_running;
exit $count;
Add Comment
Please, Sign In to add comment