Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
#!/usr/bin/env perl
# Download the full-resolution version of every image linked from a
# Wikimedia category page.
#
# Usage: pass the URL of the category page as the first argument.
#
# For each link on the category page whose URL matches /Image\:/, the script
# loads that page, looks for a link whose text is 'Full resolution', and
# hands the target to curl (-O, so the file is saved in the current
# directory under its remote name). Progress and errors go to STDERR.
#
# Exit status: 0 on success, 1 on a usage error, when the category page
# cannot be loaded, or when curl fails.
use strict;
use warnings;
use WWW::Mechanize;

# autocheck is off so that failed requests are reported through the
# success() checks below instead of aborting with an exception.
my $mech = WWW::Mechanize->new(autocheck => 0);
$mech->agent_alias('Windows IE 6');

# User-Agent string handed to curl for the actual downloads.
my $user_agent = 'Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1)';

my $category_url = $ARGV[0];
unless ($category_url) {
    print STDERR "You must specify a Wikimedia Category page\n";
    exit 1;
}

print STDERR "Loading $category_url...\n";
$mech->get($category_url);
unless ($mech->success()) {
    print STDERR "Couldn't load category page...\n";
    exit 1;
}
print STDERR "Loaded category page...\n";

# URLs already handed to curl, so an image linked twice is fetched once.
my %fetched_urls;

# NOTE(review): newer MediaWiki installs may use a 'File:' prefix instead of
# 'Image:' -- confirm whether this pattern should match both.
my @links = $mech->find_all_links(url_regex => qr/Image\:/);

LINK:
foreach my $link (@links) {
    # Resolve against the page the link was found on; after the first get()
    # below, $mech is no longer positioned on the category page.
    my $page_url = $link->url_abs()->as_string;

    $mech->get($page_url);
    unless ($mech->success()) {
        print STDERR "Couldn't load image page $page_url, skipping\n";
        next LINK;
    }

    my $image_link = $mech->find_link(text => 'Full resolution');
    next LINK unless $image_link;

    # curl needs an absolute URL; the href may be relative.
    my $url = $image_link->url_abs()->as_string;
    next LINK if $fetched_urls{$url}++;

    print STDERR "Downloading $url\n";

    # List form bypasses the shell, so metacharacters in the scraped URL
    # cannot be interpreted as shell syntax.
    system('curl', '-A', $user_agent, '-O', $url);

    if ($? == -1) {
        print STDERR "\nfailed to execute: $!\n";
        exit 1;
    }
    elsif ($? & 127) {
        printf STDERR "\nchild died with signal %d, %s coredump\n",
            ($? & 127), ($? & 128) ? 'with' : 'without';
        exit 1;
    }
    elsif ($?) {
        printf STDERR "\nchild exited with value %d\n", $? >> 8;
        exit 1;
    }
}
Add Comment
Please, Sign In to add comment