Advertisement
techouse

useless_webpage_visitor.pl

Nov 21st, 2013
112
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Perl 0.98 KB | None | 0 0
  1. #!/usr/bin/perl
  2. # Visit every URL listed in a plain text file (one URL per line) and throw
  3. # the responses away. Lines that are not valid URLs are skipped.
  4. # Usage: useless_webpage_visitor.pl FILE
  5. use warnings;
  6. use strict;
  7. use utf8;
  8. use LWP::UserAgent;
  9. 
  10. # get the file via ARGV; test the argument count so a file named "0" is accepted
  11. die "Please point to a text file with links in the 1st argument!\n" unless @ARGV;
  12. my $csv_filename = $ARGV[0];
  13. # open the file; should be a plain text file; one url per line
  14. open(my $csv_FH, '<:encoding(utf8)', $csv_filename)
  15.     or die "Cannot open '$csv_filename': $!\n";
  16. 
  17. # instantiate a UserAgent
  18. my $ua = LWP::UserAgent->new(
  19.     # set a fake Browser UserAgent string :)
  20.     agent => 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/29.0.1547.76 Safari/537.36',
  21.     # set a timeout in seconds
  22.     timeout => 10,
  23. );
  24. 
  25. # read the file
  26. while (my $url = <$csv_FH>) {
  27.     # trim first (this also removes the line ending, including "\r" from
  28.     # CRLF files) so surrounding whitespace cannot fail the check below
  29.     $url =~ s/^\s+|\s+$//g;
  30. 
  31.     # check if valid URL; "+" rather than "*" so that blank lines are skipped
  32.     next unless $url =~ /\A(?:[a-z0-9\-\._~!\$&'\(\)\*+,;=:\/?@]|%[0-9A-F]{2})+\z/i;
  33. 
  34.     # get it; the response content is not used, only the visit matters
  35.     my $response = $ua->get($url);
  36.     # report failed visits instead of ignoring them silently
  37.     warn "Failed to fetch $url: ", $response->status_line, "\n"
  38.         unless $response->is_success;
  39. }
  40. 
  41. close($csv_FH);
  42. 
  43. exit;
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement