lazix

web_scrape.pl

Nov 19th, 2020 (edited)
92
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
  1. #!/usr/bin/perl
  2.  
  3. # Simple web scraper in Perl
  4. #use strict;
  5. use warnings;
  6. use LWP 5.64;
  7. use HTTP::Cookies;
  # main() - interactive entry point of the scraper.
  #
  # Prompts on STDOUT for a URL, reads one line from STDIN, fetches that URL
  # with LWP::UserAgent (using a cookie jar backed by cookies.txt) and writes
  # the decoded response body to saved.html through a UTF-8 encoding layer.
  #
  # Takes no arguments.
  #
  # Returns 1 once the page has been written.  Returns nothing, after a
  # warning on STDERR, when no URL was entered, the request was not
  # successful, the body could not be decoded, or saved.html could not be
  # written.  A content type other than text/html is reported on STDOUT but
  # does not stop the save.
  sub main {
      # The prompt has no trailing newline, so enable autoflush on the
      # currently selected output handle to get it out before we block on
      # input.  local restores the previous setting when main() returns.
      local $| = 1;
      print "What site would you like to scrape? (Please enter the full URL [I.E https://rms.org])";

      # Read from STDIN explicitly: a bare <> would read from any files
      # named on the command line instead of from the user.
      my $url = <STDIN>;
      unless (defined $url) {
          warn "No URL given\n";
          return;
      }

      # Drop the line terminator and any surrounding whitespace.
      $url =~ s/\A\s+//;
      $url =~ s/\s+\z//;
      if ($url eq '') {
          warn "No URL given\n";
          return;
      }

      my $cookies = HTTP::Cookies->new(
          file     => "cookies.txt",
          autosave => 1,
      );

      my $ua = LWP::UserAgent->new();
      $ua->cookie_jar($cookies);
      $ua->agent("Windows IE 7");

      my $response = $ua->get($url);
      unless ($response->is_success) {
          # Nothing useful to save; stop rather than writing an error page.
          warn "URL invalid - got " . $response->status_line . "\n";
          return;
      }

      # Use print, not printf: the content type comes from the server and
      # must never be interpreted as a format string.
      my $content_type = $response->content_type;
      unless ($content_type eq 'text/html') {
          print "Expected HTML, got $content_type\n";
      }

      my $content = $response->decoded_content;
      unless (defined $content) {
          warn "Could not decode the response body\n";
          return;
      }

      my $saved_content = "saved.html";

      # Three-argument open with a lexical handle; the encoding layer is
      # applied at open time so every write goes through it.
      my $out_fh;
      unless (open $out_fh, '>:encoding(UTF-8)', $saved_content) {
          warn "Can not create $saved_content: $!\n";
          return;
      }

      # print, not printf: a '%' in the page must be written verbatim.
      print {$out_fh} $content;

      # Buffered write errors only surface when the handle is closed.
      unless (close $out_fh) {
          warn "Can not write $saved_content: $!\n";
          return;
      }

      # Report the size of what was saved rather than echoing the document.
      print "Saved " . length($content) . " characters of data\n";
      return 1;
  }
  43.  
  44. main(); # Top-level call: runs main() when the file is executed.
  45.  
  46.  
RAW Paste Data