Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
#!/usr/bin/perl
#
# This is a simple perl program to scrape pastebin.com
#
# It will save a copy of every new paste made in one file and will
# save another file containing the meta data for the paste.
#
# Offered with no warranties, use at own risk. It is your
# responsibility to make sure you are not disruptive to the API.
#
# The variables $scrape_rate and $fetch_rate can be changed to
# adjust the speed. They are currently set to recommended rates.
# The variable $data_dir can be changed to where you want the data
# stored, this folder must exist.
#
# This program requires that you whitelist your IP with the API.
#
# Released to the public domain by Ryan Bushby.
#
use strict;
use warnings;
use utf8;
use JSON qw( from_json to_json );
use LWP::Simple;
no warnings 'utf8';

my $data_dir    = 'pastes';    # existing directory that receives all output files
my $scrape_rate = 60;          # seconds between two polls of the listing endpoint
my $fetch_rate  = 1;           # seconds to pause after each paste download

while (1) {
    my $scrape_time = time() + $scrape_rate;

    # Start the counters at zero so the summary line always prints numbers.
    my $added   = 0;
    my $overlap = 0;

    # get() returns undef on any HTTP failure and from_json() dies on a body
    # that is not JSON; neither may take down the long-running scraper, so a
    # bad listing is treated as an empty one and retried on the next cycle.
    my $j_scrape = get('http://pastebin.com/api_scraping.php?limit=100');
    my $data     = defined $j_scrape ? eval { from_json($j_scrape) } : undef;
    if ( ref $data ne 'ARRAY' ) {
        warn "Could not fetch or parse the paste listing, will retry next cycle\n";
        $data = [];
    }

    RECORD:
    foreach my $record ( reverse @{$data} ) {
        next RECORD unless ref $record eq 'HASH';

        my $key  = $record->{key};
        my $date = $record->{date};
        my $url  = $record->{scrape_url};

        # key and date come from the remote service and end up in file names:
        # accept plain word characters only so they cannot escape $data_dir.
        my $usable = defined $url
            && defined $key  && $key  =~ /\A[A-Za-z0-9_]+\z/
            && defined $date && $date =~ /\A[A-Za-z0-9_]+\z/;
        unless ($usable) {
            warn "Skipping record with missing or unexpected key/date/scrape_url\n";
            next RECORD;
        }

        my $filename_content = $data_dir . '/content_' . $date . "_$key";
        my $filename_meta    = "$data_dir/meta_$key";

        # The meta file doubles as the "already saved" marker for a paste.
        if ( -f $filename_meta ) {
            # print "Skipping already cached key: $key\n";
            $overlap++;
            next RECORD;
        }

        print "Saving $key" . ( $record->{'title'} ? " - " . $record->{title} : '' ) . "\n";
        $record->{filename_content} = $filename_content;
        $record->{filename_meta}    = $filename_meta;

        # Only write the marker once the content really arrived; otherwise the
        # paste would be considered cached and never fetched again.
        my $status = getstore( $url, $filename_content );
        if ( is_success($status) ) {
            if ( open( my $meta_fh, '>:encoding(UTF-8)', $filename_meta ) ) {
                print {$meta_fh} to_json( $record, { pretty => 1 } );
                if ( close($meta_fh) ) {
                    $added++;
                }
                else {
                    warn "Could not finish writing $filename_meta: $!\n";
                    unlink $filename_meta;    # drop the partial marker so the paste is retried
                }
            }
            else {
                warn "Could not open $filename_meta for writing: $!\n";
            }
        }
        else {
            warn "Fetching $key failed with HTTP status $status\n";
        }

        # Pause after every download attempt to stay within the API limits.
        sleep $fetch_rate;
    }

    print("There were $added records added and $overlap records of overlap\n");

    my $time_left = $scrape_time - time();
    print("$time_left seconds to wait\n") if $time_left > 0;

    # Wait in one-second steps (always at least one) until the next poll is due.
    do {
        sleep(1);
    } until ( time() >= $scrape_time );
}
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement