# [Pastebin page chrome captured with the paste; not part of the script]
# Advertisement
# Not a member of Pastebin yet?
# Sign Up, it unlocks many cool features!
#! /usr/bin/perl
# Polls the webnovel.com RSS feed forever. For every tracked book whose
# awaited chapter shows up in the feed, the chapter text is copied to
# Pastebin, the paste is linked on Reddit, a notification e-mail is sent and
# the awaited chapter number for that book is advanced by one.
use strict;
use warnings;
use LWP::UserAgent ();
use Email::Send;
use Email::Send::Gmail;
use Email::Simple::Creator;
use WWW::Pastebin::PastebinCom::API;
use Reddit::Client;

my $ChapterInfoFile = "ChapterInfoFile";
#File that contains the Book URL number, then a tab, and then the
#most recent chapter number that you're looking for; extend as needed.
#Example for TMW shown below for chapter 1091
#7834185605001405	1091

my %Books = ();
#Hash of Books To Get with corresponding chapters to look for

my $sender = Email::Send->new(
    {   mailer      => 'Gmail',
        mailer_args => [
            username => 'username',
            password => 'password',
        ]
    }
);

my $bin = WWW::Pastebin::PastebinCom::API->new(
    api_key => 'api_key');
$bin->get_user_key("PasteBinUserName","PasteBinPassword");
#Set up PasteBin API

#Reddit credentials live in one place: they are needed both for the initial
#login and for the token refresh done right before every submission.
my %RedditCredentials = (
    client_id => "client_id",
    secret    => "Secret",
    username  => "RedditUsername",
    password  => "RedditPassword",
);
my $reddit = Reddit::Client->new(
    user_agent => "Beep Boop Robot"
);
$reddit->get_token(%RedditCredentials);
#Set up Reddit API

my $ua = LWP::UserAgent->new();
$ua->agent("Beep Boop I'm a sneaky bot");
$ua->timeout(15);
#NOTE(review): this turns off TLS hostname verification for every request.
#Kept because the original relied on it - confirm it is really needed.
$ua->ssl_opts( verify_hostname => 0);
#LWP::UserAgent for grabbing HTML files

# Reads the chapter info file at $path and returns a (book id => awaited
# chapter number) list suitable for assigning to a hash.
# One book per line, "<book id><TAB><chapter number>". Blank lines are
# ignored; lines without a numeric chapter are reported on STDERR and skipped.
# Dies when the file cannot be opened.
sub read_chapter_info {
    my ($path) = @_;

    open(my $in, '<', $path)
        or die "Cannot open ChapterInfoFile file: $!";
    my $file_data = do { local $/ = undef; <$in> };
    close $in;
    $file_data //= '';

    #Normalise every kind of line ending (and runs of them) to a single \n
    $file_data =~ s/\r/\n/g;
    $file_data =~ s/\v+/\n/g;

    my %books;
    foreach my $BookToGet (split /\n/, $file_data) {
        next unless $BookToGet =~ /\S/;
        my ($BookNum, $ChapterNum) = split /\t/, $BookToGet;
        my ($number) = defined $ChapterNum ? $ChapterNum =~ /\A\s*(\d+)/ : ();
        unless (defined $BookNum && length $BookNum && defined $number) {
            print STDERR "Skipping malformed line in $path: $BookToGet\n";
            next;
        }
        $books{$BookNum} = $number;
    }
    return %books;
}

# Writes the hash referenced by $books back to the chapter info file at
# $path, one "<book id><TAB><chapter number>" line per book, sorted by book
# id so the file stays stable between runs.
# Dies when the file cannot be opened or fully written.
sub write_chapter_info {
    my ($path, $books) = @_;

    open(my $fh, '>', $path)
        or die "Cannot open Chapter Info File: $!";
    foreach my $key (sort keys %{$books}) {
        print {$fh} "$key\t$books->{$key}\n";
    }
    #Buffered write errors only surface at close time
    close $fh or die "Cannot write Chapter Info File: $!";
    return;
}

# Sends a plain-text e-mail with the given subject and body through $sender.
# A failed send is reported on STDERR but never aborts the bot.
sub send_notification {
    my ($subject, $body) = @_;

    my $email = Email::Simple->create(
        header => [
            From    => 'FromAddress',
            To      => 'ToAddress',
            Subject => $subject,
        ],
        body => $body,
    );
    my $result = $sender->send($email);
    print STDERR "Failed to send e-mail: $subject\n" unless $result;
    return;
}

while (1) {
    #Re-read the file every round so books can be added/removed while running
    %Books = read_chapter_info($ChapterInfoFile);

    my $url = "https://www.webnovel.com/feed/";
    #RSS Feed Website To Crawl
    my $info = "";
    ### Send URL request Through proxy for fun :)
    #$ua->proxy('http', 'ProxyURL');
    my $response = $ua->get($url);
    if ($response->is_success) {
        $info = $response->content;
    }
    else {
        #website or internet down; $info stays empty so nothing is processed
        print STDERR "Failed to Reach RSS Feed\n";
        send_notification(
            "Failed to get RSS Feed",
            "Failed to Reach RSS Feed\nFix me plz",
        );
    }

    #REGEX Matching for valid RSS Feed URL that goes through every update posted
    while ($info =~ m/<item>.*?<link>(.*?)<\/link>.*?\[CDATA\[(.*?), (.*?),(.*?),/gis) {
        #Grabs the necessary information for each update
        my ($ChapterURL, $BookName, $ChapterName, $temp) = ($1, $2, $3, $4);

        if ($BookName =~ m/Full Marks Hidden/) {
            #Full marks hidden marriage has an extra , in the Book Name
            #Which means we have to go one extra capture group = $temp = $4
            $BookName .= ", $ChapterName";
            $ChapterName = $temp;
        }

        #Second word of the chapter name is the chapter number. Only the
        #leading integer is kept (so "1091:" still counts as 1091), and a
        #fractional number such as "1091.5" is rejected instead of truncated.
        my @tempContents = split(' ', $ChapterName);
        my ($ChapterNumber) = ($tempContents[1] // '') =~ /\A(\d++)(?!\.\d)/;

        #Book ID is the second-to-last path component of the feed link
        my $Book = (split('/', $ChapterURL))[-2];

        $ChapterURL  =~ s/rssbook/book/gis;
        $BookName    =~ s/&\#39\;/\'/g;
        $ChapterName =~ s/&\#39\;/\'/g;
        #Fix some formatting stuff and get BookID number and Chapter Number

        next unless defined $Book && defined $ChapterNumber;
        next unless exists $Books{$Book} && $Books{$Book} == $ChapterNumber;

        #NEW Chapter! Try to get chapter html file
        my $ChapterResponse = $ua->get($ChapterURL);
        unless ($ChapterResponse->is_success) {
            print STDERR "Failed to fetch $ChapterURL: "
                . $ChapterResponse->status_line . "\n";
            next;
        }

        my $ChapterContent = $ChapterResponse->content;
        unless ($ChapterContent =~ m/<div class="cha-words">(.*?)<\/div>/s) {
            print STDERR "They changed the format of their pages again :(\n";
            #More SadFace
            next;
        }

        my $Contents = $1;
        $Contents =~ s/<p>//g;
        $Contents =~ s/<\/p>//g;
        #Decode the quote entities. NOTE(review): the pasted original read
        #s/'/'/g and s/"/"/g (no-ops); the entity names were presumably lost
        #when the script was copied through HTML - confirm against the site.
        $Contents =~ s/&#39;/'/g;
        $Contents =~ s/&quot;/"/g;
        $Contents =~ s/\h+/ /g;
        $Contents =~ s/\v+/\n\t/g;
        #Remove excess whitespace and format better

        my $PasteURL = $bin->paste(
            "$BookName $ChapterName \n$Contents",
            title => "$BookName $ChapterName",
            owned => 1
        );
        #Paste it to pastebin
        unless (defined $PasteURL) {
            print STDERR "Failed at PasteBin :( "
                . ($bin->error // 'unknown error') . "\n";
            #SadFace
            next;
        }

        #Refresh the Reddit token before posting
        $reddit->get_token(%RedditCredentials);
        my $redditPost = $reddit->submit_text(
            subreddit => 'QidianUnderground',
            title     => "$BookName - Chapter $ChapterNumber",
            text      => "\n[Chapter $ChapterNumber]($PasteURL)\n"
        );
        #Post to reddit with various Formatting stuff
        #To Be Implemented Soon
        #set_post_flair(
        #    subreddit=>QidianUnderground,
        #    post_id => $redditPost,
        #    flair_template_id => WAITING FOR THIS);

        send_notification(
            "New Chapter! $BookName $ChapterName",
            "$BookName $ChapterName\n$PasteURL",
        );
        #Inform Whomever by e-mail we have a new post
        print STDERR "New Chapter of $BookName $ChapterName\n";

        $Books{$Book}++;
        #Update Books hash to look for the next chapter
    }

    write_chapter_info($ChapterInfoFile, \%Books);
    #Update Chapter Info File with new chapters to look for

    sleep(5*60+int(rand(100)));
    #Wait ~5-6:40 minutes so we're "not" DDoS-ing webnovel.com/feed/ ;)
}
# [Pastebin page chrome captured with the paste; not part of the script]
# Advertisement
# Add Comment
# Please, Sign In to add comment
# Advertisement