Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
# Convert a single vBulletin forum index into a W3C-valid ATOM feed.
package vbulletin2atom;
use strict;
use warnings;
use base 'Exporter';
use POSIX qw(strftime);
use String::CRC32;

our $VERSION = '1.10';
# NOTE(review): no "pver" sub is visible in this section of the file --
# confirm it is defined elsewhere before relying on this export.
our @EXPORT = qw(pver);

# ATOM <entry> elements accumulated by vb2a_parse_html and written out by
# vb2a_save_atom.
my $atomFileData = '';

# Where the finished feed is written.
my $atomFilename = '/home/pwtenny/devel.mediapundit.net/thephun.xml';

# Forum index URL prefix; $forumId is appended to select the forum.
my $forumUrl = 'http://forum.phun.org/forumdisplay.php?f=';
my $forumId = 15;

# Temporary local copy of the forum index page (fetched, parsed, then deleted).
my $htmlFilename = 'phunindex.html';
# Entry point: fetch the forum index, convert it to ATOM entries, remove the
# temporary HTML file and write the feed.
#
# The run is skipped when vb2a_check_run() returns true, which it does whenever
# the current minute is not 0 (i.e. it is not the top of the hour).
#
# Returns nothing when the run is skipped, otherwise whatever vb2a_save_atom()
# returns.
sub vb2a_run
{
    # Check the time to see if we are allowed to run. Note the inverted sense:
    # a true value from vb2a_check_run() means "do not run now".
    return if vb2a_check_run();

    vb2a_retrieve_html();
    vb2a_parse_html();
    vb2a_cleanup_html();
    return vb2a_save_atom();
}
# Download the forum index page ($forumUrl . $forumId) into $htmlFilename
# using wget.
#
# Dies if wget cannot be run or exits non-zero, so that a failed download does
# not go on to be parsed as an empty page.
# Returns the (zero) exit status of wget on success.
sub vb2a_retrieve_html
{
    my $pageUrl = "$forumUrl$forumId";

    # List form of system() bypasses the shell, so characters such as "?" and
    # "&" in the URL or spaces in the filename need no quoting.
    my $status = system('wget', '-q', $pageUrl, '-O', $htmlFilename);
    die "wget failed for $pageUrl (status $status)\n" if $status != 0;

    return $status;
}
- # Parse a Vbulletin forum index for thread-by-thread data.
# Parse a vBulletin forum index for thread-by-thread data.
#
# Reads the page saved in $htmlFilename and appends one ATOM <entry> per
# thread to the package-level $atomFileData. Threads whose title link or
# thread id cannot be extracted are skipped.
#
# Dies if the HTML file cannot be opened. Returns nothing useful.
sub vb2a_parse_html
{
    # Slurp the whole page; the patterns below span multiple lines.
    open my $htmlFh, '<', $htmlFilename
        or die "Couldn't find the forum HTML file to parse: $htmlFilename\n";
    my $forumHTML = join "", <$htmlFh>;
    close $htmlFh;

    # All threads on the page.
    my ($threadBlock) = $forumHTML =~ /<ol id=.threads. class=.threads.>(.*?)<\/ol>/igs;
    return unless defined $threadBlock;

    # One thread per loop.
    while ($threadBlock =~ /<li class=\".*?\" id=\"thread_\d+\">(.*?)<\/li>/igs)
    {
        my $thread = $1;

        # Get the post URL and topic.
        my ($threadHref, $threadTopic) =
            $thread =~ /<a class=\"title\" href=\"(.*?)\" id=\"thread_title_\d+\">(.*?)<\/a>/igs;
        next unless defined $threadHref;

        # Pull the numeric thread id out of the captured href (not out of $1,
        # which may belong to a different match) and build a canonical URL.
        my ($threadId) = $threadHref =~ /\?t=(\d+)/;
        next unless defined $threadId;
        my $threadUrl = "http://forum.phun.org/showthread.php?t=".$threadId;

        # Get the post date.
        my ($threadDate) = $thread =~ /<a href=\"member.php\?u=[A-Za-z0-9?=&;].*\" class=\".*?\" title=\"Started by [A-Za-z0-9].* on (.*?)\">/igs;

        # Convert three different kind of dates into one format.
        $threadDate = vb2a_fixdate($threadDate);

        # Each entry requires a unique ID that won't change between runs; the
        # thread id itself is used for that.
        # Construct a valid ATOM entry and store it.
        $atomFileData .= << "ATOMENTRY";
<entry>
<title>$threadTopic</title>
<link href="$threadUrl"/>
<id>tag:forum.phun.org,2014://1.$threadId</id>
<updated>$threadDate</updated>
<summary type="html" xml:lang="en" xml:base="http://www.mediapundit.net/"><![CDATA[<a href="$threadUrl">$threadTopic</a>]]></summary>
</entry>
ATOMENTRY
    }

    return;
}
- # Delete the old index that we gather data from before exiting.
# Remove the temporary forum index page once its data has been gathered.
# Returns unlink's result (the number of files deleted).
sub vb2a_cleanup_html
{
    return unlink($htmlFilename);
}
- # Write out a valid ATOM feed.
# Write out a valid ATOM feed.
#
# Wraps the entries accumulated in $atomFileData in the feed header/footer and
# writes the result to $atomFilename. The feed-level <updated> element carries
# the current UTC time so readers can see the feed has changed.
#
# Dies if the output file cannot be opened or closed. Returns true on success.
sub vb2a_save_atom
{
    my $feedUpdated = strftime("%Y-%m-%dT%H:%M:%SZ", gmtime());

    open my $atomFh, '>', $atomFilename
        or die "Couldn't open the ATOM file for writing: $atomFilename: $!\n";

    print {$atomFh} qq(<?xml version="1.0" encoding="utf-8"?>\n);
    print {$atomFh} << "FULLATOMFEED";
<feed xmlns="http://www.w3.org/2005/Atom">
<title>The Phun</title>
<link rel="alternate" type="text/html" href="$forumUrl$forumId" />
<link rel="self" type="application/atom+xml" href="http://devel.mediapundit.net/thephun.xml" />
<updated>$feedUpdated</updated>
<author>
<name>vbulletin2atom.pl</name>
</author>
<id>urn:uuid:60a76c80-d399-11d9-b93C-0003939e0af6</id>
$atomFileData
</feed>
FULLATOMFEED

    # Buffered write errors only surface at close, so check it.
    close $atomFh
        or die "Couldn't finish writing the ATOM file: $atomFilename: $!\n";

    return 1;
}
- # Convert odd forum dates into real dates accepted by the ATOM spec.
# Turn a 12-hour clock reading into a zero-padded "HH:MM:00" string.
sub _vb2a_clock24
{
    my ($hour, $minute, $ampm) = @_;

    $hour %= 12;                      # 12 AM -> 00; 12 PM -> 0, then +12 below
    $hour += 12 if $ampm eq 'PM';

    return sprintf "%02d:%02d:00", $hour, $minute;
}

# Convert odd forum dates into real dates accepted by the ATOM spec.
#
# Accepts one of three forms and returns "YYYY-MM-DDTHH:MM:00Z":
#   "Yesterday 3:15 PM"   -> yesterday's local date plus the given time
#   "Today 3:15 PM"       -> today's local date plus the given time
#   "12-31-2014 3:15 PM"  -> 2014-12-31 plus the given time
# Anything else (including undef) falls back to the current UTC time so the
# caller always gets a well-formed timestamp.
#
# NOTE(review): the forum's clock times are labelled "Z" (UTC) without any
# zone conversion, exactly as before -- confirm the forum really reports UTC.
sub vb2a_fixdate
{
    my $oldDate = shift;
    $oldDate = '' unless defined $oldDate;

    # Relative dates: take the calendar date from the local clock.
    if ($oldDate =~ /(Yesterday|Today) (\d+):(\d+) (AM|PM)/)
    {
        my ($dayWord, $hour, $minute, $ampm) = ($1, $2, $3, $4);

        # Minus 24 hours for "yesterday".
        my $when = $dayWord eq 'Yesterday' ? time() - 86400 : time();

        return strftime("%Y-%m-%d", localtime($when))
             . "T" . _vb2a_clock24($hour, $minute, $ampm) . "Z";
    }

    # Absolute dates: convert month-day-year into year-month-day.
    if ($oldDate =~ /(\d+)-(\d+)-(\d+) (\d+):(\d+) (AM|PM)/)
    {
        my ($month, $day, $year, $hour, $minute, $ampm) = ($1, $2, $3, $4, $5, $6);

        return sprintf("%04d-%02d-%02d", $year, $month, $day)
             . "T" . _vb2a_clock24($hour, $minute, $ampm) . "Z";
    }

    # Unrecognised input: emit a valid timestamp rather than a malformed one.
    return strftime("%Y-%m-%dT%H:%M:%SZ", gmtime());
}
- # Only run once per hour, at the top of the hour.
# Only run once per hour, at the top of the hour.
#
# Returns 1 when the current local minute is NOT 0 (the caller should skip
# this run) and 0 when it is exactly the top of the hour.
sub vb2a_check_run
{
    # Element 1 of localtime's list is the minute (0-59).
    my $minute = (localtime)[1];

    return 1 if $minute != 0;
    return 0;
}
- 1;
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement