#!/usr/bin/perl
# SBS Playlist To RSS - v0.2.1
# This script will download the ajax xml file containing the latest full episode videos added to the SBS.com.au site and convert this data into an RSS feed format.
# Originally adapted from the code at http://www.perl.com/pub/a/2001/11/15/creatingrss.html by Chris Ball.
# I declare this code to be in the public domain.
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
# THE SOFTWARE.
use strict;
use warnings;
use LWP::Simple; #for downloading over HTTP
use HTML::TokeParser; #to parse HTML files
use XML::RSS; #to generate the RSS file
use Date::Format;
use XML::Mini::Document; #to grab data from XML files
# User-configurable setting: the id of the SBS playlist to convert.
#   94 = latest full episodes, 95 = latest sneak peeks
my $playlist = 94;

# Constants.
# Feed name for each supported playlist id; used to build the feed title.
my %playlist_hash = (
    94 => 'Latest Full Episodes',
    95 => 'Latest Sneek Peek',
);

# Ajax endpoint for the selected playlist.
my $playlisturl = sprintf
    'http://www.sbs.com.au/shows/ajax/getplaylist/playlistId/%s/',
    $playlist;

# Prefix of the per-episode SMIL document URL; the episode code is appended.
my $smil_baseurl = 'http://player.sbs.com.au/video/smil/index/standalone/';
# Prepare some things...

# Download the playlist document. LWP::Simple's get() returns undef on
# failure and does not set $!, so test definedness and report the URL.
my $playlistxml = get($playlisturl);
defined $playlistxml
    or die "Could not download the playlist from $playlisturl\n";

# Create a TokeParser object over the downloaded markup.
my $playlistxml_stream = HTML::TokeParser->new( \$playlistxml )
    or die "Could not create a parser for the playlist from $playlisturl\n";

# Create the RSS object.
my $rss = XML::RSS->new( version => '2.0' );

# Prep the RSS channel. The timestamp is formatted in the GMT zone so that
# it agrees with the literal "GMT" suffix in the template.
$rss->channel(
    title         => 'SBS ' . $playlist_hash{$playlist},
    language      => 'en',
    lastBuildDate => time2str( '%a, %d %b %Y %T GMT', time, 'GMT' ),
);
$rss->image(
    title => 'SBS ' . $playlist_hash{$playlist},
    url   => 'http://www.sbs.com.au/web/images/sbslogo_footer.jpg',
);
# Convert every playlist entry into an RSS item.
#
# An entry is recognised as an <a> tag with a title attribute, followed by
# an <img> (the thumbnail), another <a>, and a <p> holding the date text.
# For each entry the per-episode SMIL document is downloaded to learn the
# media base URL, whose scheme decides the delivery protocol:
#   http -> the item gets an enclosure pointing at the 1000K file
#   rtmp -> the item is added without an enclosure
#   else -> the entry is skipped
# Entries whose thumbnail or SMIL data cannot be obtained are skipped.

# Three-letter filename prefix => programme type shown in the description.
my %type_for = (
    DOC => 'Documentary',
    MOV => 'Movie',
    SRS => 'Series',
);

# get_tag skips forward from the current position to the named tag, and
# get_trimmed_text grabs the plain text up to the named end tag.
ITEM:
while ( my $anchor = $playlistxml_stream->get_tag('a') ) {

    # Only anchors carrying a title start a playlist entry.
    my $eptitle = $anchor->[1]{title};
    next ITEM if !$eptitle;

    # The thumbnail image follows; if there is none the stream is exhausted.
    my $thumb_tag = $playlistxml_stream->get_tag('img')
        or last ITEM;
    my $epthumb = $thumb_tag->[1]{src};

    # Consume the rest of the entry before any early exit below, so the
    # parser is always positioned at the start of the next entry.
    $playlistxml_stream->get_tag('a');
    $playlistxml_stream->get_tag('p');
    my $eptime = $playlistxml_stream->get_trimmed_text('/p');

    # The text is embedded in HTML below, so escape ampersands
    # (although we don't expect any).
    $eptime =~ s/&/&amp;/g;

    next ITEM if !defined $epthumb;

    # Get the video filename from the thumbnail URL, e.g.
    #   $epthumb       = http://videocdn.sbs.com.au/u/thumbnails/SRS_FE_Global_Village_Ep_19_44_48467.jpg
    #   $filename_base = SRS_FE_Global_Village_Ep_19_44_48467
    #   $epcode        = 48467
    ( my $filename_base = $epthumb ) =~ s{\A.*/}{}xms;    # drop the path
    $filename_base =~ s{[.][^.]*\z}{}xms;                 # drop the extension
    next ITEM if $filename_base eq '';

    # Everything after the last underscore (the whole name if there is none).
    my $epcode = substr $filename_base, rindex( $filename_base, '_' ) + 1;

    # Look up the episode's delivery details in its SMIL document.
    my $smilxml = get( $smil_baseurl . $epcode );
    if ( !defined $smilxml ) {
        warn "Could not download SMIL data for episode $epcode; skipping\n";
        next ITEM;
    }
    my $xmlDoc = XML::Mini::Document->new();
    $xmlDoc->parse($smilxml);
    my $xmlHash = $xmlDoc->toHash();

    # eval guards against an unexpected document shape (a non-hash node on
    # the path would otherwise be fatal under strict refs).
    # e.g. "http://videocdn.sbs.com.au/u/video/"
    #   or "rtmp://specialbsc.fcod.llnwd.net/a1768/o21/"
    my $baseurl = eval { $xmlHash->{smil}{head}{meta}{base} };
    if ( !defined $baseurl ) {
        warn "No media base URL in SMIL data for episode $epcode; skipping\n";
        next ITEM;
    }

    # Only HTTP and RTMP delivery are supported.
    my $delivery_protocol = lc substr $baseurl, 0, 4;
    my $is_http = $delivery_protocol eq 'http';
    next ITEM if !$is_http && $delivery_protocol ne 'rtmp';

    my $url128  = $baseurl . $filename_base . '_128K.flv';
    my $url300  = $baseurl . $filename_base . '_300K.flv';
    my $url1000 = $baseurl . $filename_base . '_1000K.flv';

    # Programme type from the filename prefix (SRS|DOC|MOV); empty when the
    # prefix is unknown, to avoid an uninitialised-value warning.
    my $type_code = substr $filename_base, 0, 3;
    my $type_name = exists $type_for{$type_code} ? $type_for{$type_code} : '';

    my $protocol_label = uc $delivery_protocol;
    my $description    = join "\n",
        qq{<![CDATA[<img src="$epthumb" width="100" height="56" /><br />},
        qq{$eptitle<br />},
        qq{$eptime<br />},
        qq{Delivery: FLV over <b>$protocol_label</b><br />},
        qq{Links: <a href="$url128">128k</a>, <a href="$url300">300k</a>, <a href="$url1000">1000k</a><br />},
        qq{Type: $type_name<br />]]>};

    my %item = (
        title       => $eptitle,
        permaLink   => $smil_baseurl . $epcode,
        description => $description,
    );

    # Only files served over HTTP are offered as an enclosure.
    if ($is_http) {
        $item{enclosure} = { url => $url1000, type => 'video/x-flv' };
    }

    # Add the item to the RSS feed.
    $rss->add_item(%item);
}
# Send the response header so a browser treats the output as XML. The header
# section must be terminated by a blank line, hence the two newlines;
# without them the feed would run straight on from the header line.
# NOTE(review): XML::RSS declares UTF-8 in the XML prolog unless told
# otherwise, while this header says ISO-8859-1 - confirm which is intended.
print "Content-Type: application/xml; charset=ISO-8859-1\n\n";

# File name stem for the optional on-disk copy: the playlist name in lower
# case with all whitespace removed.
my $rssfilename = lc $playlist_hash{$playlist};
$rssfilename =~ s/\s+//g;

#$rss->save("sbs".$rssfilename.".rss"); #this will save the RSS XML feed to a file when you run the script.
print $rss->as_string;    # this sends the RSS XML feed to stdout when you run the script.