# Mr A

#!/usr/bin/perl

# SBS Playlist To RSS - v0.2.1
# This script will download the ajax xml file containing the latest full episode videos added to the SBS.com.au site and convert this data into an RSS feed format.

# Originally adapted from the code at http://www.perl.com/pub/a/2001/11/15/creatingrss.html by Chris Ball.

# I declare this code to be in the public domain.

# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
# THE SOFTWARE.

use strict;
use warnings;

use LWP::Simple;        #for downloading over HTTP
use HTML::TokeParser;   #to parse HTML files
use XML::RSS;           #to generate the RSS file
use Date::Format;
use XML::Mini::Document; #to grab data from XML files

# --- User setting ------------------------------------------------------------
# ID of the SBS playlist to convert: 94 = full episodes, 95 = sneak peeks.
my $playlist = 94;

# --- Fixed values ------------------------------------------------------------
# Display name for each known playlist ID; used to build the feed title.
my %playlist_hash = (
    94 => 'Latest Full Episodes',
    95 => 'Latest Sneek Peek',
);

# Address of the playlist markup for the chosen ID, and the prefix to which an
# episode code is appended to reach that episode's SMIL document.
my $playlisturl  = sprintf 'http://www.sbs.com.au/shows/ajax/getplaylist/playlistId/%s/', $playlist;
my $smil_baseurl = 'http://player.sbs.com.au/video/smil/index/standalone/';

# Prepare the inputs and the (initially empty) feed.

# Download the playlist markup. LWP::Simple::get() returns undef on failure and
# does not set $!, so the error names the URL instead of printing errno.
my $playlistxml = get( $playlisturl )
    or die "Could not download the playlist from $playlisturl\n";

# Create a TokeParser object that reads from the downloaded markup.
my $playlistxml_stream = HTML::TokeParser->new( \$playlistxml )
    or die "Could not create an HTML::TokeParser for the playlist markup\n";

# Create the RSS object.
# NOTE(review): no encoding is passed here, while the script later prints a
# Content-Type header with charset=ISO-8859-1 -- confirm that the encoding
# XML::RSS declares in the feed agrees with that header.
my $rss = XML::RSS->new( version => '2.0' );

# The channel and its image share the same title.
my $feed_title = "SBS ".$playlist_hash{$playlist};

# Describe the channel. The RSS 2.0 element is spelled "lastBuildDate"; the
# explicit 'GMT' zone makes the formatted time match the "GMT" suffix instead
# of stamping local time with a GMT label.
$rss->channel(
    title         => $feed_title,
    link          => $playlisturl,
    language      => 'en',
    lastBuildDate => time2str( "%a, %d %b %Y %T GMT", time, 'GMT' ),
);

$rss->image(
    title => $feed_title,
    url   => "http://www.sbs.com.au/web/images/sbslogo_footer.jpg",
    link  => $playlisturl,
);

# Walk the playlist markup and add one RSS item per episode.
#
# HTML::TokeParser primer: get_tag(NAME) skips forward from the current position
# to the next NAME tag and returns it (undef once the input is exhausted);
# $tag->[1] is that tag's attribute hash. get_trimmed_text(END) returns the
# plain text between the current position and the END tag.

# Episode type, keyed by the three-letter prefix of the media file name.
# Built once here because it is the same for every episode.
my %epcode_hash = (
    'DOC' => 'Documentary',
    'MOV' => 'Movie',
    'SRS' => 'Series',
);

# Escape a string for use as text or as a double-quoted attribute value in the
# HTML of an item description. Returns '' for undef.
sub _html_escape {
    my ($text) = @_;
    return '' unless defined $text;
    $text =~ s/&/&amp;/g;
    $text =~ s/</&lt;/g;
    $text =~ s/>/&gt;/g;
    $text =~ s/"/&quot;/g;
    return $text;
}

# Download the SMIL document at $smil_url and return the value found at
# smil/head/meta/base, or undef when the document cannot be fetched, cannot be
# parsed, or carries no such value.
sub _smil_base_url {
    my ($smil_url) = @_;

    my $smilxml = get($smil_url);
    return undef unless defined $smilxml && length $smilxml;

    # Remote XML is untrusted: run the parse and the hash walk inside eval so a
    # malformed document skips one episode instead of aborting the whole feed.
    my $base = eval {
        my $xmlDoc = XML::Mini::Document->new();
        $xmlDoc->parse($smilxml);
        my $xmlHash = $xmlDoc->toHash();

        # presumably toHash() yields an array ref when <head> holds several
        # <meta> elements -- TODO confirm; both shapes are accepted here.
        my $meta       = $xmlHash->{smil}{head}{meta};
        my @candidates = ref($meta) eq 'ARRAY' ? @{$meta} : ($meta);
        my ($with_base) =
            grep { ref($_) eq 'HASH' && defined $_->{base} } @candidates;
        $with_base ? $with_base->{base} : undef;
    };
    return $base;
}

EPISODE:
while ( my $tag = $playlistxml_stream->get_tag("a") ) {
    # Only anchors that carry a title attribute introduce an episode.
    next EPISODE unless $tag->[1]{title};
    my $eptitle = $tag->[1]{title};    # Episode title

    # The episode thumbnail is the next <img>. If there is none the document
    # has ended and nothing more can be read from it.
    my $img_tag = $playlistxml_stream->get_tag('img')
        or last EPISODE;
    my $epthumb = $img_tag->[1]{src};    # URL of the thumbnail image

    # Skip the following <a>, then read the time and date text of the next <p>.
    # All reads from the stream happen before any "next" below so that skipping
    # an episode always leaves the parser at the start of the following one.
    $playlistxml_stream->get_tag('a');
    $playlistxml_stream->get_tag('p');
    my $eptime = $playlistxml_stream->get_trimmed_text('/p');

    next EPISODE unless defined $epthumb && length $epthumb;

    # Derive the media file name and the episode code from the thumbnail URL:
    #   $epthumb       = http://videocdn.sbs.com.au/u/thumbnails/SRS_FE_Global_Village_Ep_19_44_48467.jpg
    #   $filename_base = SRS_FE_Global_Village_Ep_19_44_48467
    #   $epcode        = 48467
    # The extension is stripped by pattern rather than by assuming 4 characters.
    my $filename_base = substr( $epthumb, rindex( $epthumb, "/" ) + 1 );
    $filename_base =~ s/\.[^.]*\z//;
    my $epcode = substr( $filename_base, rindex( $filename_base, "_" ) + 1 );
    next EPISODE unless length $epcode;

    # Look up where the episode's media files live, e.g.
    # "http://videocdn.sbs.com.au/u/video/" or
    # "rtmp://specialbsc.fcod.llnwd.net/a1768/o21/".
    my $baseurl = _smil_base_url( $smil_baseurl.$epcode );
    unless ( defined $baseurl ) {
        warn "Skipping \"$eptitle\": no media base URL from $smil_baseurl$epcode\n";
        next EPISODE;
    }

    # Only HTTP and RTMP delivery are known; anything else is left out of the
    # feed. lc() because URL schemes are case-insensitive.
    my $delivery_protocol = lc substr( $baseurl, 0, 4 );
    next EPISODE
        unless $delivery_protocol eq 'http' || $delivery_protocol eq 'rtmp';

    # One media file per bitrate.
    my $url128  = $baseurl.$filename_base."_128K.flv";
    my $url300  = $baseurl.$filename_base."_300K.flv";
    my $url1000 = $baseurl.$filename_base."_1000K.flv";

    # Type of episode (SRS|DOC|MOV) as a full word, or "" when unrecognised.
    my $code1char = substr( $filename_base, 0, 3 );
    my $code1 = exists $epcode_hash{$code1char} ? $epcode_hash{$code1char} : "";

    # The description is HTML inside CDATA, so every scraped value is escaped.
    # Escaping ">" also keeps a stray "]]>" from ending the CDATA section early.
    my $protocol_label = uc $delivery_protocol;
    my ( $thumb_html, $title_html, $time_html, $link128, $link300, $link1000 ) =
        map { _html_escape($_) }
        ( $epthumb, $eptitle, $eptime, $url128, $url300, $url1000 );

    my $description = "<![CDATA[<img src=\"$thumb_html\" width=\"100\" height=\"56\" /><br />
                        $title_html<br />
                        $time_html<br />
                        Delivery: FLV over <b>$protocol_label</b><br />
                        Links:  <a href=\"$link128\">128k</a>, <a href=\"$link300\">300k</a>, <a href=\"$link1000\">1000k</a><br />
                        Type: $code1<br />]]>";

    my %item = (
        title       => $eptitle,
        permaLink   => $smil_baseurl.$epcode,
        description => $description,
    );

    # RTMP items deliberately carry no enclosure (presumably because a feed
    # reader cannot download an rtmp:// URL); HTTP items offer the 1000K file.
    $item{enclosure} = { url => $url1000, type => "video/x-flv" }
        if $delivery_protocol eq 'http';

    # Add the item to the RSS feed.
    $rss->add_item(%item);
}
# Emit the response: the header, an empty line, then the feed itself.
# The empty line ("\n\n") is what separates headers from the body; without it
# the XML declaration is glued onto the end of the Content-Type line.
print "Content-Type: application/xml; charset=ISO-8859-1\n\n";

# File name stem used by the optional save() below: the playlist name in
# lower case with all whitespace removed.
my $rssfilename = lc($playlist_hash{$playlist});
$rssfilename =~ s/\s+//g;
#$rss->save("sbs".$rssfilename.".rss"); #this will save the RSS XML feed to a file when you run the script.
print $rss->as_string; #this will send the RSS XML feed to stdout when you run the script.