Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- #!/usr/bin/perl
- # SBS Playlist To RSS - v0.2.1
- # This script will download the ajax xml file containing the latest full episode videos added to the SBS.com.au site and convert this data into an RSS feed format.
- # Originally adapted from the code at http://www.perl.com/pub/a/2001/11/15/creatingrss.html by Chris Ball.
- # I declare this code to be in the public domain.
- # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
- # THE SOFTWARE.
- use strict;
- use warnings;
- use LWP::Simple; #for downloading over HTTP
- use HTML::TokeParser; #to parse HTML files
- use XML::RSS; #to generate the RSS file
- use Date::Format;
- use XML::Mini::Document; #to grab data from XML files
# --- Settings you are expected to edit ---
# ID of the SBS playlist to convert: 94 = full episodes, 95 = sneak peeks.
my $playlist = 94;

# --- Fixed values ---
# Feed name shown for each known playlist ID.
my %playlist_hash = (
    94 => 'Latest Full Episodes',
    95 => 'Latest Sneek Peek',
);

# AJAX endpoint that serves the markup for the selected playlist.
my $playlisturl = sprintf 'http://www.sbs.com.au/shows/ajax/getplaylist/playlistId/%s/', $playlist;

# Prefix of the per-episode SMIL URL; the episode code is appended to it.
my $smil_baseurl = 'http://player.sbs.com.au/video/smil/index/standalone/';
# Prepare the inputs and the (still empty) RSS feed.

# Download the playlist markup. LWP::Simple's get() returns undef on failure
# and does not set $!, so the error names the URL instead of printing errno.
my $playlistxml = get($playlisturl)
    or die "Could not download playlist from $playlisturl\n";

# Tokenise the downloaded markup. Passing a scalar reference makes
# HTML::TokeParser read from the string instead of opening a file.
my $playlistxml_stream = HTML::TokeParser->new( \$playlistxml )
    or die "Could not create an HTML::TokeParser for the playlist markup\n";

# Create the RSS 2.0 feed object.
my $rss = XML::RSS->new( version => '2.0' );

# Channel metadata. The key must be spelled lastBuildDate to be emitted, and
# the 'GMT' zone argument makes time2str format UTC rather than local time,
# so the value matches the literal "GMT" suffix in the template.
$rss->channel(
    title         => "SBS " . $playlist_hash{$playlist},
    link          => $playlisturl,
    language      => 'en',
    lastBuildDate => time2str( "%a, %d %b %Y %T GMT", time, 'GMT' ),
);

# Channel logo.
$rss->image(
    title => "SBS " . $playlist_hash{$playlist},
    url   => "http://www.sbs.com.au/web/images/sbslogo_footer.jpg",
    link  => $playlisturl,
);
# Walk the playlist markup and add one RSS item per episode.
#
# Each episode is read from the token stream in this order: an <a> tag with a
# title attribute (the episode title), the next <img> (thumbnail URL), the
# next <a> (skipped), and the next <p> (date/time text up to </p>).
# The thumbnail filename gives the media filename base and the episode code,
# e.g.
#   thumbnail:     http://videocdn.sbs.com.au/u/thumbnails/SRS_FE_Global_Village_Ep_19_44_48467.jpg
#   filename base: SRS_FE_Global_Village_Ep_19_44_48467
#   episode code:  48467
# The episode code is then used to fetch a SMIL document whose
# smil/head/meta "base" value is the prefix of the media URLs
# (http://... or rtmp://...).

# Maps the three-letter filename prefix to a readable episode type.
my %epcode_hash = (
    'DOC' => 'Documentary',
    'MOV' => 'Movie',
    'SRS' => 'Series',
);

EPISODE:
while ( my $tag = $playlistxml_stream->get_tag("a") ) {

    # Only <a> tags that carry a title attribute start an episode entry.
    my $eptitle = $tag->[1]{title};
    next EPISODE if !$eptitle;

    # Read every piece of this entry from the stream before validating
    # anything, so that skipping a bad entry cannot leave the parser halfway
    # through it. Stop cleanly if the markup ends before the thumbnail.
    my $img_tag = $playlistxml_stream->get_tag('img') or last EPISODE;
    my $epthumb = $img_tag->[1]{src};
    $playlistxml_stream->get_tag('a');
    $playlistxml_stream->get_tag('p');
    my $eptime = $playlistxml_stream->get_trimmed_text('/p');
    $eptime = '' if !defined $eptime;

    # The description is HTML, so a literal ampersand must be written as an
    # entity. (The previous substitution replaced "&" with itself.)
    $eptime =~ s/&/&amp;/g;

    if ( !defined $epthumb || $epthumb eq '' ) {
        warn "Skipping '$eptitle': thumbnail has no src attribute\n";
        next EPISODE;
    }

    # Filename without directory and without its extension.
    my $filename_base = substr( $epthumb, rindex( $epthumb, "/" ) + 1 );
    $filename_base =~ s/\.[^.]*\z//;

    # Everything after the last underscore is the episode code.
    my $epcode = substr( $filename_base, rindex( $filename_base, "_" ) + 1 );

    # Look up the delivery details for this episode.
    my $smil_url = $smil_baseurl . $epcode;
    my $smilxml  = get($smil_url);
    if ( !defined $smilxml ) {
        warn "Skipping '$eptitle': could not download $smil_url\n";
        next EPISODE;
    }

    # Parsing and the nested lookup are wrapped in eval because a document of
    # unexpected shape would otherwise abort the whole feed under strict refs.
    my $meta = eval {
        my $xmlDoc = XML::Mini::Document->new();
        $xmlDoc->parse($smilxml);
        $xmlDoc->toHash()->{smil}{head}{meta};
    };
    my $baseurl = ref($meta) eq 'HASH' ? $meta->{base} : undef;
    if ( !defined $baseurl || ref($baseurl) || $baseurl eq '' ) {
        warn "Skipping '$eptitle': no media base URL in $smil_url\n";
        next EPISODE;
    }

    # URL schemes are case-insensitive, so normalise before comparing.
    # Anything other than HTTP or RTMP is ignored, as before.
    my $delivery_protocol = lc substr( $baseurl, 0, 4 );
    next EPISODE
        if $delivery_protocol ne "http" && $delivery_protocol ne "rtmp";

    my $url128  = $baseurl . $filename_base . "_128K.flv";
    my $url300  = $baseurl . $filename_base . "_300K.flv";
    my $url1000 = $baseurl . $filename_base . "_1000K.flv";

    # Episode type from the filename prefix (SRS|DOC|MOV); empty if unknown,
    # which also avoids an uninitialised-value warning.
    my $code1char = substr( $filename_base, 0, 3 );
    my $code1 = exists $epcode_hash{$code1char} ? $epcode_hash{$code1char} : "";

    my $protocol_label = uc $delivery_protocol;
    my $description = join "\n",
        "<![CDATA[<img src=\"$epthumb\" width=\"100\" height=\"56\" /><br />",
        "$eptitle<br />",
        "$eptime<br />",
        "Delivery: FLV over <b>$protocol_label</b><br />",
        "Links: <a href=\"$url128\">128k</a>, <a href=\"$url300\">300k</a>, <a href=\"$url1000\">1000k</a><br />",
        "Type: $code1<br />]]>";

    my %item = (
        title       => $eptitle,
        permaLink   => $smil_url,
        description => $description,
    );

    # Only HTTP episodes get an enclosure; RTMP items deliberately have none
    # (presumably because feed readers cannot download RTMP streams).
    if ( $delivery_protocol eq "http" ) {
        $item{enclosure} = { url => $url1000, type => "video/x-flv" };
    }

    $rss->add_item(%item);
}
# Emit the CGI response header so a browser treats the output as XML.
# A CGI header block must end with a blank line; without the "\n\n" the XML
# document would be appended to the Content-Type line itself.
# NOTE(review): XML::RSS declares UTF-8 in the XML prolog unless an encoding is
# passed to new(); confirm whether the charset here should match it.
print "Content-Type: application/xml; charset=ISO-8859-1\n\n";

# Filename stem for the optional on-disk copy: the lower-cased playlist name
# with all whitespace removed. It is only used by the commented-out save().
my $rssfilename = lc( $playlist_hash{$playlist} );
$rssfilename =~ s/\s+//g;

#$rss->save("sbs".$rssfilename.".rss"); #this will save the RSS XML feed to a file when you run the script.
print $rss->as_string;    # Send the RSS XML feed to stdout.
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement