SHARE
TWEET

Mr A

a guest Sep 9th, 2009 231 Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
  1. #!/usr/bin/perl
  2.  
  3. # SBS Playlist To RSS - v0.2.1
  4. # This script will download the ajax xml file containing the latest full episode videos added to the SBS.com.au site and convert this data into an RSS feed format.
  5.  
  6. # Originally adapted from the code at http://www.perl.com/pub/a/2001/11/15/creatingrss.html by Chris Ball.
  7.  
  8. # I declar this code to be in the public domain.
  9.  
  10. # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  11. # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  12. # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  13. # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  14. # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  15. # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
  16. # THE SOFTWARE.
  17.  
  18. use strict;
  19. use warnings;
  20.  
  21. use LWP::Simple;                #for downloading over HTTP
  22. use HTML::TokeParser;   #to parse HTML files
  23. use XML::RSS;                   #to generate the RSS file
  24. use Date::Format;
  25. use XML::Mini::Document; #to grab data from XML files
  26.  
  27. # Variables that you should set,
  28. my $playlist = 94; #94=full ep, 95=sneekpeek
  29.  
  30. # Constants
  31. my %playlist_hash = (
  32.                      '94'    => 'Latest Full Episodes',
  33.                      '95'    => 'Latest Sneek Peek',
  34.                     );
  35.  
  36. my $playlisturl = "http://www.sbs.com.au/shows/ajax/getplaylist/playlistId/".$playlist."/";
  37. my $smil_baseurl = "http://player.sbs.com.au/video/smil/index/standalone/";
  38.  
  39. #Prepare some things...
  40. # LWP::Simple Download the playlist xml file using get();.
  41. my $playlistxml = get( $playlisturl ) or die $!;
  42.  
  43. # Create a TokeParser object, using our downloaded HTML.
  44. my $playlistxml_stream = HTML::TokeParser->new( \$playlistxml ) or die $!;
  45.  
  46. # Create the RSS object.
  47. my $rss = XML::RSS->new( version => '2.0' );
  48.  
  49. # Prep the RSS.
  50. $rss->channel(
  51.         title           => "SBS ".$playlist_hash{$playlist},
  52.         link            => $playlisturl,
  53.         language        => 'en',
  54.         lastBulidDate   => time2str("%a, %d %b %Y %T GMT", time),
  55.         );
  56.  
  57. $rss->image(
  58.         title   => "SBS ".$playlist_hash{$playlist},
  59.         url             => "http://www.sbs.com.au/web/images/sbslogo_footer.jpg",
  60.         link    => $playlisturl
  61.         );
  62.  
  63. # Declare variables.
  64. my ($tag);
  65. my (
  66.     $eptitle,                   #Episode Title
  67.     $epthumb,                   #URL of Episode Thumbnail Image
  68.     $eptime,                    #Time and Date
  69.     $filename_base,     #eg. "SRS_FE_Global_Village_Ep_19_44_48467"
  70.     $baseurl,                   #eg. "http://videocdn.sbs.com.au/u/video/" or "rtmp://specialbsc.fcod.llnwd.net/a1768/o21/"
  71.     $epcode,                    #eg. 48467
  72.     $delivery_protocol, #RTMP or HTTP
  73.     $img,                               #
  74.     $url128,                    #URL of 128K episode
  75.     $url300,                    #URL of 300K episode
  76.     $url1000,                   #URL of 1000K episode
  77.     $code1char,                 #Type of Episode (as char) SRS, DOC...
  78.     $code1                      #Type of Episode (as full string) Series, Documentary...
  79.    );
  80.  
  81. #get_tag skips forward in the HTML from our current position to the tag specified, and
  82. #get_trimmed_text  will grab plaintext from the current position to the end position specified.
  83.  
  84. # Find an <a> tag.
  85. while ( $tag = $playlistxml_stream->get_tag("a") ) {
  86.         # Inside this loop, $tag is at a <a> tag.
  87.         # But do we have a "title" token, too?
  88.         if ($tag->[1]{title}) {
  89.                 # We do!, for each item (video)...
  90.                 $eptitle = $tag->[1]{title};
  91.  
  92.                 # The next step is an <img></img> set.
  93.                 $tag = $playlistxml_stream->get_tag('img');
  94.                 $epthumb = $tag->[1]{src};
  95.                
  96.                 #get the flv filename from the img url
  97.                 #eg,
  98.                 #   $epthumb = http://videocdn.sbs.com.au/u/thumbnails/SRS_FE_Global_Village_Ep_19_44_48467.jpg
  99.                 #   $filename_base = SRS_FE_Global_Village_Ep_19_44_48467
  100.                 #   $epcode = 48467
  101.                 $filename_base = substr($epthumb, rindex($epthumb,"/") + 1, length($epthumb) - (rindex($epthumb,"/") + 1) - 4);
  102.                 $epcode = substr($filename_base, rindex($filename_base, "_") + 1, length($filename_base)-rindex($filename_base, "_") + 1);
  103.                
  104.                 # Now lookup the episode format (RTMP, HTTP) and file details.
  105.                 my $smilxml = get( $smil_baseurl.$epcode );
  106.                
  107.                 my $xmlDoc = XML::Mini::Document->new();
  108.                 $xmlDoc->parse($smilxml);
  109.                 my $xmlHash = $xmlDoc->toHash();
  110.                
  111.                 $baseurl = $xmlHash->{smil}{head}{meta}{base};
  112.                
  113.                 $delivery_protocol = substr($baseurl,0,4);
  114.                
  115.                 $url128 = $baseurl.$filename_base."_128K.flv";
  116.                 $url300 = $baseurl.$filename_base."_300K.flv";
  117.                 $url1000 = $baseurl.$filename_base."_1000K.flv";
  118.        
  119.                
  120.                 #SRS|DOC|MOV
  121.                 $code1char = substr($filename_base,0,3);
  122.  
  123.                 my %epcode_hash = (
  124.                     'DOC'    => 'Documentary',
  125.                     'MOV'    => 'Movie',
  126.                     'SRS'    => 'Series',
  127.                 );
  128.                
  129.                 if (exists($epcode_hash{$code1char})) { #in case we don't get a match we don't want an uninit var warning
  130.                         $code1 = $epcode_hash{$code1char};
  131.                 }else {
  132.                         $code1 = "";
  133.                 }
  134.                
  135.                 $playlistxml_stream->get_tag('a');
  136.                 $tag = $playlistxml_stream->get_tag('p');
  137.  
  138.                 # Now we can grab $eptime, by using get_trimmed_text up to the close of the <p> tag.
  139.                 $eptime = $playlistxml_stream->get_trimmed_text('/p');
  140.  
  141.                 # We need to escape ampersands, as they start entity references in XML.
  142.                 # Although we don't expect any..
  143.                 $eptime =~ s/&/&amp;/g;
  144.  
  145.                 if ($delivery_protocol eq "http") {
  146.                         # Add the item to the RSS feed.
  147.                         $rss->add_item(
  148.                                 title           => $eptitle,
  149.                                 permaLink       => $smil_baseurl.$epcode,
  150.                                 enclosure       => { url=>$url1000, type=>"video/x-flv"},
  151.                                 description     => "<![CDATA[<img src=\"$epthumb\" width=\"100\" height=\"56\" /><br />
  152.                                                 $eptitle<br />
  153.                                                 $eptime<br />
  154.                                                 Delivery: FLV over <b>HTTP</b><br />
  155.                                                 Links:  <a href=\"$url128\">128k</a>, <a href=\"$url300\">300k</a>, <a href=\"$url1000\">1000k</a><br />
  156.                                                 Type: $code1<br />]]>");       
  157.                 }elsif ($delivery_protocol eq "rtmp") {
  158.                         #no enclosure
  159.                         # Add the item to the RSS feed.
  160.                         $rss->add_item(
  161.                                 title           => $eptitle,
  162.                                 permaLink       => $smil_baseurl.$epcode,
  163.                                 description     => "<![CDATA[<img src=\"$epthumb\" width=\"100\" height=\"56\" /><br />
  164.                                                 $eptitle<br />
  165.                                                 $eptime<br />
  166.                                                 Delivery: FLV over <b>RTMP</b><br />
  167.                                                 Links:  <a href=\"$url128\">128k</a>, <a href=\"$url300\">300k</a>, <a href=\"$url1000\">1000k</a><br />
  168.                                                 Type: $code1<br />]]>");
  169.                 }
  170.  
  171.         }
  172. }
  173. print "Content-Type: application/xml; charset=ISO-8859-1"; # To help your browser display the feed better in your browser.
  174.  
  175. my $rssfilename = lc($playlist_hash{$playlist});
  176. $rssfilename =~ s/\s+//g;
  177. #$rss->save("sbs".$rssfilename.".rss"); #this will save the RSS XML feed to a file when you run the script.
  178. print $rss->as_string; #this will send the RSS XML feed to stdout when you run the script.
RAW Paste Data
We use cookies for various purposes including analytics. By continuing to use Pastebin, you agree to our use of cookies as described in the Cookies Policy. OK, I Understand
Top