Advertisement
Guest User

Mr A

a guest
Sep 9th, 2009
599
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Perl 6.47 KB | None | 0 0
  #!/usr/bin/perl

  # SBS Playlist To RSS - v0.2.1
  # This script downloads the ajax xml file containing the latest full episode
  # videos added to the SBS.com.au site and converts this data into an RSS 2.0
  # feed. The feed is written to stdout, preceded by a CGI Content-Type header.

  # Originally adapted from the code at http://www.perl.com/pub/a/2001/11/15/creatingrss.html by Chris Ball.

  # I declare this code to be in the public domain.

  # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
  # THE SOFTWARE.

  use strict;
  use warnings;

  use LWP::Simple;        #for downloading over HTTP
  use HTML::TokeParser;   #to parse HTML files
  use XML::RSS;           #to generate the RSS file
  use Date::Format;
  use XML::Mini::Document; #to grab data from XML files

  # Variables that you should set.
  my $playlist = 94; # 94 = full episodes, 95 = sneak peeks

  # Constants
  # Playlist id -> feed title suffix.
  my %playlist_hash = (
      '94' => 'Latest Full Episodes',
      '95' => 'Latest Sneek Peek',
  );

  # First three characters of the media filename -> episode type.
  # Built once here instead of on every loop iteration.
  my %epcode_hash = (
      'DOC' => 'Documentary',
      'MOV' => 'Movie',
      'SRS' => 'Series',
  );

  # Bitrates (in K) for which a media URL is generated per episode.
  my @bitrates = (128, 300, 1000);

  my $playlisturl  = "http://www.sbs.com.au/shows/ajax/getplaylist/playlistId/".$playlist."/";
  my $smil_baseurl = "http://player.sbs.com.au/video/smil/index/standalone/";

  exists $playlist_hash{$playlist}
      or die "Unknown playlist id '$playlist'\n";
  my $feed_title = "SBS ".$playlist_hash{$playlist};

  # Download the playlist document. LWP::Simple::get() returns undef on
  # failure and does not set $!, so report the URL rather than $!.
  my $playlistxml = get( $playlisturl );
  defined $playlistxml && length $playlistxml
      or die "Could not download the playlist from $playlisturl\n";

  # Create a TokeParser object over the downloaded markup.
  my $playlistxml_stream = HTML::TokeParser->new( \$playlistxml )
      or die "Could not create a parser for the playlist document\n";

  # Create and prepare the RSS object.
  my $rss = XML::RSS->new( version => '2.0' );

  $rss->channel(
      title         => $feed_title,
      link          => $playlisturl,
      language      => 'en',
      # The zone is passed explicitly so the formatted time really is GMT,
      # matching the literal "GMT" in the template.
      lastBuildDate => time2str("%a, %d %b %Y %T GMT", time, 'GMT'),
  );

  $rss->image(
      title => $feed_title,
      url   => "http://www.sbs.com.au/web/images/sbslogo_footer.jpg",
      link  => $playlisturl,
  );

  # Escape the characters that are special in HTML text and attribute values.
  # Returns an empty string for an undefined argument.
  sub _escape_html {
      my ($text) = @_;
      return '' unless defined $text;
      $text =~ s/&/&amp;/g;
      $text =~ s/</&lt;/g;
      $text =~ s/>/&gt;/g;
      $text =~ s/"/&quot;/g;
      return $text;
  }

  # get_tag skips forward from the current position to the tag specified, and
  # get_trimmed_text grabs plain text from the current position up to the tag
  # specified.
  ITEM:
  while ( my $tag = $playlistxml_stream->get_tag("a") ) {
      # Only <a> tags that carry a title attribute describe a video.
      my $eptitle = $tag->[1]{title};
      next ITEM unless $eptitle;

      # All parser calls for this item are made up front, in document order,
      # so that skipping a bad item later cannot leave the parser half way
      # through an entry.
      my $img_tag = $playlistxml_stream->get_tag('img')
          or last ITEM;                      # no more tags in the document
      my $epthumb = $img_tag->[1]{src};      # URL of the thumbnail image

      $playlistxml_stream->get_tag('a');
      $playlistxml_stream->get_tag('p');
      # Text up to the close of the <p> tag: the time and date.
      my $eptime = $playlistxml_stream->get_trimmed_text('/p');

      # Derive the media filename from the thumbnail URL, eg.
      #   $epthumb       = http://videocdn.sbs.com.au/u/thumbnails/SRS_FE_Global_Village_Ep_19_44_48467.jpg
      #   $filename_base = SRS_FE_Global_Village_Ep_19_44_48467
      #   $epcode        = 48467
      next ITEM unless defined $epthumb;
      my ($filename_base) = $epthumb =~ m{ ([^/]*) [.] [^./]* \z }x;
      next ITEM unless defined $filename_base && length $filename_base;

      # Everything after the last underscore (the whole name if there is none).
      my ($epcode) = $filename_base =~ m{ ([^_]*) \z }x;
      next ITEM unless length $epcode;

      # Look up the delivery details (RTMP or HTTP) in the episode's SMIL file.
      my $smilurl = $smil_baseurl.$epcode;
      my $smilxml = get( $smilurl );
      unless ( defined $smilxml ) {
          warn "Skipping '$eptitle': could not download $smilurl\n";
          next ITEM;
      }

      my $xmlDoc = XML::Mini::Document->new();
      $xmlDoc->parse($smilxml);
      my $xmlHash = $xmlDoc->toHash();

      # eg. "http://videocdn.sbs.com.au/u/video/" or
      #     "rtmp://specialbsc.fcod.llnwd.net/a1768/o21/"
      # The eval keeps one SMIL file with an unexpected structure from
      # aborting the whole feed.
      my $baseurl = eval { $xmlHash->{smil}{head}{meta}{base} };
      next ITEM unless defined $baseurl && !ref $baseurl;

      # Only these two delivery protocols produce a feed item.
      my $delivery_protocol = substr($baseurl, 0, 4);
      next ITEM
          unless $delivery_protocol eq "http" || $delivery_protocol eq "rtmp";

      # One media URL and one HTML link per bitrate.
      my %url_for;
      my @links;
      foreach my $rate (@bitrates) {
          $url_for{$rate} = $baseurl.$filename_base."_".$rate."K.flv";
          push @links,
              '<a href="'._escape_html($url_for{$rate}).'">'.$rate.'k</a>';
      }

      # Type of episode: SRS|DOC|MOV, empty when the prefix is not recognised.
      my $code1char = substr($filename_base, 0, 3);
      my $code1 = exists $epcode_hash{$code1char} ? $epcode_hash{$code1char} : "";

      # Text taken from the remote documents is escaped before it is placed
      # in the HTML description.
      my $description = join "\n",
          '<![CDATA[<img src="'._escape_html($epthumb).'" width="100" height="56" /><br />',
          _escape_html($eptitle).'<br />',
          _escape_html($eptime).'<br />',
          'Delivery: FLV over <b>'.uc($delivery_protocol).'</b><br />',
          'Links:  '.join(", ", @links).'<br />',
          'Type: '.$code1.'<br />]]>';

      my %item = (
          title       => $eptitle,
          permaLink   => $smilurl,
          description => $description,
      );

      # RTMP items get no enclosure; HTTP items link the 1000K file.
      if ($delivery_protocol eq "http") {
          $item{enclosure} = { url => $url_for{1000}, type => "video/x-flv" };
      }

      # Add the item to the RSS feed.
      $rss->add_item(%item);
  }

  # CGI header. The blank line ("\n\n") separates the header from the body.
  # NOTE(review): XML::RSS appears to declare UTF-8 in the XML prolog by
  # default - confirm that the charset below matches what is actually sent.
  print "Content-Type: application/xml; charset=ISO-8859-1\n\n";

  my $rssfilename = lc($playlist_hash{$playlist});
  $rssfilename =~ s/\s+//g;
  #$rss->save("sbs".$rssfilename.".rss"); #this will save the RSS XML feed to a file when you run the script.
  print $rss->as_string; #this will send the RSS XML feed to stdout when you run the script.
  179.  
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement