daily pastebin goal
19%
SHARE
TWEET

PodGrab.py

a guest Oct 6th, 2011 189 Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
  1. #!/usr/bin/env python
  2.  
  3. # PodGrab - A Python command line audio/video podcast downloader for RSS XML feeds.
  4. # Supported RSS item file types: MP3, M4V, OGG, FLV, MP4, MPG/MPEG, WMA, WMV, WEBM
  5. # Version: 1.1.2 - 06/10/2011
  6. # Jonathan Baker
  7. # jon@the-node.org (http://the-node.org)
  8.  
  9. # Werner Avenant - added small changes to write M3U file of podcasts downloaded today
  10. # werner.avenant@gmail.com (http://www.collectiveminds.co.za)
  11.  
  12. # Do with this code what you will, it's "open source". As a courtesy,
  13. # I would appreciate credit if you base your code on mine. If you find
  14. # a bug or think the code sucks balls, please let me know :-)
  15.  
  16. # Outstanding issues:-
  17. # - Video podcasts which are not direct URLs and are modified by PodGrab
  18. #   in order to be grabbed won't display their size as the filenames haven't
  19. #   been stripped of their garbage URL info yet. It'll say 0 bytes, but don't
  20. #   worry, they've downloaded.
  21.  
  22.  
  23. import os
  24. import sys
  25. import argparse
  26. import urllib2
  27. import xml.dom.minidom
  28. import datetime
  29. from time import gmtime, strftime, strptime, mktime
  30. import sqlite3
  31. import shutil
  32. import smtplib
  33. from email.mime.text import MIMEText
  34. import platform
  35. import traceback
  36. import unicodedata
  37.  
  38.  
  39. MODE_NONE = 70
  40. MODE_SUBSCRIBE = 71
  41. MODE_DOWNLOAD = 72
  42. MODE_UNSUBSCRIBE = 73
  43. MODE_LIST = 74
  44. MODE_UPDATE = 75
  45. MODE_MAIL_ADD = 76
  46. MODE_MAIL_DELETE = 77
  47. MODE_MAIL_LIST = 78
  48. MODE_EXPORT = 79
  49. MODE_IMPORT = 80
  50.  
  51.  
  52. DOWNLOAD_DIRECTORY = "podcasts"
  53.  
  54. # Added 2011-10-06 Werner Avenant - added current_dictory here so it can be global
  55. current_directory = ''
  56. m3u_file = ''
  57.  
  58.  
  59. total_item = 0
  60. total_size = 0
  61. has_error = 0
  62.  
  63.  
  64. def main(argv):
  65.         mode = MODE_NONE
  66.         has_error = 0
  67.         num_podcasts = 0
  68.         error_string = ""
  69.         feed_url = ""
  70.         feed_name = ""
  71.         mail_address = ""
  72.         message = ""
  73.         mail = ""
  74.         # Added 2011-10-06 Werner Avenant
  75.         global current_directory
  76.         global m3u_file
  77.         now = datetime.datetime.now();
  78.         m3u_file = str(now)[:10] + '.m3u'
  79.         current_directory = os.path.realpath(os.path.dirname(sys.argv[0]))
  80.         download_directory = current_directory + os.sep + DOWNLOAD_DIRECTORY
  81.  
  82.  
  83.         global total_items
  84.         global total_size
  85.         total_items = 0
  86.         total_size = 0
  87.         data = ""
  88.  
  89.         parser = argparse.ArgumentParser(description='A command line Podcast downloader for RSS XML feeds')
  90.         parser.add_argument('-s', '--subscribe', action="store", dest="sub_feed_url", help='Subscribe to the following XML feed and download latest podcast')
  91.         parser.add_argument('-d', '--download', action="store", dest="dl_feed_url", help='Bulk download all podcasts in the following XML feed or file')
  92.         parser.add_argument('-un', '--unsubscribe', action="store", dest="unsub_url", help='Unsubscribe from the following Podcast feed')
  93.         parser.add_argument('-ma', '--mail-add', action="store", dest="mail_address_add", help='Add a mail address to mail subscription updates to')
  94.         parser.add_argument('-md', '--mail-delete', action="store", dest="mail_address_delete", help='Delete a mail address')
  95.  
  96.         parser.add_argument('-l', '--list', action="store_const", const="ALL", dest="list_subs", help='Lists current Podcast subscriptions')
  97.         parser.add_argument('-u', '--update', action="store_const", const="UPDATE", dest="update_subs", help='Updates all current Podcast subscriptions')
  98.         parser.add_argument('-ml', '--mail-list', action="store_const", const="MAIL", dest="list_mail", help='Lists all current mail addresses')
  99.  
  100.         parser.add_argument('-io', '--import', action="store", dest="opml_import", help='Import subscriptions from OPML file')
  101.         parser.add_argument('-eo', '--export', action="store_const", const="OPML_EXPORT", dest="opml_export", help='Export subscriptions to OPML file')
  102.        
  103.         arguments = parser.parse_args()
  104.        
  105.         if arguments.sub_feed_url:
  106.                 feed_url = arguments.sub_feed_url
  107.                 data = open_datasource(feed_url)
  108.                 if not data:
  109.                         error_string = "Not a valid XML file or URL feed!"
  110.                         has_error = 1
  111.                 else:
  112.                         print "XML data source opened\n"
  113.                         mode = MODE_SUBSCRIBE
  114.         elif arguments.dl_feed_url:
  115.                 feed_url = arguments.dl_feed_url
  116.                 data = open_datasource(feed_url)
  117.                 if not data:
  118.                         error_string = "Not a valid XML file or URL feed!"
  119.                         has_error = 1
  120.                 else:
  121.                         print "XML data source opened\n"
  122.                         mode = MODE_DOWNLOAD
  123.         elif arguments.unsub_url:
  124.                 feed_url = arguments.unsub_url
  125.                 mode = MODE_UNSUBSCRIBE
  126.         elif arguments.list_subs:
  127.                 mode = MODE_LIST
  128.         elif arguments.update_subs:
  129.                 mode = MODE_UPDATE
  130.         elif arguments.mail_address_add:
  131.                 mail_address = arguments.mail_address_add
  132.                 mode = MODE_MAIL_ADD
  133.         elif arguments.mail_address_delete:
  134.                 mail_address = arguments.mail_address_delete
  135.                 mode = MODE_MAIL_DELETE
  136.         elif arguments.list_mail:
  137.                 mode = MODE_MAIL_LIST
  138.         elif arguments.opml_import:
  139.                 import_file_name = arguments.opml_import
  140.                 mode = MODE_IMPORT
  141.         elif arguments.opml_export:
  142.                 mode = MODE_EXPORT
  143.         else:
  144.                 error_string = "No Arguments supplied - for usage run 'PodGrab.py -h'"
  145.                 has_error = 1
  146.         print "Default encoding: " + sys.getdefaultencoding()
  147.         todays_date = strftime("%a, %d %b %Y %H:%M:%S", gmtime())
  148.         print "Current Directory: ", current_directory
  149.         if does_database_exist(current_directory):
  150.                 connection = connect_database(current_directory)
  151.                 if not connection:
  152.                         error_string = "Could not connect to PodGrab database file!"
  153.                         has_error = 1
  154.                 else:
  155.                         cursor = connection.cursor()
  156.         else:
  157.                 print "PodGrab database missing. Creating..."
  158.                 connection = connect_database(current_directory)
  159.                 if not connection:
  160.                         error_string = "Could not create PodGrab database file!"
  161.                         has_error = 1
  162.                 else:
  163.                         print "PodGrab database created"
  164.                         cursor = connection.cursor()
  165.                         setup_database(cursor, connection)
  166.                         print "Database setup complete"
  167.         if not os.path.exists(download_directory):
  168.                 print "Podcast download directory is missing. Creating..."
  169.                 try:
  170.                         os.mkdir(download_directory)
  171.                         print "Download directory '" + download_directory + "' created"
  172.                 except OSError:
  173.                         error_string = "Could not create podcast download sub-directory!"
  174.                         has_error = 1
  175.         else:
  176.                 print "Download directory exists: '" + download_directory + "'"
  177.         if not has_error:
  178.                 if mode == MODE_UNSUBSCRIBE:
  179.                         feed_name = get_name_from_feed(cursor, connection, feed_url)
  180.                         if feed_name == "None":
  181.                                 print "Feed does not exist in the database! Skipping..."
  182.                         else:
  183.                                 feed_name = clean_string(feed_name)
  184.                                 channel_directory = download_directory + os.sep + feed_name
  185.                                 print "Deleting '" + channel_directory + "'..."
  186.                                 delete_subscription(cursor, connection, feed_url)
  187.                                 try :
  188.                                         shutil.rmtree(channel_directory)
  189.                                 except OSError:
  190.                                         print "Subscription directory has not been found - it might have been manually deleted"
  191.                                 print "Subscription '" + feed_name + "' removed"
  192.                 elif mode == MODE_LIST:
  193.                         print "Listing current podcast subscriptions...\n"
  194.                         list_subscriptions(cursor, connection)
  195.                 elif mode == MODE_UPDATE:
  196.                         print "Updating all podcast subscriptions..."
  197.                         subs = get_subscriptions(cursor, connection)
  198.                         for sub in subs:
  199.                                 feed_name = sub[0]
  200.                                 feed_url = sub[1]
  201.                                 print "Feed for subscription: '" + feed_name + "' from '" + feed_url + "' is updating..."
  202.                                 data = open_datasource(feed_url)
  203.                                 if not data:
  204.                                         print "'" + feed_url + "' for '" + feed_name + "' is not a valid feed URL!"
  205.                                 else:
  206.                                         message = iterate_feed(data, mode, download_directory, todays_date, cursor, connection, feed_url)
  207.                                         print message
  208.                                         mail += message
  209.                         mail = mail + "\n\n" + str(total_items) + " podcasts totalling " + str(total_size) + " bytes have been downloaded."
  210.                         if has_mail_users(cursor, connection):
  211.                                 print "Have e-mail address(es) - attempting e-mail..."
  212.                                 mail_updates(cursor, connection, mail, str(total_items))
  213.                 elif mode == MODE_DOWNLOAD or mode == MODE_SUBSCRIBE:
  214.                         print iterate_feed(data, mode, download_directory, todays_date, cursor, connection, feed_url)
  215.                 elif mode == MODE_MAIL_ADD:
  216.                         add_mail_user(cursor, connection, mail_address)
  217.                         print "E-Mail address: " + mail_address + " has been added"
  218.                 elif mode == MODE_MAIL_DELETE:
  219.                         delete_mail_user(cursor, connection, mail_address)
  220.                         print "E-Mail address: " + mailAddress + " has been deleted"
  221.                 elif mode == MODE_MAIL_LIST:
  222.                         list_mail_addresses(cursor, connection)
  223.                 elif mode == MODE_EXPORT:
  224.                         export_opml_file(cursor, connection, current_directory)
  225.                 elif mode == MODE_IMPORT:
  226.                         import_opml_file(cursor, connection, current_directory, download_directory, import_file_name)
  227.         else:
  228.                 print "Sorry, there was some sort of error: '" + error_string + "'\nExiting...\n"
  229.                 if connection:
  230.                         connection.close()
  231.  
  232.  
  233. def open_datasource(xml_url):
  234.         try:
  235.                 response = urllib2.urlopen(xml_url)
  236.         except ValueError:
  237.                 try:
  238.                         response = open(xml_url,'r')
  239.                 except ValueError:
  240.                         print "ERROR - Invalid feed!"
  241.                         response = False
  242.         except urllib2.URLError:
  243.                 print "ERROR - Connection problems. Please try again later"
  244.                 response = False
  245.         except httplib.IncompleteRead:
  246.                 print "ERROR - Incomplete data read. Please try again later"
  247.                 response = False
  248.         if response != False:
  249.                 return response.read()
  250.         else:
  251.                 return response
  252.  
  253. def export_opml_file(cur, conn, cur_dir):
  254.         item_count = 0
  255.         feed_name = ""
  256.         feed_url = ""
  257.         last_ep = ""
  258.         now = datetime.datetime.now()
  259.         file_name = cur_dir + os.sep + "podgrab_subscriptions-" + str(now.year) + "-" + str(now.month) + "-" + str(now.day) + ".opml"
  260.         subs = get_subscriptions(cur, conn)
  261.         file_handle = open(file_name,"w")
  262.         print "Exporting RSS subscriptions database to: '" + file_name + "' OPML file...please wait.\n"
  263.         header = "<opml version=\"2.0\">\n<head>\n\t<title>PodGrab Subscriptions</title>\n</head>\n<body>\n"
  264.         file_handle.writelines(header)
  265.         for sub in subs:
  266.                 feed_name = sub[0]
  267.                 feed_url = sub[1]
  268.                 last_ep = sub[2]
  269.                 file_handle.writelines("\t<outline title=\"" + feed_name + "\" text=\"" + feed_name + "\" type=\"rss\" xmlUrl=\"" + feed_url + "\" htmlUrl=\"" + feed_url + "\"/>\n")
  270.                 print "Exporting subscription '" + feed_name + "'...Done.\n"
  271.                 item_count = item_count + 1
  272.         footer = "</body>\n</opml>"
  273.         file_handle.writelines(footer)
  274.         file_handle.close()
  275.         print str(item_count) + " item(s) exported to: '" + file_name + "'. COMPLETE"
  276.  
  277.  
  278. def import_opml_file(cur, conn, cur_dir, download_dir, import_file):
  279.         count = 0
  280.         print "Importing OPML file '" + import_file + "'..."
  281.         if import_file.startswith("/") or import_file.startswith(".."):
  282.                 data = open_datasource(import_file)
  283.                 if not data:
  284.                         print "ERROR = Could not open OPML file '" + import_file + "'"
  285.         else:
  286.                 data = open_datasource(cur_dir + os.sep + import_file)
  287.                 if not data:
  288.                         print "ERROR - Could not open OPML file '" + cur_dir + os.sep + import_file + "'"
  289.         if data:
  290.                 print "File opened...please wait"
  291.                 try:
  292.                         xml_data = xml.dom.minidom.parseString(data)
  293.                         items = xml_data.getElementsByTagName('outline')
  294.                         for item in items:
  295.                                 item_feed = item.getAttribute('xmlUrl')
  296.                                 item_name = item.getAttribute('title')
  297.                                 item_name = clean_string(item_name)
  298.                                 print "Subscription Title: " + item_name
  299.                                 print "Subscription Feed: " + item_feed
  300.                                 item_directory = download_dir + os.sep + item_name
  301.                        
  302.                                 if not os.path.exists(item_directory):
  303.                                         os.makedirs(item_directory)
  304.                                 if not does_sub_exist(cur, conn, item_feed):
  305.                                         insert_subscription(cur, conn, item_name, item_feed)
  306.                                         count = count + 1
  307.                                 else:
  308.                                         print "This subscription is already present in the database. Skipping..."
  309.                                 print "\n"
  310.                         print "\nA total of " + str(count) + " subscriptions have been added from OPML file: '" + import_file + "'"
  311.                         print "These will be updated on the next update run.\n"
  312.                 except xml.parsers.expat.ExpatError:
  313.                         print "ERROR - Malformed XML syntax in feed. Skipping..."
  314.  
  315.  
  316. def iterate_feed(data, mode, download_dir, today, cur, conn, feed):
  317.         print "Iterating feed..."
  318.         message = ""
  319.         try:
  320.                 xml_data = xml.dom.minidom.parseString(data)
  321.                 for channel in xml_data.getElementsByTagName('channel'):
  322.                         channel_title = channel.getElementsByTagName('title')[0].firstChild.data
  323.                         channel_link = channel.getElementsByTagName('link')[0].firstChild.data
  324.                         print "Channel Title: ===" + channel_title + "==="
  325.                         print "Channel Link: " + channel_link
  326.                         channel_title = clean_string(channel_title)
  327.                  
  328.                         channel_directory = download_dir + os.sep + channel_title
  329.                         if not os.path.exists(channel_directory):
  330.                                 os.makedirs(channel_directory)
  331.                         print "Current Date: ", today
  332.                         if mode == MODE_DOWNLOAD:
  333.                                 print "Bulk download. Processing..."
  334.                           # 2011-10-06 Replaced channel_directory with channel_title - needed for m3u file later
  335.                                 num_podcasts = iterate_channel(channel, today, mode, cur, conn, feed, channel_title)
  336.                                 print "\n", num_podcasts, "have been downloaded"
  337.                         elif mode == MODE_SUBSCRIBE:
  338.                                 print "Feed to subscribe to: " + feed + ". Checking for database duplicate..."
  339.                                 if not does_sub_exist(cur, conn, feed):
  340.                                         print "Subscribe. Processing..."
  341.                             # 2011-10-06 Replaced channel_directory with channel_title - needed for m3u file later
  342.                                         num_podcasts = iterate_channel(channel, today, mode, cur, conn, feed, channel_title)
  343.                            
  344.                                         print "\n", num_podcasts, "have been downloaded from your subscription"
  345.                                 else:
  346.                                         print "Subscription already exists! Skipping..."
  347.                         elif mode == MODE_UPDATE:
  348.                                 print "Updating RSS feeds. Processing..."
  349.                                 num_podcasts = iterate_channel(channel, today, mode, cur, conn, feed, channel_title)
  350.                                 message += str(num_podcasts) + " have been downloaded from your subscription: '" + channel_title + "'\n"
  351.         except xml.parsers.expat.ExpatError:
  352.                 print "ERROR - Malformed XML syntax in feed. Skipping..."
  353.                 message += "0 podcasts have been downloaded from this feed due to RSS syntax problems. Please try again later"
  354.         except UnicodeEncodeError:
  355.                 print "ERROR - Unicoce encoding error in string. Cannot convert to ASCII. Skipping..."
  356.                 message += "0 podcasts have been downloaded from this feed due to RSS syntax problems. Please try again later"
  357.         return message
  358.  
  359.  
  360. def clean_string(str):
  361.         new_string = str
  362.         if new_string.startswith("-"):
  363.                 new_string = new_string.lstrip("-")
  364.         if new_string.endswith("-"):
  365.                 new_string = new_string.rstrip("-")
  366.         new_string_final = ''
  367.         for c in new_string:
  368.                 if c.isalnum() or c == "-" or c == "." or c.isspace():
  369.                         new_string_final = new_string_final + ''.join(c)
  370.         new_string_final = new_string_final.strip()
  371.         new_string_final = new_string_final.replace(' ','-')
  372.         new_string_final = new_string_final.replace('---','-')
  373.         new_string_final = new_string_final.replace('--','-')
  374.         return new_string_final
  375.  
  376. # Change 2011-10-06 - Changed chan_loc to channel_title to help with relative path names
  377. # in the m3u file
  378. def write_podcast(item, channel_title, date, type):
  379.         (item_path, item_file_name) = os.path.split(item)
  380.         if len(item_file_name) > 50:
  381.                 item_file_name = item_file_name[:50]
  382.         today = datetime.date.today()
  383.         item_file_name = today.strftime("%Y%m%d") + item_file_name # 2011-10-06 Removed slashes
  384.         local_file = current_directory + os.sep + DOWNLOAD_DIRECTORY + os.sep + channel_title + os.sep + clean_string(item_file_name)
  385.         if type == "video/quicktime" or type == "audio/mp4" or type == "video/mp4":
  386.                 if not local_file.endswith(".mp4"):
  387.                         local_file = local_file + ".mp4"
  388.         elif type == "video/mpeg":
  389.                 if not local_file.endswith(".mpg"):
  390.                         local_file = local_file + ".mpg"
  391.         elif type == "video/x-flv":
  392.                 if not local_file.endswith(".flv"):
  393.                         local_file = local_file + ".flv"
  394.         elif type == "video/x-ms-wmv":
  395.                 if not local_file.endswith(".wmv"):
  396.                         local_file = local_file + ".wmv"
  397.         elif type == "video/webm" or type == "audio/webm":
  398.                 if not local_file.endswith(".webm"):
  399.                         local_file = local_file + ".webm"
  400.         elif type == "audio/mpeg":
  401.                 if not local_file.endswith(".mp3"):
  402.                         local_file = local_file + ".mp3"
  403.         elif type == "audio/ogg" or type == "video/ogg" or type == "audio/vorbis":
  404.                 if not local_file.endswith(".ogg"):
  405.                         local_file = local_file + ".ogg"
  406.         elif type == "audio/x-ms-wma" or type == "audio/x-ms-wax":
  407.                 if not local_file.endswith(".wma"):
  408.                         local_file = local_file + ".wma"       
  409.         if os.path.exists(local_file):
  410.                 return 0
  411.         else:
  412.                 print "\nDownloading " + item_file_name + " which was published on " + date
  413.                 try:
  414.                         item_file = urllib2.urlopen(item)
  415.                         output = open(local_file, 'wb')
  416.                         # 2011-10-06 Werner Avenant - For some reason the file name changes when
  417.                         # saved to disk - probably a python feature (sorry, only wrote my first line of python today)
  418.                         item_file_name = os.path.basename(output.name)  
  419.                         output.write(item_file.read())
  420.                         output.close()
  421.                         print "Podcast: ", item, " downloaded to: ", local_file
  422.                        
  423.                         # 2011-11-06 Append to m3u file
  424.                         output = open(current_directory + os.sep + m3u_file, 'a')
  425.                         output.write(DOWNLOAD_DIRECTORY + os.sep + channel_title + os.sep + item_file_name + "\n")
  426.                         output.close()
  427.                         return 1
  428.                 except urllib2.URLError as e:
  429.                         print "ERROR - Could not write item to file: ", e
  430.  
  431.  
  432. def does_database_exist(curr_loc):
  433.         db_name = "PodGrab.db"
  434.         if os.path.exists(curr_loc + os.sep + db_name):
  435.                 return 1
  436.         else:
  437.                 return 0
  438.  
  439.  
  440. def add_mail_user(cur, conn, address):
  441.         row = (address,)
  442.         cur.execute('INSERT INTO email(address) VALUES (?)', row)
  443.         conn.commit()
  444.  
  445.  
  446. def delete_mail_user(cur, conn, address):
  447.         row = (address,)
  448.         cur.execute('DELETE FROM email WHERE address = ?', row)
  449.         conn.commit()
  450.  
  451.  
  452. def get_mail_users(cur, conn):
  453.         cur.execute('SELECT address FROM email')
  454.         return cur.fetchall()
  455.  
  456.  
  457. def list_mail_addresses(cur, conn):
  458.         cur.execute('SELECT * from email')
  459.         result = cur.fetchall()
  460.         print "Listing mail addresses..."
  461.         for address in result:
  462.                 print "Address:\t" + address[0]
  463.  
  464.  
  465. def has_mail_users(cur, conn):
  466.         cur.execute('SELECT COUNT(*) FROM email')
  467.         if cur.fetchone() == "0":
  468.                 return 0
  469.         else:
  470.                 return 1
  471.  
  472.  
  473. def mail_updates(cur, conn, mess, num_updates):
  474.         addresses = get_mail_users(cur, conn)
  475.         for address in addresses:
  476.                 try:
  477.                         subject_line = "PodGrab Update"
  478.                         if int(num_updates) > 0:
  479.                                 subject_line += " - NEW updates!"
  480.                         else:
  481.                                 subject_line += " - nothing new..."
  482.                         mail('localhost', 'podgrab@' + platform.node(), address[0], subject_line, mess)
  483.                         print "Successfully sent podcast updates e-mail to: " + address[0]
  484.                 except smtplib.SMTPException:
  485.                         traceback.print_exc()
  486.                         print "Could not send podcast updates e-mail to: " + address[0]
  487.  
  488.  
  489. def mail(server_url=None, sender='', to='', subject='', text=''):
  490.     headers = "From: %s\r\nTo: %s\r\nSubject: %s\r\n\r\n" % (sender, to, subject)
  491.     message = headers + text
  492.     mail_server = smtplib.SMTP(server_url)
  493.     mail_server.sendmail(sender, to, message)
  494.     mail_server.quit() 
  495.  
  496.  
  497. def connect_database(curr_loc):
  498.         conn = sqlite3.connect(curr_loc + os.sep + "PodGrab.db")
  499.         return conn
  500.  
  501. def setup_database(cur, conn):
  502.         cur.execute("CREATE TABLE subscriptions (channel text, feed text, last_ep text)")
  503.         cur.execute("CREATE TABLE email (address text)")
  504.         conn.commit()
  505.  
  506.  
  507. def insert_subscription(cur, conn, chan, feed):
  508.         chan.replace(' ', '-')
  509.         chan.replace('---','-')
  510.         row = (chan, feed, "NULL")
  511.         cur.execute('INSERT INTO subscriptions(channel, feed, last_ep) VALUES (?, ?, ?)', row)
  512.         conn.commit()
  513.  
  514.  
  515. def iterate_channel(chan, today, mode, cur, conn, feed, channel_title):
  516.         global total_items
  517.         global total_size
  518.         NUM_MAX_DOWNLOADS = 4
  519.         saved = 0
  520.         num = 0
  521.         size = 0
  522.         last_ep = "NULL"
  523.         print "Iterating channel..."
  524.         if mode == MODE_SUBSCRIBE:
  525.                 print "Feed: " + feed
  526.                 if does_sub_exist(cur, conn, feed):
  527.                         print "Podcast subscription exists - getting latest podcast"
  528.                         last_ep = get_last_subscription_downloaded(cur, conn, feed)
  529.                 else:
  530.                         print "Podcast subscription is new - getting previous podcast"
  531.                         insert_subscription(cur, conn, chan.getElementsByTagName('title')[0].firstChild.data, feed)
  532.         for item in chan.getElementsByTagName('item'):
  533.                 try:
  534.                         item_title = item.getElementsByTagName('title')[0].firstChild.data
  535.                         item_date = item.getElementsByTagName('pubDate')[0].firstChild.data
  536.                         item_file = item.getElementsByTagName('enclosure')[0].getAttribute('url')
  537.                         item_size = item.getElementsByTagName('enclosure')[0].getAttribute('length')
  538.                         item_type = item.getElementsByTagName('enclosure')[0].getAttribute('type')
  539.                         struct_time_today = strptime(today, "%a, %d %b %Y %H:%M:%S")
  540.                         try:
  541.                                 struct_time_item = strptime(fix_date(item_date), "%a, %d %b %Y %H:%M:%S")
  542.                                 has_error = 0  
  543.                         except TypeError:
  544.                                 has_error = 1
  545.                         except ValueError:
  546.                                 has_error = 1
  547.                         if mode == MODE_DOWNLOAD:
  548.                                 if not has_error:
  549.           #Changed 2011-06-10 Replaced chan_dir with channel_title
  550.                                         saved = write_podcast(item_file, channel_title, item_date, item_type)
  551.                                 else:
  552.                                         saved = 0
  553.                                         print "This item has a badly formatted date. Cannot download!"
  554.                                 if saved > 0:
  555.                                         print "\nTitle: " + item_title
  556.                                         print "Date:  " + item_date
  557.                                         print "File:  " + item_file
  558.                                         print "Size:  " + item_size + " bytes"
  559.                                         print "Downloading " + item_file + "..."
  560.                                 num = num + saved
  561.                                 size = size + int(item_size)
  562.                                 total_size += size
  563.                                 total_items += num
  564.                         elif mode == MODE_SUBSCRIBE or mode == MODE_UPDATE:
  565.                                 if (last_ep == "NULL"):
  566.                                         last_ep = fix_date(item_date)
  567.                                         update_subscription(cur, conn, feed, last_ep)
  568.                                 try:
  569.                                         struct_last_ep = strptime(last_ep, "%a, %d %b %Y %H:%M:%S")
  570.                                         has_error = 0
  571.                                 except TypeError:
  572.                                         has_error = 1
  573.                                         print "This item has a badly formatted date. Cannot download!"
  574.                                 except ValueError:
  575.                                         has_error = 1
  576.                                         print "This item has a badly formatted date. Cannot download!"
  577.                                 if not has_error:
  578.                                         if mktime(struct_time_item) <= mktime(struct_time_today) and mktime(struct_time_item) >= mktime(struct_last_ep):
  579.                                                 saved = write_podcast(item_file, channel_title, item_date, item_type)
  580.                                                 if saved > 0:
  581.                                                         print "\nTitle: " + item_title
  582.                                                         print "Date:  " + item_date
  583.                                                         print "File:  " + item_file
  584.                                                         print "Size:  " + item_size + " bytes"
  585.                                                         print "Type:  " + item_type
  586.                                                         update_subscription(cur, conn, feed, fix_date(item_date))
  587.                                                         num = num + saved
  588.                                                         size = size + int(item_size)
  589.                                                         total_size += size
  590.                                                         total_items += num
  591.                                                 if (num >= NUM_MAX_DOWNLOADS):
  592.                                                         print "Maximum session download of " + str(NUM_MAX_DOWNLOADS) + " podcasts has been reached. Exiting."
  593.                                                         break
  594.                 except IndexError, e:
  595.                         #traceback.print_exc()
  596.                         print "This RSS item has no downloadable URL link for the podcast for '" + item_title  + "'. Skipping..."
  597.         return str(num) + " podcasts totalling " + str(size) + " bytes"
  598.  
  599.  
  600. def fix_date(date):
  601.         new_date = ""
  602.         split_array = date.split(' ')
  603.         for i in range(0,5):
  604.                 new_date = new_date + split_array[i] + " "
  605.         return new_date.rstrip()
  606.  
  607.  
  608. def does_sub_exist(cur, conn, feed):
  609.         row = (feed,)
  610.         cur.execute('SELECT COUNT (*) FROM subscriptions WHERE feed = ?', row)
  611.         return_string = str(cur.fetchone())[1]
  612.         if return_string == "0":
  613.                 return 0
  614.         else:
  615.                 return 1
  616.  
  617.  
  618. def delete_subscription(cur, conn, url):
  619.         row = (url,)
  620.         cur.execute('DELETE FROM subscriptions WHERE feed = ?', row)
  621.         conn.commit()
  622.  
  623.  
  624. def get_name_from_feed(cur, conn, url):
  625.         row = (url,)
  626.         cur.execute('SELECT channel from subscriptions WHERE feed = ?', row)
  627.         return_string = cur.fetchone()
  628.         try:
  629.                 return_string = ''.join(return_string)
  630.         except TypeError:
  631.                 return_string = "None"
  632.         return str(return_string)
  633.  
  634.  
  635. def list_subscriptions(cur, conn):
  636.         count = 0
  637.         try:
  638.                 result = cur.execute('SELECT * FROM subscriptions')
  639.                 for sub in result:
  640.                         print "Name:\t\t", sub[0]
  641.                         print "Feed:\t\t", sub[1]
  642.                         print "Last Ep:\t", sub[2], "\n"
  643.                         count += 1
  644.                 print str(count) + " subscriptions present"
  645.         except sqlite3.OperationalError:
  646.                 print "There are no current subscriptions or there was an error"
  647.  
  648.  
  649. def get_subscriptions(cur, conn):
  650.         try:
  651.                 cur.execute('SELECT * FROM subscriptions')
  652.                 return cur.fetchall()
  653.         except sqlite3.OperationalError:
  654.                 print "There are no current subscriptions"
  655.                 return null
  656.  
  657.  
  658. def update_subscription(cur, conn, feed, date):
  659.         row = (date, feed)
  660.         cur.execute('UPDATE subscriptions SET last_ep = ? where feed = ?', row)
  661.         conn.commit()
  662.  
  663.  
  664. def get_last_subscription_downloaded(cur, conn, feed):
  665.         row = (feed,)
  666.         cur.execute('SELECT last_ep FROM subscriptions WHERE feed = ?', row)
  667.         return cur.fetchone()
  668.  
# Script entry point: when the file is executed directly (not imported),
# pass the command-line arguments, minus the program name, to main().
if __name__ == "__main__":
        main(sys.argv[1:])
  671.  
  672.  
RAW Paste Data
We use cookies for various purposes including analytics. By continuing to use Pastebin, you agree to our use of cookies as described in the Cookies Policy. OK, I Understand
 
Top