Advertisement
Guest User

Untitled

a guest
Apr 30th, 2013
68
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Diff 8.64 KB | None | 0 0
  1. --- aaa/rssdler.py  2009-10-01 16:04:26.000000000 +0200
  2. +++ bbb/rssdler.py  2013-04-30 09:25:05.000000000 +0200
  3. @@ -425,7 +425,31 @@
  4.  # # # # #
  5.  # Network Communication
  6.  # # # # #
  7. -def getFilenameFromHTTP(info, url):
  8. +def getUrlFromRedirect(url):
  9. +    u"""Follow up to 10 HTTP(S) redirects starting at url and return the final
  10. +    URL. url is returned unchanged on network errors or non-2xx/3xx replies."""
  11. +    turl = url
  12. +    for attempt in range(10):
  13. +        scheme, host, path, query = urlparse.urlsplit(turl)[0:4]
  14. +        if not host.strip(): break
  15. +        target = (path or '/') + (query and '?' + query)  # never a bare '?'
  16. +        if scheme == 'https': connClass = httplib.HTTPSConnection
  17. +        else: connClass = httplib.HTTPConnection
  18. +        try:
  19. +            connection = connClass(host, timeout=10)
  20. +            try:
  21. +                connection.request("GET", target)
  22. +                resp = connection.getresponse()
  23. +                status, location = resp.status, resp.getheader('location')
  24. +            finally: connection.close()  # do not leak the socket
  25. +        except (httplib.HTTPException, socket.error): break
  26. +        if 200 <= status <= 299: return turl
  27. +        if not (300 <= status <= 399 and location): break
  28. +        # Location may be relative; resolve it against the current URL
  29. +        turl = urlparse.urljoin(turl, location)
  30. +    return url
  31. +
  32. +def getFilenameFromHTTP(info, url, threadName):
  33.      u"""info is an http header from the download,
  34.      url is the url to the downloaded file (responseObject.geturl() )."""
  35.      filename = None
  36. @@ -437,6 +461,11 @@
  37.          logging.debug(u"filename from content-disposition header")
  38.          return unicodeC( filename )
  39.      logging.debug(u"filename from url")
  40. +    if getConfig()['threads'][threadName]['nameFromRedirect'] or (
  41. +            getConfig()['global']['nameFromRedirect'] and not
  42. +            getConfig()['threads'][threadName]['nameFromRedirect'] == False
  43. +            ):
  44. +        url = getUrlFromRedirect(url)
  45.      filename = percentUnQuote( urlparse.urlparse( url )[2].split('/')[-1] )
  46.      try: typeGuess = info.gettype()
  47.      except AttributeError: typeGuess = None
  48. @@ -779,7 +808,7 @@
  49.          logging.critical(''.join((traceback.format_exc(), os.linesep,
  50.            u'error grabbing url: %s' % link)))
  51.          return False
  52. -    filename = getFilenameFromHTTP(data.info(), link)
  53. +    filename = getFilenameFromHTTP(data.info(), link, threadName)
  54.      if not filename: return False
  55.      size, data2 = getFileSize(data.info(), data)
  56.      if size and not checkFileSize(size, threadName, downItemConfig):
  57. @@ -1154,6 +1183,8 @@
  58.              around before actually giving you the file to download.
  59.              Mechanize has the ability to follow these sites.
  60.      noClobber: [Optional] Boolean. Default True. Overwrite file, or use new name
  61. +    nameFromRedirect: [Optional] Boolean. Default False. Take the filename
  62. +        from the final URL reached after following a feed item's redirects.
  63.      rssFeed: [Optional] Boolean Option. Default False. Setting this option
  64.          allows you to create your own rss feed of the objects you have
  65.          downloaded. It's a basic feed, likely to not include links to the
  66. @@ -1204,6 +1235,7 @@
  67.          self['cookieType'] = 'MozillaCookieJar'
  68.          self['sleepTime'] = 1
  69.          self['noClobber'] = True
  70. +        self['nameFromRedirect'] = False
  71.          self['umask'] = 77
  72.          self['debug'] = False
  73.  
  74. @@ -1297,6 +1329,9 @@
  75.          for each thread is:
  76.          for X = global scanMins, Y = thread scanMins, Z = ttl Mins:
  77.          min{nX | nX >= Y ; nX >= Z ; n \u2208 \u2115 }
  78. +    nameFromRedirect: [Optional] Boolean. Default None. Take the filename
  79. +        from the final URL reached after following a feed item's redirects.
  80. +        Overrides the global "nameFromRedirect"; None defers to the global.
  81.      checkTime: DEPRECATED. Will no longer be processed.
  82.      Programmers Note:
  83.          download<x>* stored in a DownloadItemConfig() Dict in .downloads.
  84. @@ -1305,7 +1340,7 @@
  85.      def __init__(self, name=None, link=None, active=True, maxSize=None,
  86.          minSize=None, noSave=False, directory=None, regExTrue=None,
  87.          regExTrueOptions=None, regExFalse=None, regExFalseOptions=None,
  88. -        postDownloadFunction=None, scanMins=0):
  89. +        postDownloadFunction=None, scanMins=0, nameFromRedirect=None):
  90.          dict.__init__(self)
  91.          self['link'] = link
  92.          self['active'] = active
  93. @@ -1320,6 +1355,7 @@
  94.          self['regExFalseOptions'] = regExFalseOptions
  95.          self['postDownloadFunction'] = postDownloadFunction
  96.          self['scanMins'] = scanMins
  97. +        self['nameFromRedirect'] = nameFromRedirect
  98.          self['downloads'] = []
  99.          self['postScanFunction'] = None
  100.          self['preScanFunction'] = None
  101. @@ -1455,8 +1491,8 @@
  102.          'Saturday', 'Sunday', 'Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun',
  103.          '0', '1', '2', '3', '4', '5', '6']
  104.      boolOptionsGlobal = ['runOnce', 'active', 'rssFeed', 'urllib', 'noClobber',
  105. -        'debug']
  106. -    boolOptionsThread = ['active', 'noSave']
  107. +        'nameFromRedirect', 'debug']
  108. +    boolOptionsThread = ['active', 'noSave', 'nameFromRedirect']
  109.      stringOptionsGlobal = ['downloadDir', 'saveFile', 'cookieFile', 'cookieType'
  110.          , 'logFile', 'workingDir', 'daemonInfo', 'rssFilename', 'rssLink',
  111.          'rssDescription', 'rssTitle', ]
  112. @@ -1741,7 +1777,7 @@
  113.  def userFunctHandling():
  114.      u"""tries to import userFunctions, sets up the namespace
  115.      reserved words in userFunctions: everything in globals() except '__builtins__', '__name__', '__doc__', 'userFunctHandling', 'callUserFunction', 'userFunctions'. If using daemon mode, 'resource' is reserved.
  116. -    Reserved words: 'Config', 'ConfigParser', 'DownloadItemConfig', 'FailedItem', 'Fkout', 'GlobalOptions', 'LevelFilter', 'Locked', 'MAXFD', 'MakeRss', 'SaveInfo', 'SaveProcessor', 'StringIO', 'ThreadLink', '_USER_AGENT', '__author__', '__copyright__', '__file__', '__package__', '__version__', '_action', '_configInstance', '_main', '_runOnce', 'bdecode', 'callDaemon', 'callUserFunction', 'checkFileSize', 'checkRegEx', 'checkRegExDown', 'checkRegExGFalse', 'checkRegExGTrue', 'checkScanTime', 'checkSleep', 'cliOptions', 'codecs', 'commentConfig', 'configFile', 'configFileNotes', 'convertKDEToMoz', 'convertMoz3ToNet', 'convertSafariToMoz', 'cookieHandler', 'cookielib', 'createDaemon', 'division', 'downloadFile', 'downloader', 'email', 'encodeQuoteUrl', 'feedparser', 'findNewFile', 'getConfig', 'getFileSize', 'getFilenameFromHTTP', 'getSaved', 'getVersion', 'getopt', 'helpMessage', 'htmlUnQuote', 'httplib', 'isRunning', 'killDaemon', 'logging', 'main', 'make_handler', 'mechRetrievePage', 'mechanize', 'mimetypes', 'minidom', 'natsorted', 'netscapeHeader', 'nonCoreDependencies', 'noprint', 'operator', 'os', 'percentIsQuoted', 'percentNeedsQuoted', 'percentQuote', 'percentQuoteDict', 'percentUnQuote', 'percentunQuoteDict', 'pickle', 'random', 're', 'resource', 'rss', 'rssparse', 'run', 'saved', 'searchFailed', 'securityIssues', 'setDebug', 'setLogging', 'sgmllib', 'signal', 'signalHandler', 'socket', 'sqlite3', 'sys', 'time', 'traceback', 'unQuoteReQuote', 'unicodeC', 'urllib', 'urllib2', 'urllib2RetrievePage', 'urlparse', 'userFunctHandling', 'userFunctions', 'validFileName', 'writeNewFile', 'xmlUnEscape'
  117. +    Reserved words: 'Config', 'ConfigParser', 'DownloadItemConfig', 'FailedItem', 'Fkout', 'GlobalOptions', 'LevelFilter', 'Locked', 'MAXFD', 'MakeRss', 'SaveInfo', 'SaveProcessor', 'StringIO', 'ThreadLink', '_USER_AGENT', '__author__', '__copyright__', '__file__', '__package__', '__version__', '_action', '_configInstance', '_main', '_runOnce', 'bdecode', 'callDaemon', 'callUserFunction', 'checkFileSize', 'checkRegEx', 'checkRegExDown', 'checkRegExGFalse', 'checkRegExGTrue', 'checkScanTime', 'checkSleep', 'cliOptions', 'codecs', 'commentConfig', 'configFile', 'configFileNotes', 'convertKDEToMoz', 'convertMoz3ToNet', 'convertSafariToMoz', 'cookieHandler', 'cookielib', 'createDaemon', 'division', 'downloadFile', 'downloader', 'email', 'encodeQuoteUrl', 'feedparser', 'findNewFile', 'getConfig', 'getFileSize', 'getUrlFromRedirect', 'getFilenameFromHTTP', 'getSaved', 'getVersion', 'getopt', 'helpMessage', 'htmlUnQuote', 'httplib', 'isRunning', 'killDaemon', 'logging', 'main', 'make_handler', 'mechRetrievePage', 'mechanize', 'mimetypes', 'minidom', 'natsorted', 'netscapeHeader', 'nonCoreDependencies', 'noprint', 'operator', 'os', 'percentIsQuoted', 'percentNeedsQuoted', 'percentQuote', 'percentQuoteDict', 'percentUnQuote', 'percentunQuoteDict', 'pickle', 'random', 're', 'resource', 'rss', 'rssparse', 'run', 'saved', 'searchFailed', 'securityIssues', 'setDebug', 'setLogging', 'sgmllib', 'signal', 'signalHandler', 'socket', 'sqlite3', 'sys', 'time', 'traceback', 'unQuoteReQuote', 'unicodeC', 'urllib', 'urllib2', 'urllib2RetrievePage', 'urlparse', 'userFunctHandling', 'userFunctions', 'validFileName', 'writeNewFile', 'xmlUnEscape'
  118.      check docstrings/source for use notes on these reserved words."""
  119.      global userFunctions
  120.      bypassGlobalsList = set(('__builtins__', '__name__', '__doc__'))
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement