Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- --- aaa/rssdler.py 2009-10-01 16:04:26.000000000 +0200
- +++ bbb/rssdler.py 2013-04-30 09:25:05.000000000 +0200
- @@ -425,7 +425,31 @@
- # # # # #
- # Network Communication
- # # # # #
- -def getFilenameFromHTTP(info, url):
- +def getUrlFromRedirect(url):
- + u"""url is a URL that is checked for possible redirects. The final
- + redirected URL is returned."""
- + attempts = 0
- + turl = url
- + while attempts < 10:
- + host, path, query = urlparse.urlsplit(turl)[1:4]
- + if host.strip() == '':
- + break
- + try:
- + connection = httplib.HTTPConnection(host, timeout=10)
- + connection.request("GET", path + '?' + query)
- + resp = connection.getresponse()
- + except:
- + break
- + attempts += 1
- + if resp.status >= 300 and resp.status <= 399:
- + turl = resp.getheader('location')
- + elif resp.status >= 200 and resp.status <= 299:
- + return turl
- + else:
- + break
- + return url
- +
- +def getFilenameFromHTTP(info, url, threadName):
- u"""info is an http header from the download,
- url is the url to the downloaded file (responseObject.geturl() )."""
- filename = None
- @@ -437,6 +461,11 @@
- logging.debug(u"filename from content-disposition header")
- return unicodeC( filename )
- logging.debug(u"filename from url")
- + if getConfig()['threads'][threadName]['nameFromRedirect'] or (
- + getConfig()['global']['nameFromRedirect'] and not
- + getConfig()['threads'][threadName]['nameFromRedirect'] == False
- + ):
- + url = getUrlFromRedirect(url)
- filename = percentUnQuote( urlparse.urlparse( url )[2].split('/')[-1] )
- try: typeGuess = info.gettype()
- except AttributeError: typeGuess = None
- @@ -779,7 +808,7 @@
- logging.critical(''.join((traceback.format_exc(), os.linesep,
- u'error grabbing url: %s' % link)))
- return False
- - filename = getFilenameFromHTTP(data.info(), link)
- + filename = getFilenameFromHTTP(data.info(), link, threadName)
- if not filename: return False
- size, data2 = getFileSize(data.info(), data)
- if size and not checkFileSize(size, threadName, downItemConfig):
- @@ -1154,6 +1183,8 @@
- around before actually giving you the file to download.
- Mechanize has the ability to follow these sites.
- noClobber: [Optional] Boolean. Default True. Overwrite file, or use new name
- + nameFromRedirect: [Optional] Boolean. Default False. Get filename from the
- + final redirect of the given feed items.
- rssFeed: [Optional] Boolean Option. Default False. Setting this option
- allows you to create your own rss feed of the objects you have
- downloaded. It's a basic feed, likely to not include links to the
- @@ -1204,6 +1235,7 @@
- self['cookieType'] = 'MozillaCookieJar'
- self['sleepTime'] = 1
- self['noClobber'] = True
- + self['nameFromRedirect'] = False
- self['umask'] = 77
- self['debug'] = False
- @@ -1297,6 +1329,9 @@
- for each thread is:
- for X = global scanMins, Y = thread scanMins, Z = ttl Mins:
- min{nX | nX >= Y ; nX >= Z ; n \u2208 \u2115 }
- + nameFromRedirect: [Optional] Boolean. Default None. Get filename from the
- + final redirect of the given feed items. Overrides global option
- + "nameFromRedirect" if set.
- checkTime: DEPRECATED. Will no longer be processed.
- Programmers Note:
- download<x>* stored in a DownloadItemConfig() Dict in .downloads.
- @@ -1305,7 +1340,7 @@
- def __init__(self, name=None, link=None, active=True, maxSize=None,
- minSize=None, noSave=False, directory=None, regExTrue=None,
- regExTrueOptions=None, regExFalse=None, regExFalseOptions=None,
- - postDownloadFunction=None, scanMins=0):
- + postDownloadFunction=None, scanMins=0, nameFromRedirect=None):
- dict.__init__(self)
- self['link'] = link
- self['active'] = active
- @@ -1320,6 +1355,7 @@
- self['regExFalseOptions'] = regExFalseOptions
- self['postDownloadFunction'] = postDownloadFunction
- self['scanMins'] = scanMins
- + self['nameFromRedirect'] = nameFromRedirect
- self['downloads'] = []
- self['postScanFunction'] = None
- self['preScanFunction'] = None
- @@ -1455,8 +1491,8 @@
- 'Saturday', 'Sunday', 'Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun',
- '0', '1', '2', '3', '4', '5', '6']
- boolOptionsGlobal = ['runOnce', 'active', 'rssFeed', 'urllib', 'noClobber',
- - 'debug']
- - boolOptionsThread = ['active', 'noSave']
- + 'nameFromRedirect', 'debug']
- + boolOptionsThread = ['active', 'noSave', 'nameFromRedirect']
- stringOptionsGlobal = ['downloadDir', 'saveFile', 'cookieFile', 'cookieType'
- , 'logFile', 'workingDir', 'daemonInfo', 'rssFilename', 'rssLink',
- 'rssDescription', 'rssTitle', ]
- @@ -1741,7 +1777,7 @@
- def userFunctHandling():
- u"""tries to import userFunctions, sets up the namespace
- reserved words in userFunctions: everything in globals() except '__builtins__', '__name__', '__doc__', 'userFunctHandling', 'callUserFunction', 'userFunctions'. If using daemon mode, 'resource' is reserved.
- - Reserved words: 'Config', 'ConfigParser', 'DownloadItemConfig', 'FailedItem', 'Fkout', 'GlobalOptions', 'LevelFilter', 'Locked', 'MAXFD', 'MakeRss', 'SaveInfo', 'SaveProcessor', 'StringIO', 'ThreadLink', '_USER_AGENT', '__author__', '__copyright__', '__file__', '__package__', '__version__', '_action', '_configInstance', '_main', '_runOnce', 'bdecode', 'callDaemon', 'callUserFunction', 'checkFileSize', 'checkRegEx', 'checkRegExDown', 'checkRegExGFalse', 'checkRegExGTrue', 'checkScanTime', 'checkSleep', 'cliOptions', 'codecs', 'commentConfig', 'configFile', 'configFileNotes', 'convertKDEToMoz', 'convertMoz3ToNet', 'convertSafariToMoz', 'cookieHandler', 'cookielib', 'createDaemon', 'division', 'downloadFile', 'downloader', 'email', 'encodeQuoteUrl', 'feedparser', 'findNewFile', 'getConfig', 'getFileSize', 'getFilenameFromHTTP', 'getSaved', 'getVersion', 'getopt', 'helpMessage', 'htmlUnQuote', 'httplib', 'isRunning', 'killDaemon', 'logging', 'main', 'make_handler', 'mechRetrievePage', 'mechanize', 'mimetypes', 'minidom', 'natsorted', 'netscapeHeader', 'nonCoreDependencies', 'noprint', 'operator', 'os', 'percentIsQuoted', 'percentNeedsQuoted', 'percentQuote', 'percentQuoteDict', 'percentUnQuote', 'percentunQuoteDict', 'pickle', 'random', 're', 'resource', 'rss', 'rssparse', 'run', 'saved', 'searchFailed', 'securityIssues', 'setDebug', 'setLogging', 'sgmllib', 'signal', 'signalHandler', 'socket', 'sqlite3', 'sys', 'time', 'traceback', 'unQuoteReQuote', 'unicodeC', 'urllib', 'urllib2', 'urllib2RetrievePage', 'urlparse', 'userFunctHandling', 'userFunctions', 'validFileName', 'writeNewFile', 'xmlUnEscape'
- + Reserved words: 'Config', 'ConfigParser', 'DownloadItemConfig', 'FailedItem', 'Fkout', 'GlobalOptions', 'LevelFilter', 'Locked', 'MAXFD', 'MakeRss', 'SaveInfo', 'SaveProcessor', 'StringIO', 'ThreadLink', '_USER_AGENT', '__author__', '__copyright__', '__file__', '__package__', '__version__', '_action', '_configInstance', '_main', '_runOnce', 'bdecode', 'callDaemon', 'callUserFunction', 'checkFileSize', 'checkRegEx', 'checkRegExDown', 'checkRegExGFalse', 'checkRegExGTrue', 'checkScanTime', 'checkSleep', 'cliOptions', 'codecs', 'commentConfig', 'configFile', 'configFileNotes', 'convertKDEToMoz', 'convertMoz3ToNet', 'convertSafariToMoz', 'cookieHandler', 'cookielib', 'createDaemon', 'division', 'downloadFile', 'downloader', 'email', 'encodeQuoteUrl', 'feedparser', 'findNewFile', 'getConfig', 'getFileSize', 'getUrlFromRedirect', 'getFilenameFromHTTP', 'getSaved', 'getVersion', 'getopt', 'helpMessage', 'htmlUnQuote', 'httplib', 'isRunning', 'killDaemon', 'logging', 'main', 'make_handler', 'mechRetrievePage', 'mechanize', 'mimetypes', 'minidom', 'natsorted', 'netscapeHeader', 'nonCoreDependencies', 'noprint', 'operator', 'os', 'percentIsQuoted', 'percentNeedsQuoted', 'percentQuote', 'percentQuoteDict', 'percentUnQuote', 'percentunQuoteDict', 'pickle', 'random', 're', 'resource', 'rss', 'rssparse', 'run', 'saved', 'searchFailed', 'securityIssues', 'setDebug', 'setLogging', 'sgmllib', 'signal', 'signalHandler', 'socket', 'sqlite3', 'sys', 'time', 'traceback', 'unQuoteReQuote', 'unicodeC', 'urllib', 'urllib2', 'urllib2RetrievePage', 'urlparse', 'userFunctHandling', 'userFunctions', 'validFileName', 'writeNewFile', 'xmlUnEscape'
- check docstrings/source for use notes on these reserved words."""
- global userFunctions
- bypassGlobalsList = set(('__builtins__', '__name__', '__doc__'))
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement