Advertisement
Guest User

Untitled

a guest
Oct 19th, 2016
109
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 15.51 KB | None | 0 0
  1. #!/usr/bin/python -OO
  2. # Copyright 2008-2015 The SABnzbd-Team <team@sabnzbd.org>
  3. #
  4. # This program is free software; you can redistribute it and/or
  5. # modify it under the terms of the GNU General Public License
  6. # as published by the Free Software Foundation; either version 2
  7. # of the License, or (at your option) any later version.
  8. #
  9. # This program is distributed in the hope that it will be useful,
  10. # but WITHOUT ANY WARRANTY; without even the implied warranty of
  11. # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  12. # GNU General Public License for more details.
  13. #
  14. # You should have received a copy of the GNU General Public License
  15. # along with this program; if not, write to the Free Software
  16. # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
  17.  
  18. """
  19. sabnzbd.assembler - threaded assembly/decoding of files
  20. """
  21.  
  22. import os
  23. import Queue
  24. import logging
  25. import struct
  26. import re
  27. from threading import Thread
  28. from time import sleep
  29. try:
  30.     import hashlib
  31.     new_md5 = hashlib.md5
  32. except:
  33.     import md5
  34.     new_md5 = md5.new
  35.  
  36. import sabnzbd
  37. from sabnzbd.misc import get_filepath, sanitize_filename, get_unique_filename, renamer, \
  38.     set_permissions, flag_file, long_path, clip_path
  39. from sabnzbd.constants import QCHECK_FILE, Status
  40. import sabnzbd.cfg as cfg
  41. from sabnzbd.articlecache import ArticleCache
  42. from sabnzbd.postproc import PostProcessor
  43. import sabnzbd.downloader
  44. from sabnzbd.utils.rarfile import RarFile, is_rarfile
  45. from sabnzbd.encoding import unicoder, is_utf8
  46. from sabnzbd.rating import Rating
  47.  
  48.  
  class Assembler(Thread):
      """ Worker thread that turns finished files of a job into files on disk.

          Jobs are (nzo, nzf) tuples taken from a queue. A false job (such as
          the None queued by stop()) ends the thread.
      """
      do = None  # Link to the instance of this method

      def __init__(self, queue=None):
          """ Create the thread, using `queue` when given or a fresh queue otherwise """
          Thread.__init__(self)

          if queue:
              self.queue = queue
          else:
              self.queue = Queue.Queue()
          Assembler.do = self

      def stop(self):
          """ Ask the thread to shut down by queueing an empty job """
          self.process(None)

      def process(self, job):
          """ Queue `job`, an (nzo, nzf) tuple, for assembly """
          self.queue.put(job)

      def run(self):
          """ Handle queued jobs until an empty job arrives or a fatal error occurs.

              With an nzf: assemble the file, then apply the par2, encrypted-RAR,
              unwanted-extension and rating checks to the job.
              Without an nzf: remove the job from the queue and hand it to the
              post-processor.
          """
          # Function-level import as in the original code (presumably to avoid a
          # circular import - confirm). It also binds `sabnzbd` locally here.
          import sabnzbd.nzbqueue

          while 1:
              job = self.queue.get()
              if not job:
                  logging.info("Shutting down")
                  break

              nzo, nzf = job

              if not nzf:
                  sabnzbd.nzbqueue.NzbQueue.do.remove(nzo.nzo_id, add_to_history=False, cleanup=False)
                  PostProcessor.do.process(nzo)
                  continue

              sabnzbd.CheckFreeSpace()
              filename = sanitize_filename(nzf.filename)
              nzf.filename = filename

              dupe = nzo.check_for_dupe(nzf)

              filepath = get_filepath(long_path(cfg.download_dir.get_path()), nzo, filename)
              if not filepath:
                  continue

              logging.info('Decoding %s %s', filepath, nzf.type)
              try:
                  filepath = _assemble(nzf, filepath, dupe)
              except IOError as err:
                  # Read err.errno instead of unpacking the exception: unpacking
                  # fails when the IOError does not carry exactly two arguments.
                  # When the job was deleted in the meantime the error is ignored.
                  if not nzo.is_gone():
                      # 28 == disk full => pause downloader
                      if err.errno == 28:
                          logging.error(T('Disk full! Forcing Pause'))
                      else:
                          logging.error(T('Disk error on creating file %s'), clip_path(filepath))
                      # Pause without saving
                      sabnzbd.downloader.Downloader.do.pause(save=False)
              except:
                  # Deliberately broad: last-resort boundary of the thread,
                  # anything unexpected is logged and ends the assembler.
                  logging.error(T('Fatal error in Assembler'), exc_info=True)
                  break

              nzf.remove_admin()
              setname = nzf.setname
              if nzf.is_par2 and (nzo.md5packs.get(setname) is None):
                  pack = GetMD5Hashes(filepath)[0]
                  if pack:
                      nzo.md5packs[setname] = pack
                      logging.debug('Got md5pack for set %s', setname)

              if check_encrypted_rar(nzo, filepath):
                  if cfg.pause_on_pwrar() == 1:
                      logging.warning(T('WARNING: Paused job "%s" because of encrypted RAR file'), nzo.final_name)
                      nzo.pause()
                  else:
                      logging.warning(T('WARNING: Aborted job "%s" because of encrypted RAR file'), nzo.final_name)
                      nzo.fail_msg = T('Aborted, encryption detected')
                      sabnzbd.nzbqueue.NzbQueue.do.end_job(nzo)

              unwanted = rar_contains_unwanted_file(filepath)
              if unwanted:
                  logging.warning(T('WARNING: In "%s" unwanted extension in RAR file. Unwanted file is %s '), nzo.final_name, unwanted)
                  logging.debug(T('Unwanted extension is in rar file %s'), filepath)
                  # nzo.unwanted_ext makes sure the job is paused only once
                  if cfg.action_on_unwanted_extensions() == 1 and nzo.unwanted_ext == 0:
                      logging.debug('Unwanted extension ... pausing')
                      nzo.unwanted_ext = 1
                      nzo.pause()
                  if cfg.action_on_unwanted_extensions() == 2:
                      logging.debug('Unwanted extension ... aborting')
                      nzo.fail_msg = T('Aborted, unwanted extension detected')
                      sabnzbd.nzbqueue.NzbQueue.do.end_job(nzo)

              # 1 == pause the job, 2 == abort the job, anything else == no action
              rating_action, reason = nzo_filtered_by_rating(nzo)
              if rating_action == 1:
                  logging.warning(Ta('WARNING: Paused job "%s" because of rating (%s)'), nzo.final_name, reason)
                  nzo.pause()
              elif rating_action == 2:
                  logging.warning(Ta('WARNING: Aborted job "%s" because of rating (%s)'), nzo.final_name, reason)
                  nzo.fail_msg = T('Aborted, rating filter matched (%s)') % reason
                  sabnzbd.nzbqueue.NzbQueue.do.end_job(nzo)

              nzf.completed = True
  152.  
  153.  
  def _assemble(nzf, path, dupe):
      """ Append all cached articles of `nzf` to the file `path`.

          When `path` already exists: for a dupe the data goes to a new unique
          name, otherwise the existing file is passed to renamer() with a unique
          name and `path` is written afresh.
          When cfg.quick_check() is set, the md5 digest of the written data is
          stored in nzf.md5sum.
          Returns the path that was written.
          Errors from opening or writing (e.g. IOError) are passed to the caller.
      """
      if os.path.exists(path):
          unique_path = get_unique_filename(path)
          if dupe:
              path = unique_path
          else:
              renamer(path, unique_path)

      # The context manager closes the file even when writing fails halfway
      with open(path, 'ab') as fout:
          if cfg.quick_check():
              md5 = new_md5()
          else:
              md5 = None

          decodetable = nzf.decodetable

          for articlenum in decodetable:
              # Break if deleted during writing
              if nzf.nzo.status is Status.DELETED:
                  break

              # Sleep to allow decoder/assembler switching
              sleep(0.001)
              article = decodetable[articlenum]

              data = ArticleCache.do.load_article(article)

              if not data:
                  logging.info(T('%s missing'), article)
              else:
                  # yenc data already decoded, flush it out
                  fout.write(data)
                  if md5 is not None:
                      md5.update(data)

      set_permissions(path)
      if md5 is not None:
          nzf.md5sum = md5.digest()

      return path
  198.  
  199.  
  def file_has_articles(nzf):
      """ Quick check: is at least one article of this file available?
          Every article is loaded from the article cache, so this is
          destructive: only to be used to differentiate between unknown
          encoding and no articles.
      """
      table = nzf.decodetable
      found = False
      for key in table:
          # Short pause before each load, as in the assembly loop
          sleep(0.01)
          if ArticleCache.do.load_article(table[key]):
              found = True
      return found
  213.  
  214.  
  215. # For a full description of the par2 specification, visit:
  216. # http://parchive.sourceforge.net/docs/specifications/parity-volume-spec/article-spec.html
  217.  
  def GetMD5Hashes(fname, force=False):
      """ Get the hash table from a PAR2 file.
          Returns a tuple (table, new_encoding):
          - table: dictionary of md5 hashes, indexed on file name;
            empty when the file cannot be read or parsed
          - new_encoding: True when is_utf8() accepted at least one name, and
            also True when the file was not parsed at all
          The file is not parsed when flag_file() is true for QCHECK_FILE in
          the folder of `fname`, unless `force` is set.
      """
      new_encoding = True
      table = {}
      if not force and flag_file(os.path.split(fname)[0], QCHECK_FILE):
          return table, new_encoding

      try:
          f = open(fname, 'rb')
      except Exception:
          # Best effort: an unreadable par2 file just gives an empty table
          return table, new_encoding

      new_encoding = False
      try:
          # Walk through the file in steps of 8 bytes, looking for packet headers
          header = f.read(8)
          while header:
              name, md5hash = ParseFilePacket(f, header)
              new_encoding |= is_utf8(name)
              if name:
                  table[name] = md5hash
              header = f.read(8)

      except (struct.error, IndexError):
          logging.info('Cannot use corrupt par2 file for QuickCheck, "%s"', fname)
          table = {}
      except Exception:
          logging.debug('QuickCheck parser crashed in file %s', fname)
          logging.info('Traceback: ', exc_info=True)
          table = {}
      finally:
          f.close()

      return table, new_encoding
  250.  
  251.  
  def ParseFilePacket(f, header):
      """ Look up and analyze a FileDesc package.

          `f` is the open PAR2 file, positioned right after `header`, the 8
          bytes that were read last.
          Returns (filename, md5 of the full file) for a valid FileDesc packet
          and (None, None) for anything else.
          A truncated length field raises struct.error.
      """
      nothing = None, None

      if header != b'PAR2\0PKT':
          return nothing

      # Length must be a multiple of 4. It is used below as "32 bytes + data",
      # so anything under 32 would give a negative read size, which means
      # "read until end of file".
      length = struct.unpack('<Q', f.read(8))[0]
      if length % 4 or length < 32:
          return nothing

      # Next 16 bytes is md5sum of this packet
      md5sum = f.read(16)

      # Read and check the data
      data = f.read(length - 32)
      md5 = new_md5()
      md5.update(data)
      if md5sum != md5.digest():
          return nothing

      # The FileDesc packet looks like:
      # 16 : "PAR 2.0\0FileDesc"
      # 16 : FileId
      # 16 : Hash for full file **
      # 16 : Hash for first 16K
      #  8 : File length
      # xx : Name (multiple of 4, padded with \0 if needed) **

      # See if it's the right packet and get name + hash
      for offset in range(0, len(data), 8):
          if data[offset:offset + 16] == b"PAR 2.0\0FileDesc":
              filehash = data[offset + 32:offset + 48]
              filename = data[offset + 72:].strip(b'\0')
              return filename, filehash

      return nothing
  291.  
  292.  
  # NOTE(review): alternation binds loosest, so the leading \W+ only applies to
  # "sub" and the look-ahead only to "subtitles" - confirm this is intended.
  RE_SUBS = re.compile(r'\W+sub|subs|subpack|subtitle|subtitles(?![a-z])', re.I)


  def is_cloaked(path, names):
      """ Return True if this is likely to be a cloaked encrypted post.
          `path` is the RAR file, `names` the names of the files inside it.
      """
      outer = os.path.splitext(unicoder(os.path.split(path)[1]).lower())[0]
      few_names = len(names) < 3
      for entry in names:
          base = os.path.split(entry.lower())[1]
          inner, ext = os.path.splitext(unicoder(base))
          # A RAR inside the RAR whose name is (nearly) the name of the outer RAR
          similar = ext == u'.rar' and outer.startswith(inner) and (len(outer) - len(inner)) < 8
          if similar and few_names and not RE_SUBS.search(outer):
              logging.debug('File %s is probably encrypted due to RAR with same name inside this RAR', outer)
              return True
          if 'password' in inner:
              logging.debug('RAR %s is probably encrypted: "password" in filename %s', outer, inner)
              return True
      return False
  308.  
  309.  
  def check_encrypted_rar(nzo, filepath):
      """ Check if file is rar and is encrypted.

          The check only runs for a job that was not checked before
          (nzo.encrypted == 0), has no password and has cfg.pause_on_pwrar() set.
          It stores the outcome in nzo.encrypted: 1 for encrypted, -1 to skip
          the check for the other files of the job.
          Returns a true value only when encryption was found and the job is
          not a reused one (nzo.reuse).
      """
      encrypted = False
      if nzo.encrypted == 0 and not nzo.password and not nzo.meta.get('password') and cfg.pause_on_pwrar() and is_rarfile(filepath):
          try:
              zf = RarFile(filepath, all_names=True)
              try:
                  encrypted = zf.encrypted or is_cloaked(filepath, zf.namelist())
                  if encrypted and not nzo.reuse:
                      nzo.encrypted = 1
                  else:
                      # Don't check other files
                      nzo.encrypted = -1
                      encrypted = False
              finally:
                  # Release the archive, also when the inspection failed
                  zf.close()
          except Exception:
              logging.debug('RAR file %s cannot be inspected', filepath)
      return encrypted
  328.  
  def rarfilecontents(myrarfile):
      """ Return the list of file names inside `myrarfile`, as printed by the
          "lb" command of sabnzbd.newsunpack.RAR_COMMAND, one name per line.
          Trailing whitespace is removed from each name.
      """
      import subprocess
      import sabnzbd.newsunpack

      # Pass the arguments as a list and without a shell, so that spaces and
      # shell meta characters in the path cannot break or alter the command.
      command = [sabnzbd.newsunpack.RAR_COMMAND, 'lb', myrarfile]
      proc = subprocess.Popen(command, stdout=subprocess.PIPE, universal_newlines=True)
      output = proc.communicate()[0]
      return [line.rstrip() for line in output.splitlines()]
  339.  
  340.  
  def rar_contains_unwanted_file(filepath):
      """ Check for unwanted extensions in the rar file `filepath`.
          Unwanted extensions are defined in cfg.unwanted_extensions().
          Returns None if no unwanted extension is found, if `filepath` is not
          a rar file or if it cannot be inspected.
          Returns the name of the first file with an unwanted extension otherwise.
      """
      unwanted = None
      unwanted_extensions = cfg.unwanted_extensions()
      if unwanted_extensions and is_rarfile(filepath):
          logging.debug('SJ: rar file to check: %s', filepath)
          try:
              for somefile in rarfilecontents(filepath):
                  logging.debug('file in rar file: %s', somefile)
                  # Compare the extension without its dot, in lower case
                  if os.path.splitext(somefile)[1].replace('.', '').lower() in unwanted_extensions:
                      logging.debug('Unwanted file %s', somefile)
                      unwanted = somefile
                      # First hit is enough
                      break
          except Exception:
              logging.debug('RAR file %s cannot be inspected.', filepath)
      return unwanted
  365.  
  366.  
  def nzo_filtered_by_rating(nzo):
      """ Apply the rating filters to a job.
          Returns (action, reason): action 2 when an abort filter matches,
          1 when a pause filter matches and (0, "") when nothing matches or
          the filters do not apply.
      """
      no_action = (0, "")
      if not (Rating.do and cfg.rating_enable() and cfg.rating_filter_enable() and (nzo.rating_filtered < 2)):
          return no_action

      rating = Rating.do.get_rating_by_nzo(nzo.nzo_id)
      if rating is None:
          return no_action

      nzo.rating_filtered = 1
      # Abort filters take precedence over pause filters
      for action, abort in ((2, True), (1, False)):
          reason = rating_filtered(rating, nzo.filename.lower(), abort)
          if reason is not None:
              return (action, reason)
      return no_action
  379.  
  380.  
  def rating_filtered(rating, filename, abort):
      """ Match `rating` and `filename` against the rating filter settings.
          `abort` selects the settings: True for the abort filters
          (cfg.rating_filter_abort_*), False for the pause filters
          (cfg.rating_filter_pause_*).
          Returns the translated name of the first matching filter or None
          when no filter matches.
      """
      def check_keyword(keyword):
          # An empty keyword never matches
          clean_keyword = keyword.strip().lower()
          return (len(clean_keyword) > 0) and (clean_keyword in filename)
      audio = cfg.rating_filter_abort_audio() if abort else cfg.rating_filter_pause_audio()
      video = cfg.rating_filter_abort_video() if abort else cfg.rating_filter_pause_video()
      spam = cfg.rating_filter_abort_spam() if abort else cfg.rating_filter_pause_spam()
      spam_confirm = cfg.rating_filter_abort_spam_confirm() if abort else cfg.rating_filter_pause_spam_confirm()
      encrypted = cfg.rating_filter_abort_encrypted() if abort else cfg.rating_filter_pause_encrypted()
      encrypted_confirm = cfg.rating_filter_abort_encrypted_confirm() if abort else cfg.rating_filter_pause_encrypted_confirm()
      downvoted = cfg.rating_filter_abort_downvoted() if abort else cfg.rating_filter_pause_downvoted()
      keywords = cfg.rating_filter_abort_keywords() if abort else cfg.rating_filter_pause_keywords()
      if (video > 0) and (rating.avg_video > 0) and (rating.avg_video <= video):
          return T('video')
      if (audio > 0) and (rating.avg_audio > 0) and (rating.avg_audio <= audio):
          return T('audio')
      # The spam rule looks at the spam confirmation, not at the encrypted one
      if (spam and ((rating.avg_spam_cnt > 0) or rating.avg_spam_confirm)) or (spam_confirm and rating.avg_spam_confirm):
          return T('spam')
      if (encrypted and ((rating.avg_encrypted_cnt > 0) or rating.avg_encrypted_confirm)) or (encrypted_confirm and rating.avg_encrypted_confirm):
          return T('passworded')
      if downvoted and (rating.avg_vote_up < rating.avg_vote_down):
          return T('downvoted')
      if any(check_keyword(k) for k in keywords.split(',')):
          return T('keywords')
      return None
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement