Advertisement
Guest User

Untitled

a guest
Apr 26th, 2018
63
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 7.02 KB | None | 0 0
  1. from __future__ import division
  2.  
  3. import hashlib
  4. import logging
  5. import os
  6. import re
  7.  
  8. __all__ = [
  9. 'is_unsplitable',
  10. 'get_root_of_unsplitable',
  11. 'Pieces',
  12. ]
  13.  
  14. UNSPLITABLE_FILE_EXTENSIONS = [
  15. set(['.rar', '.sfv']),
  16. set(['.mp3', '.sfv']),
  17. set(['.vob', '.ifo']),
  18. ]
  19.  
  20. logger = logging.getLogger(__name__)
  21.  
  22. def is_unsplitable(files):
  23. """
  24. Checks if a list of files can be considered unsplitable, e.g. VOB/IFO or scene release.
  25. This means the files can only be used in this combination.
  26. """
  27. extensions = set(os.path.splitext(f)[1].lower() for f in files)
  28. found_unsplitable_extensions = False
  29. for exts in UNSPLITABLE_FILE_EXTENSIONS:
  30. if len(extensions & exts) == len(exts):
  31. found_unsplitable_extensions = True
  32. break
  33.  
  34. lowercased_files = set([f.lower() for f in files])
  35. found_magic_file = False
  36. if 'movieobject.bdmv' in lowercased_files:
  37. found_magic_file = True
  38.  
  39. return found_unsplitable_extensions or found_magic_file
  40.  
  41. def get_root_of_unsplitable(path):
  42. """
  43. Scans a path for the actual scene release name, e.g. skipping cd1 folders.
  44.  
  45. Returns None if no scene folder could be found
  46. """
  47. path = path[::-1]
  48. for p in path:
  49. if not p:
  50. continue
  51.  
  52. if re.match(r'^(cd[1-9])|(samples?)|(proofs?)|((vob)?sub(title)?s?)$', p, re.IGNORECASE): # scene paths
  53. continue
  54.  
  55. if re.match(r'^(bdmv)|(disc\d*)|(video_ts)$', p, re.IGNORECASE): # bluray / dd
  56. continue
  57.  
  58.  
  59. return p
  60.  
  61. class Pieces(object):
  62. """
  63. Can help check if files match the files found in a torrent.
  64. """
  65.  
  66. def __init__(self, torrent):
  67. self.piece_size = torrent[b'info'][b'piece length']
  68. self.pieces = []
  69. for i in range(0, len(torrent[b'info'][b'pieces']), 20):
  70. self.pieces.append(torrent[b'info'][b'pieces'][i:i+20])
  71.  
  72. def get_complete_pieces(self, start_size, end_size):
  73. """
  74. Finds complete pieces and returns the alignment needed from
  75. the beginning and the end (to match the file).
  76. """
  77. logger.debug('Getting complete pieces for file starting at %i and ending at %i. Piece size is %i' % (start_size, end_size, self.piece_size))
  78.  
  79. start_piece, start_offset = divmod(start_size, self.piece_size)
  80. if start_offset:
  81. start_piece += 1
  82.  
  83. if start_offset:
  84. start_offset = self.piece_size - start_offset
  85.  
  86. end_piece, end_offset = divmod(end_size, self.piece_size)
  87.  
  88. logger.debug('Start piece:%i end piece:%i' % (start_piece, end_piece-1))
  89. return start_offset, end_offset, self.pieces[start_piece:end_piece]
  90.  
  91. def find_piece_breakpoint(self, file_path, start_size, end_size):
  92. """
  93. Finds the point where a file with a different size is modified and tries to align it with pieces.
  94. """
  95. start_offset, end_offset, pieces = self.get_complete_pieces(start_size, end_size)
  96.  
  97. failed_pieces = (len(pieces) // 20) or 1 # number of pieces that can fail in a row and then put an end to checking
  98. success_count = failed_pieces
  99. piece_status = []
  100.  
  101. with open(file_path, 'rb') as f:
  102. f.seek(start_offset)
  103. for i, piece in enumerate(pieces):
  104. logger.debug('Checking piece %i for breakingpoint' % (i, ))
  105. h = hashlib.sha1(f.read(self.piece_size)).digest()
  106. if h == piece:
  107. logger.debug('Piece %i matched' % i)
  108. if success_count < failed_pieces:
  109. success_count += 1
  110. piece_status.append(True)
  111. else:
  112. logger.debug('Piece %i did not match' % i)
  113. success_count -= 1
  114. piece_status.append(False)
  115.  
  116. if success_count <= 0:
  117. logger.debug('The breakingpoint has been found after piece %i - more than %i failed pieces' % (i, failed_pieces))
  118. break
  119.  
  120. for p in piece_status[::-1]:
  121. if p:
  122. break
  123. i -= 1
  124.  
  125. breakingpoint = start_offset + self.piece_size*i
  126. logger.debug('A total of %i pieces were ok, so we set breakingpoint at %i' % (i, breakingpoint))
  127. return breakingpoint
  128.  
  129. def match_file(self, file_path, start_size, end_size):
  130. """
  131. Try to match file starting at start_size and ending at end_size.
  132. """
  133. start_offset, end_offset, pieces = self.get_complete_pieces(start_size, end_size)
  134. logger.debug('Stuff to check start_offset:%i end_offset:%i pieces:%s' % (start_offset, end_offset, len(pieces)))
  135. if not pieces:
  136. logger.debug('No whole pieces found for %r, taking this as a not-match' % file_path)
  137. return False, False
  138.  
  139. check_pieces = (len(pieces) // 10) or 1
  140.  
  141. match_start, match_end = 0, 0
  142. size = os.path.getsize(file_path)
  143. with open(file_path, 'rb') as f:
  144. for i in range(check_pieces): # check from beginning
  145. seek_offset = start_offset+self.piece_size*i
  146. logger.debug('Checking piece %i from beginning of file, reading from %i bytes. Filesize: %i' % (i, seek_offset, size))
  147. f.seek(seek_offset)
  148. h = hashlib.sha1(f.read(self.piece_size)).digest()
  149. logger.debug('Matching hash %r against %r' % (h, pieces[i]))
  150. if h == pieces[i]:
  151. logger.debug('Piece %i matched' % i)
  152. match_start += 1
  153. else:
  154. logger.debug('Piece %i did not match' % i)
  155.  
  156. for i in range(check_pieces): # check from end
  157. seek_offset = size-end_offset-self.piece_size*(i+1)
  158. logger.debug('Checking piece %i from end of file, reading from %i bytes. Filesize: %i' % (i, seek_offset, size))
  159. f.seek(seek_offset)
  160. h = hashlib.sha1(f.read(self.piece_size)).digest()
  161. piece = pieces[(i+1)*-1]
  162. logger.debug('Matching hash %r against %r' % (h, piece))
  163. if h == piece:
  164. logger.debug('Piece %i matched' % i)
  165. match_end += 1
  166. else:
  167. logger.debug('Piece %i did not match' % i)
  168.  
  169. logger.debug('Checked %i pieces from both start and end. %i matched from start and %i matched from end.' % (check_pieces, match_start, match_end))
  170.  
  171. if check_pieces < 4:
  172. must_match = 1
  173. elif check_pieces < 10:
  174. must_match = 2
  175. else:
  176. must_match = max(check_pieces // 10, 3)
  177.  
  178. return (match_start and check_pieces - match_start <= must_match,
  179. match_end and check_pieces - match_end <= must_match)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement