Advertisement
Guest User

Untitled

a guest
Jun 18th, 2019
93
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 38.86 KB | None | 0 0
  1. #!/usr/bin/python3
  2.  
  3. # Torrentverify
  4. # Copyright (c) 2015-2016 Wintermute0110 <wintermute0110@gmail.com>
  5. #
  6. # Permission is hereby granted, free of charge, to any person obtaining a copy
  7. # of this software and associated documentation files (the "Software"), to deal
  8. # in the Software without restriction, including without limitation the rights
  9. # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  10. # copies of the Software, and to permit persons to whom the Software is
  11. # furnished to do so, subject to the following conditions:
  12. #
  13. # The above copyright notice and this permission notice shall be included in
  14. # all copies or substantial portions of the Software.
  15. #
  16. # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  17. # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  18. # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  19. # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  20. # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  21. # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
  22. # THE SOFTWARE.
  23. #
  24. # Bencoder code based on Bencodepy by Eric Weast (c) 2014
  25. # Licensed under the GPL v2
  26. # https://github.com/eweast/BencodePy/commits/master
  27. import io
  28. import sys
  29. import os
  30. import hashlib
  31. import argparse
  32. import shutil
  33. from collections import OrderedDict
  34.  
  35. # --- Global variables
  36. __software_version = '0.1.0';
  37.  
  38. # --- Program options (from command line)
  39. __prog_options_override_torrent_dir = 0
  40. __prog_options_deleteWrongSizeFiles = 0
  41. __prog_options_truncateWrongSizeFiles = 0
  42. __prog_options_deleteUnneeded = 0
  43.  
  44. # Unified torrent information object. Works for torrent files with 1 or several
  45. # files.
  46. class Torrent:
  47.   torrent_file = None
  48.   dir_name = None
  49.   piece_length = 0
  50.   num_pieces = 0
  51.   num_files = 0
  52.   file_name_list = []
  53.   file_length_list = []
  54.   pieces_hash_list = []
  55.   pieces_file_list = []
  56.  
  57. # --- Get size of terminal ---
  58. # shutil.get_terminal_size() only available in Python 3.3
  59. # https://docs.python.org/3/library/shutil.html#querying-the-size-of-the-output-terminal
  60. # print(sys.version_info)
  61. if sys.version_info < (3, 3, 0):
  62.   # Disable long text-line chopping
  63.   __cols = -1
  64.   print('[NOTE] Your Python version is lower than 3.3.0. Terminal size cannot be determined.')
  65.   print('[NOTE] Chopping of long text lines disabled.')
  66. else:
  67.   __cols, __lines = shutil.get_terminal_size()
  68.   # print('{0} cols and {1} lines'.format(__cols, __lines))
  69.  
  70. # --- Bdecoder ----------------------------------------------------------------
  71. class DecodingError(Exception):
  72.   def __init__(self, msg):
  73.     self.msg = msg
  74.  
  75.   def __str__(self):
  76.     return repr(self.msg)
  77.  
  78. class Decoder:
  79.   def __init__(self, data: bytes):
  80.     self.data = data
  81.     self.idx = 0
  82.  
  83.   def __read(self, i: int) -> bytes:
  84.     """Returns a set number (i) of bytes from self.data."""
  85.     b = self.data[self.idx: self.idx + i]
  86.     self.idx += i
  87.     if len(b) != i:
  88.       raise DecodingError(
  89.          "Incorrect byte length returned between indexes of {0} and {1}. Possible unexpected End of File."
  90.          .format(str(self.idx), str(self.idx - i)))
  91.     return b
  92.  
  93.   def __read_to(self, terminator: bytes) -> bytes:
  94.     """Returns bytes from self.data starting at index (self.idx) until terminator character."""
  95.     try:
  96.       # noinspection PyTypeChecker
  97.       i = self.data.index(terminator, self.idx)
  98.       b = self.data[self.idx:i]
  99.       self.idx = i + 1
  100.       return b
  101.     except ValueError:
  102.       raise DecodingError(
  103.           'Unable to locate terminator character "{0}" after index {1}.'.format(str(terminator), str(self.idx)))
  104.  
  105.   def __parse(self) -> object:
  106.     """Selects the appropriate method to decode next bencode element and returns the result."""
  107.     char = self.data[self.idx: self.idx + 1]
  108.     if char in [b'1', b'2', b'3', b'4', b'5', b'6', b'7', b'8', b'9', b'0']:
  109.       str_len = int(self.__read_to(b':'))
  110.       return self.__read(str_len)
  111.     elif char == b'i':
  112.       self.idx += 1
  113.       return int(self.__read_to(b'e'))
  114.     elif char == b'd':
  115.       return self.__parse_dict()
  116.     elif char == b'l':
  117.       return self.__parse_list()
  118.     elif char == b'':
  119.       raise DecodingError('Unexpected End of File at index position of {0}.'.format(str(self.idx)))
  120.     else:
  121.       raise DecodingError('Invalid token character ({0}) at position {1}.'.format(str(char), str(self.idx)))
  122.  
  123.   def decode(self):
  124.     """Start of decode process. Returns final results."""
  125.     if self.data[0:1] not in (b'd', b'l'):
  126.       return self.__wrap_with_tuple()
  127.     return self.__parse()
  128.  
  129.   def __wrap_with_tuple(self) -> tuple:
  130.     """Returns a tuple of all nested bencode elements."""
  131.     l = list()
  132.     length = len(self.data)
  133.     while self.idx < length:
  134.       l.append(self.__parse())
  135.     return tuple(l)
  136.  
  137.   def __parse_dict(self) -> OrderedDict:
  138.     """Returns an Ordered Dictionary of nested bencode elements."""
  139.     self.idx += 1
  140.     d = OrderedDict()
  141.     key_name = None
  142.     while self.data[self.idx: self.idx + 1] != b'e':
  143.       if key_name is None:
  144.         key_name = self.__parse()
  145.       else:
  146.         d[key_name] = self.__parse()
  147.         key_name = None
  148.     self.idx += 1
  149.     return d
  150.  
  151.   def __parse_list(self) -> list:
  152.     """Returns an list of nested bencode elements."""
  153.     self.idx += 1
  154.     l = []
  155.     while self.data[self.idx: self.idx + 1] != b'e':
  156.       l.append(self.__parse())
  157.     self.idx += 1
  158.     return l
  159.  
  160. # --- Functions ---------------------------------------------------------------
  161. def query_yes_no_all(question, default="no"):
  162.   """Ask a yes/no question via raw_input() and return their answer.
  163.  
  164.  "question" is a string that is presented to the user.
  165.  "default" is the presumed answer if the user just hits <Enter>.
  166.      It must be "yes" (the default), "no" or None (meaning
  167.      an answer is required of the user).
  168.  
  169.  The "answer" return value is True for "yes" or False for "no".
  170.  """
  171.   valid = {"yes": 1,  "y": 1, "ye": 1,
  172.            "no": 0,   "n": 0,
  173.            "all": -1, "a": -1}
  174.   if default is None:
  175.     prompt = " [y/n/a] "
  176.   elif default == "yes":
  177.     prompt = " [Y/n/a] "
  178.   elif default == "no":
  179.     prompt = " [y/N/a] "
  180.   elif default == "all":
  181.     prompt = " [y/n/A] "
  182.   else:
  183.     raise ValueError("invalid default answer: '%s'" % default)
  184.  
  185.   while True:
  186.     sys.stdout.write(question + prompt)
  187.     choice = input().lower()
  188.     if default is not None and choice == '':
  189.       return valid[default]
  190.     elif choice in valid:
  191.       return valid[choice]
  192.     else:
  193.       sys.stdout.write("Please respond with 'yes', 'no' or 'all'"
  194.                        " (or 'y' or 'n' or 'a').\n")
  195.  
  196. def confirm_file_action(action_str, result_str, force_delete):
  197.   delete_file = 0
  198.   if force_delete:
  199.     delete_file = 1
  200.   else:
  201.     result = query_yes_no_all('{0} this file?'.format(action_str))
  202.     if result == 1:
  203.       delete_file = 1
  204.       print('File {0}'.format(result_str))
  205.     elif result == 0:
  206.       delete_file = 0
  207.       print('File not deleted')
  208.     elif result == -1:
  209.       delete_file = 1
  210.       force_delete = True
  211.       print('File {0}'.format(result_str))
  212.     else:
  213.       print('Logic error')
  214.  
  215.   return (delete_file, force_delete)
  216.  
  217. # If max_length == -1 it means size of terminal could not be determined. Do
  218. # nothing witht the string.
  219. def limit_string_lentgh(string, max_length):
  220.   if max_length > 1 and len(string) > max_length:
  221.     string = (string[:max_length-1] + '*');
  222.  
  223.   return string
  224.  
  225. # Convert a list of bytes into a path
  226. def join_file_byte_list(file_list_bytes):
  227.   file_list_string = []
  228.   for i in range(len(file_list_bytes)):
  229.     file_list_string.append(file_list_bytes[i].decode("utf-8"))
  230.  
  231.   return '/'.join(file_list_string)
  232.  
  233. # Returns a Torrent object with torrent metadata
  234. __debug_torrent_extract_metadata = 0
  235. def extract_torrent_metadata(filename):
  236.   torrent = Torrent
  237.   torrent.torrent_file = filename
  238.  
  239.   sys.stdout.write('Bdecoding torrent file {0}... '.format(torrentFileName))
  240.   sys.stdout.flush()
  241.   torrent_file = open(torrentFileName, "rb")
  242.   # Use internal Bdecoder class
  243.   decoder = Decoder(torrent_file.read())
  244.   torr_ordered_dict = decoder.decode()
  245.   info_ordered_dict = torr_ordered_dict[b'info']
  246.   sys.stdout.write('done\n')
  247.  
  248.   if __debug_torrent_extract_metadata:
  249.     print('=== Dumping torrent root ===')
  250.     for key in torr_ordered_dict:
  251.       print(' key {0} value {1}'.format(key, torr_ordered_dict[key]))
  252.  
  253.     print('=== Dumping torrent info ===')
  254.     for key in info_ordered_dict:
  255.       print(' key {0} value {1}'.format(key, info_ordered_dict[key]))
  256.  
  257.   # If torrent info has files field then torrent has several files
  258.   if b'files' in info_ordered_dict:
  259.     t_name = info_ordered_dict[b'name'] # Directory name to store torrent
  260.     t_piece_length = info_ordered_dict[b'piece length']
  261.     t_files_list = info_ordered_dict[b'files']
  262.    
  263.     # --- Converts the string into a file-like object
  264.     t_pieces = info_ordered_dict[b'pieces']
  265.     pieces = io.BytesIO(t_pieces)
  266.     # --- Ensure num_pieces is integer
  267.     num_pieces = len(t_pieces) / 20
  268.     if not num_pieces.is_integer():
  269.       print('num_pieces {0} is not integer!'.format(num_pieces))
  270.       sys.exit(1)
  271.     num_pieces = int(num_pieces)
  272.  
  273.     # --- Fill torrent object
  274.     torrent.dir_name = t_name.decode("utf-8")
  275.     torrent.piece_length = t_piece_length
  276.     torrent.num_pieces = num_pieces
  277.     torrent.num_files = len(t_files_list)
  278.     for i in range(num_pieces):
  279.       hash = pieces.read(20)
  280.       torrent.pieces_hash_list.append(hash)
  281.     torrent.total_bytes = 0
  282.     for t_file in t_files_list:
  283.       torrent.file_name_list.append(join_file_byte_list(t_file[b'path']))
  284.       # print(type(t_file[b'length'])) # type is <class 'int'>
  285.       torrent.file_length_list.append(t_file[b'length'])
  286.       torrent.total_bytes += t_file[b'length']
  287.  
  288.     # DEBUG
  289.     if __debug_torrent_extract_metadata:
  290.       print(' Directory {0}'.format(t_name))
  291.       print(' Piece length {0}'.format(t_piece_length))
  292.       print(' Number of pieces {0}'.format(num_pieces))
  293.       print(' Number of files {0}'.format(len(t_files_list)))
  294.       print(' len(t_pieces) =  {0}'.format(len(t_pieces)))
  295.       print(' num_pieces * piece_length = {0}'.format(num_pieces * t_piece_length))
  296.       print(' len(torrent.pieces_hash_list) = {0}'.format(len(torrent.pieces_hash_list)))
  297.      
  298.   # Single file torrent
  299.   else:
  300.     t_name = info_ordered_dict[b'name'] # File name11
  301.     t_piece_length = info_ordered_dict[b'piece length']
  302.     t_length = info_ordered_dict[b'length']
  303.    
  304.     # --- Converts the string into a file-like object
  305.     t_pieces = info_ordered_dict[b'pieces']
  306.     pieces = io.BytesIO(t_pieces)
  307.     # --- Ensure num_pieces is integer
  308.     num_pieces = len(t_pieces) / 20
  309.     if not num_pieces.is_integer():
  310.       print('num_pieces {0} is not integer!'.format(num_pieces))
  311.       sys.exit(1)
  312.     num_pieces = int(num_pieces)
  313.  
  314.     # --- Fill torrent object
  315.     torrent.piece_length = t_piece_length
  316.     torrent.num_pieces = num_pieces
  317.     torrent.num_files = 1
  318.     torrent.file_name_list.append(t_name)
  319.     torrent.file_length_list.append(t_length)
  320.     for i in range(num_pieces):
  321.       hash = pieces.read(20)
  322.       torrent.pieces_hash_list.append(hash)
  323.     torrent.total_bytes = t_length
  324.  
  325.     # DEBUG
  326.     if __debug_torrent_extract_metadata:
  327.       print(' Filename {0}'.format(t_name))
  328.       print(' Size {0}'.format(t_length))
  329.       print(' Piece length {0}'.format(t_piece_length))
  330.       print(' Number of pieces {0}'.format(num_pieces))
  331.       print(' Number of files {0}'.format(1))
  332.       print(' len(t_pieces) =  {0}'.format(len(t_pieces)))
  333.       print(' num_pieces * piece_length = {0}'.format(num_pieces * t_piece_length))
  334.       print(' len(torrent.pieces_hash_list) = {0}'.format(len(torrent.pieces_hash_list)))
  335.  
  336.   # Make a list of files for each piece. Should include also the file offsets.
  337.   # This is to find torrent that has padded files that must be trimmend.
  338.   # Many Linux torrent clients have this bug in ext4 filesystems.
  339.   # [ [{'file_idx': 0, 'start_offset': 1234, 'end_offset': 5678},
  340.   #    { ... } ],
  341.   #   [  ...   ],
  342.   #   ...
  343.   # ]
  344.   piece_length = torrent.piece_length
  345.   pieces_file_list = []
  346.   piece_current_length = 0
  347.   this_piece_files_list = []
  348.   for i in range(torrent.num_files):
  349.     file_dict = {}
  350.     file_dict['file_idx'] = i
  351.     file_dict['start_offset'] = 0
  352.     file_size = file_current_size = torrent.file_length_list[i]
  353.     while True:
  354.       remaining_piece_bytes = piece_length - piece_current_length
  355.       if file_current_size > remaining_piece_bytes:
  356.         piece_current_length += remaining_piece_bytes
  357.         file_current_size -= remaining_piece_bytes
  358.       else:
  359.         piece_current_length += file_current_size
  360.         file_current_size = 0
  361.       # Go for next file if no more bytes
  362.       if file_current_size == 0:
  363.         file_dict['end_offset'] = file_size
  364.         this_piece_files_list.append(file_dict)
  365.         break
  366.       # Piece is ready, add to the list
  367.       file_dict['end_offset'] = file_size - file_current_size
  368.       this_piece_files_list.append(file_dict)
  369.       pieces_file_list.append(this_piece_files_list)
  370.       # Reset piece files list and size
  371.       piece_current_length = 0
  372.       this_piece_files_list = []
  373.       # Add current file to piece files list
  374.       file_dict = {}
  375.       file_dict['file_idx'] = i
  376.       file_dict['start_offset'] = file_size - file_current_size
  377.   # Last piece
  378.   if piece_current_length > 0:
  379.     pieces_file_list.append(this_piece_files_list)
  380.    
  381.   # Put in torrent object
  382.   torrent.pieces_file_list = pieces_file_list
  383.  
  384.   # DEBUG: print list of files per piece
  385.   if __debug_torrent_extract_metadata:
  386.     for piece_idx in range(len(pieces_file_list)):
  387.       print('Piece {0:06d}'.format(piece_idx))
  388.       this_piece_files_list = pieces_file_list[piece_idx]
  389.       for file_idx in range(len(this_piece_files_list)):
  390.         file_dict = this_piece_files_list[file_idx]
  391.         print(' File {0:06d} start {1:8d} end {2:8d}'
  392.           .format(file_dict['file_idx'], file_dict['start_offset'], file_dict['end_offset']))
  393.  
  394.   return torrent
  395.  
  396. def list_torrent_contents(torrent):
  397.   print('Printing torrent file contents...')
  398.  
  399.   # --- Print list of files
  400.   text_size = 7 + 17 + 1
  401.   for i in range(len(torrent.file_name_list)):
  402.     print('{2}'.format(i+1, torrent.file_length_list[i], \
  403.       limit_string_lentgh(torrent.file_name_list[i], __cols -text_size)))
  404.   return 0
  405.  
  406. # Checks that files listed in the torrent file exist, and that file size
  407. # is correct
  408. # Status can be: OK, MISSING, BAD_SIZE
  409. def check_torrent_files_only(torrent):
  410.   print('Checking torrent files and sizes (NOT hash)')
  411.   ret_value = 0
  412.   num_files_OK = 0
  413.   num_files_bigger_size = 0
  414.   num_files_smaller_size = 0
  415.   num_files_missing = 0
  416.   force_delete = False
  417.   force_truncate = False
  418.   num_deleted_files = 0
  419.   num_truncated_files = 0
  420.   print('    F#   Status     Actual Bytes    Torrent Bytes  File name')
  421.   print('------ -------- ---------------- ----------------  --------------')
  422.   for i in range(len(torrent.file_name_list)):
  423.     file_size = -1
  424.     filename_path = os.path.join(torrent.dir_data, torrent.file_name_list[i])
  425.     # print(filename_path)
  426.     file_exists = os.path.isfile(filename_path)
  427.     if file_exists:
  428.       file_size = os.path.getsize(filename_path)
  429.       if file_size == torrent.file_length_list[i]:
  430.         status = 'OK'
  431.         num_files_OK += 1
  432.       else:
  433.         ret_value = 1
  434.         status = 'BAD_SIZE'
  435.         if file_size > torrent.file_length_list[i]:
  436.           num_files_bigger_size += 1
  437.         else:
  438.           num_files_smaller_size += 1
  439.     else:
  440.       ret_value = 1
  441.       status = 'MISSING'
  442.       num_files_missing += 1
  443.  
  444.     # --- Print file info
  445.     text_size = 7+9+17+17+1
  446.     print('{0:6} {1:>8} {2:16,} {3:16,}  {4}'
  447.       .format(i+1, status, file_size, torrent.file_length_list[i],
  448.               limit_string_lentgh(torrent.file_name_list[i], __cols -text_size)))
  449.  
  450.     # --- Delete wrong size files (mutually exclusive with truncate)
  451.     if status == 'BAD_SIZE' and file_size != torrent.file_length_list[i]:
  452.       if __prog_options_deleteWrongSizeFiles:
  453.         print('RM  {0}'.format(filename_path))
  454.         # This option is very dangerous if user writes the wrong directory
  455.         # Always confirm with user
  456.         delete_file, force_delete = confirm_file_action('Delete', 'deleted', force_delete)
  457.         if delete_file:
  458.           os.unlink(torrent.file_length_list[i])
  459.           num_deleted_files += 1
  460.  
  461.     # --- Truncate bigger size files
  462.     if status == 'BAD_SIZE' and file_size > torrent.file_length_list[i]:
  463.       if __prog_options_truncateWrongSizeFiles:
  464.         print('TRUNCATE  {0}'.format(filename_path))
  465.         # This option is very dangerous if user writes the wrong directory
  466.         # Always confirm with user
  467.         truncate_file, force_truncate = confirm_file_action('Truncate', 'truncated', force_truncate)
  468.         if truncate_file:
  469.           # w+ mode truncates the file, but the file if filled with zeros!
  470.           # According to Python docs, r+ is for both read and writing, should work.
  471.           fo = open(filename_path, "r+b")
  472.           fo.truncate(torrent.file_length_list[i])
  473.           fo.close()
  474.           num_truncated_files += 1
  475.  
  476.   # --- Print torrent metadata
  477.   print('')
  478.   print('Torrent file       : {0}'.format(torrent.torrent_file))
  479.   print('Pieces info        : {0:10,} pieces, {1:16,} bytes/piece'.format(torrent.num_pieces, torrent.piece_length))
  480.   print('Files info         : {0:10,} files,  {1:16,} total bytes'.format(torrent.num_files, torrent.total_bytes))
  481.   print('Torrent directory  : {0}'.format(torrent.dir_name))
  482.   print('Download directory : {0}'.format(torrent.dir_download))
  483.   print('Data directory     : {0}'.format(torrent.dir_data))
  484.   print('Files OK           : {0:,}'.format(num_files_OK))
  485.   print('Files w big size   : {0:,}'.format(num_files_bigger_size))
  486.   print('Files w small size : {0:,}'.format(num_files_smaller_size))
  487.   print('Files missing      : {0:,}'.format(num_files_missing))
  488.   if __prog_options_deleteWrongSizeFiles:
  489.     print('Deleted files      : {0:,}'.format(num_deleted_files))
  490.   if __prog_options_truncateWrongSizeFiles:
  491.     print('Truncated files    : {0:,}'.format(num_truncated_files))
  492.  
  493.   if num_files_bigger_size > 0 and (num_files_smaller_size == 0 or num_files_missing == 0):
  494.     print("""WARNING
  495. Found files with bigger size than it should be.
  496. Run torrentverify with --check and --truncateWrongSizeFiles parameters
  497. to correct the problems.""")
  498.  
  499.   elif num_files_smaller_size > 0 or num_files_missing > 0:
  500.     print("""WARNING
  501. Found files with smaller size than it should be or there are missing files.
  502. It is likely there is some problem with this Torrent. Check with your Torrent
  503. client and download the torrent again.""")
  504.  
  505.   return ret_value
  506.  
  507. # Lists torrent unneeded files
  508. def check_torrent_unneeded_files(torrent):
  509.   print('Checking torrent unneeded files')
  510.   ret_value = 0
  511.  
  512.   # --- Make a recursive list of files in torrent data directory
  513.   torrent_directory = os.path.join(torrent.dir_data)
  514.   file_list = []
  515.   for root, dirs, files in os.walk(torrent_directory, topdown=False):
  516.     for name in files:
  517.       file_list.append(os.path.join(root, name))
  518.  
  519.   # --- Make a set of files in the list of torrent metadata files
  520.   print('  Status                                File name')
  521.   print('--------  ---------------------------------------')
  522.   torrent_file_list = []
  523.   for i in range(len(torrent.file_name_list)):
  524.     filename_path = os.path.join(torrent.dir_data, torrent.file_name_list[i])
  525.     torrent_file_list.append(filename_path);
  526.   torrent_file_set = set(torrent_file_list)
  527.  
  528.   # Check number of elements in list and set are the same. This means there are no
  529.   # duplicate files in the list
  530.   if len(torrent_file_list) != len(torrent_file_set):
  531.     print('len(torrent_file_list) != len(torrent_file_set)')
  532.     exit(1)
  533.  
  534.   # --- Check if files in the torrent directory are on the metadata file set
  535.   num_needed = 0
  536.   num_redundant = 0
  537.   num_deleted_files = 0
  538.   force_delete = False
  539.   text_size = 10
  540.   for i in range(len(file_list)):
  541.     if file_list[i] not in torrent_file_set:
  542.       print('UNNEEDED  {0}'.format(limit_string_lentgh(file_list[i], __cols -text_size)))
  543.       ret_value = 1
  544.       num_redundant += 1
  545.      
  546.       # --- Deleted unneeded file
  547.       if __prog_options_deleteUnneeded:
  548.         print('      RM  {0}'.format(limit_string_lentgh(file_list[i], __cols -text_size)))
  549.         # This option is very dangerous if user writes the wrong directory
  550.         # Always confirm with user
  551.         delete_file, force_delete = confirm_file_action('Delete', 'deleted', force_delete)
  552.         if delete_file:
  553.           os.unlink(file_list[i])
  554.           num_deleted_files += 1
  555.     else:
  556.       print('      OK  {0}'.format(limit_string_lentgh(file_list[i], __cols -text_size)))
  557.       num_needed += 1
  558.  
  559.  
  560.   # --- Print torrent metadata
  561.   print('')
  562.   print('Torrent file            : {0}'.format(torrent.torrent_file))
  563.   print('Pieces info             : {0:10,} pieces, {1:16,} bytes/piece'.format(torrent.num_pieces, torrent.piece_length))
  564.   print('Files info              : {0:10,} files,  {1:16,} total bytes'.format(torrent.num_files, torrent.total_bytes))
  565.   print('Torrent directory       : {0}'.format(torrent.dir_name))
  566.   print('Download directory      : {0}'.format(torrent.dir_download))
  567.   print('Data directory          : {0}'.format(torrent.dir_data))
  568.   print('Files in data directory : {0:,}'.format(len(file_list)))
  569.   print('Needed files            : {0:,}'.format(num_needed))
  570.   print('Unneeded files          : {0:,}'.format(num_redundant))
  571.   if __prog_options_deleteUnneeded:
  572.     print('Deleted files           : {0:,}'.format(num_deleted_files))
  573.  
  574.   if num_redundant > 0:
  575.     print("""WARNING
  576. Found unneeded files in the torrent download directory.
  577. Run torrentverify with --checkUnneeded and --deleteUnneeded parameters
  578. to correct the problems.""")
  579.  
  580.   return ret_value
  581.  
  582. # This naive piece generator only works if files have correct size and exist
  583. # on filesystem
  584. def pieces_generator_naive(torrent):
  585.   """Yield pieces from download file(s)."""
  586.   piece_length = torrent.piece_length
  587.  
  588.   # yield pieces from a multi-file torrent
  589.   # Iterator finishes when function exits but not with the yield keyword
  590.   if torrent.num_files > 1:
  591.     piece = b''
  592.     file_idx_list = []
  593.     # --- Iterate through all files
  594.     # print('{0:6d}'.format(pieces_counter))
  595.     for i in range(len(torrent.file_name_list)):
  596.       path = os.path.join(torrent.dir_data, torrent.file_name_list[i])
  597.  
  598.       # --- Read file
  599.       sfile = open(path, "rb")
  600.       file_idx_list.append(i)
  601.       while True:
  602.         piece += sfile.read(piece_length-len(piece))
  603.         if len(piece) != piece_length:
  604.           sfile.close()
  605.           break
  606.         yield (piece, file_idx_list)
  607.         # --- Go for another piece
  608.         piece = b''
  609.         file_idx_list = []
  610.         file_idx_list.append(i)
  611.     if piece != b'':
  612.       # print('yielding (last?) piece')
  613.       yield (piece, file_idx_list)
  614.  
  615.   # yield pieces from a single file torrent
  616.   else:
  617.     path = info['name']
  618.     print(path)
  619.     sfile = open(path.decode('UTF-8'), "rb")
  620.     while True:
  621.       piece = sfile.read(piece_length)
  622.       if not piece:
  623.         sfile.close()
  624.         return
  625.       yield piece
  626.  
  627. # This piece generator returns zeros if file does not exists. Also,
  628. # if files are padded at the end does not return that padding. This is to
  629. # mimic KTorrent behaviour: files will pass the SHA checksum of the torrent
  630. # but some files will have bigger sizes that need to be truncated.
  631. def pieces_generator(torrent, pieces_list=None):
  632.   piece_length = torrent.piece_length
  633.   pieces_range = range(torrent.num_pieces)
  634.   if pieces_list != None:
  635.     pieces_range = pieces_list
  636.   for piece_idx in pieces_range:
  637.     # Get list of files for this piece
  638.     this_piece_files_list = torrent.pieces_file_list[piece_idx]
  639.     # Iterate through files and make piece
  640.     piece = b''
  641.     file_idx_list = []
  642.     for file_idx in range(len(this_piece_files_list)):
  643.       # Get file info
  644.       file_dict = this_piece_files_list[file_idx]
  645.       file_name = torrent.file_name_list[file_dict['file_idx']]
  646.       file_start = file_dict['start_offset']
  647.       file_end = file_dict['end_offset']
  648.       file_correct_size = torrent.file_length_list[file_dict['file_idx']]
  649.       file_idx_list.append(file_dict['file_idx'])
  650.       # Read file
  651.       path = os.path.join(torrent.dir_data, file_name)
  652.       file_exists = os.path.isfile(path)
  653.       if file_exists:
  654.         file_size = os.path.getsize(path)
  655.         if file_size == file_correct_size:
  656.           # If downloaded file has correct size then read whithin the file
  657.           # limits. Maybe the whole file if file is smaller than the piece size
  658.           sfile = open(path, "rb")
  659.           sfile.seek(file_start)
  660.           piece += sfile.read(file_end - file_start)
  661.           sfile.close()
  662.         elif file_size < file_correct_size:
  663.           # If downloaded file has less size then pad with zeros.
  664.           # To simplify things, treat file as if it doesn't exist.
  665.           # Consequently, SHA1 check will fail.
  666.           piece += bytearray(file_end - file_start)
  667.         else:
  668.           # If downloaded file has more size then truncate file read. Note that
  669.           # SHA1 check may succed, but file will have an incorrect bigger size
  670.           # that must be truncated later.
  671.           sfile = open(path, "rb")
  672.           sfile.seek(file_start)
  673.           piece += sfile.read(file_end - file_start)
  674.           sfile.close()        
  675.       else:
  676.         # If file does not exists at all, just pad with zeros
  677.         piece += bytearray(file_end - file_start)
  678.     # Yield piece
  679.     yield (piece, file_idx_list, piece_idx)
  680.  
  681. # Checks torrent files against SHA1 hash for integrity
  682. def check_torrent_files_hash(torrent):
  683.   ret_value = 0
  684.   print('piece#  file#  HStatus  FStatus     Actual Bytes    Torrent Bytes  File name')
  685.   print('------ ------ -------- -------- ---------------- ----------------  --------------')
  686.   num_files_OK_list = []
  687.   num_files_bigger_size_list = []
  688.   num_files_smaller_size_list = []
  689.   num_files_missing_list = []
  690.   piece_counter = 0
  691.   good_pieces = 0
  692.   bad_pieces = 0
  693.   for piece, file_idx_list, piece_index in pieces_generator(torrent):
  694.     # --- Compare piece hash with expected hash
  695.     piece_hash = hashlib.sha1(piece).digest()
  696.     if piece_hash != torrent.pieces_hash_list[piece_index]:
  697.       hash_status = 'BAD_SHA'
  698.       bad_pieces += 1
  699.       ret_value = 1
  700.     else:
  701.       hash_status = 'GOOD_SHA'
  702.       good_pieces += 1
  703.  
  704.     # --- Print information
  705.     for i in range(len(file_idx_list)):
  706.       file_idx = file_idx_list[i]
  707.       path = os.path.join(torrent.dir_data, torrent.file_name_list[file_idx])
  708.       file_exists = os.path.isfile(path)
  709.       if file_exists:
  710.         file_size = os.path.getsize(path)
  711.         if file_size == torrent.file_length_list[file_idx]:
  712.           file_status = 'OK'
  713.           num_files_OK_list.append(file_idx)
  714.         else:
  715.           file_status = 'BAD_SIZE'
  716.           ret_value = 1
  717.           if file_size > torrent.file_length_list[file_idx]:
  718.             num_files_bigger_size_list.append(file_idx)
  719.           else:
  720.             num_files_smaller_size_list.append(file_idx)
  721.       else:
  722.         file_size = -1
  723.         file_status = 'MISSING'
  724.         ret_value = 1
  725.         num_files_missing_list.append(file_idx)
  726.       # --- Print odd/even pieces with different colors
  727.       text_size = 7+7+9+9+17+17+1
  728.       if piece_index % 2:
  729.         print('{0:06d} {1:6} {2:>8} {3:>8} {4:16,} {5:16,}  {6}'
  730.           .format(piece_index+1, file_idx+1, hash_status, file_status,
  731.                   file_size, torrent.file_length_list[file_idx],
  732.                   limit_string_lentgh(torrent.file_name_list[file_idx], __cols -text_size)))
  733.       else:
  734.         print('\033[0;97m{0:06d} {1:6} {2:>8} {3:>8} {4:16,} {5:16,}  {6}\033[0m'
  735.           .format(piece_index+1, file_idx+1, hash_status, file_status,
  736.                   file_size, torrent.file_length_list[file_idx],
  737.                   limit_string_lentgh(torrent.file_name_list[file_idx], __cols -text_size)))
  738.     # --- Increment piece counter
  739.     piece_counter += 1
  740.  
  741.   # --- Make lists set to avoid duplicates
  742.   num_files_OK_set           = set(num_files_OK_list)
  743.   num_files_bigger_size_set  = set(num_files_bigger_size_list)
  744.   num_files_smaller_size_set = set(num_files_smaller_size_list)
  745.   num_files_missing_set      = set(num_files_missing_list)
  746.  
  747.   # --- Print torrent metadata
  748.   print('')
  749.   print('Torrent file        : {0}'.format(torrent.torrent_file))
  750.   print('Pieces info         : {0:10,} pieces, {1:16,} bytes/piece'.format(torrent.num_pieces, torrent.piece_length))
  751.   print('Files info          : {0:10,} files,  {1:16,} total bytes'.format(torrent.num_files, torrent.total_bytes))
  752.   print('Torrent directory   : {0}'.format(torrent.dir_name))
  753.   print('Download directory  : {0}'.format(torrent.dir_download))
  754.   print('Data directory      : {0}'.format(torrent.dir_data))
  755.   print('Files OK            : {0:12,}'.format(len(num_files_OK_set)))
  756.   print('Files w big size    : {0:12,}'.format(len(num_files_bigger_size_set)))
  757.   print('Files w small size  : {0:12,}'.format(len(num_files_smaller_size_set)))
  758.   print('Files missing       : {0:12,}'.format(len(num_files_missing_set)))
  759.   print('# of pieces checked : {0:12,}'.format(piece_counter))
  760.   print('Good pieces         : {0:12,}'.format(good_pieces))
  761.   print('Bad pieces          : {0:12,}'.format(bad_pieces))
  762.  
  763.   if bad_pieces == 0 and len(num_files_bigger_size_set):
  764.     print("""WARNING
  765. Downloaded files pass SHA check but some files are bigger than they should be.
  766. Run torrentverify with --check and --truncateWrongSizeFiles parameters to correct the
  767. problems and then run torrentverify with --checkHash parameter only to make sure
  768. problems are solved.""")
  769.  
  770.   return ret_value
  771.  
  772. # Checks single file against SHA1 hash for integrity
  773. __debug_file_location_in_torrent = 0
  774. def check_torrent_files_single_hash(torrent, fileName):
  775.   ret_value = 0
  776.  
  777.   # Remove torrent download directory from path
  778.   dir_data = torrent.dir_data
  779.   fileName_search = fileName.replace(dir_data, '');
  780.   fileName_search = fileName_search.strip('/')
  781.  
  782.   if __debug_file_location_in_torrent:
  783.     print('dir_data         {0}'.format(dir_data))
  784.     print('fileName         {0}'.format(fileName))
  785.     print('fileName_search  {0}'.format(fileName_search))
  786.  
  787.   # Locate which pieces of the torrent this file spans
  788.   pieces_list = []
  789.   for piece_idx in range(torrent.num_pieces):
  790.     if __debug_file_location_in_torrent:
  791.       print('Piece {0:6}'.format(piece_idx))
  792.     this_piece_files_list = torrent.pieces_file_list[piece_idx]
  793.     for file_idx in range(len(this_piece_files_list)):
  794.       # Get file info
  795.       cfile_dict = this_piece_files_list[file_idx]
  796.       file_name = torrent.file_name_list[cfile_dict['file_idx']]
  797.       if file_name == fileName_search:
  798.         if __debug_file_location_in_torrent:
  799.           print('  MATCHED  {0}'.format(file_name))
  800.         pieces_list.append(piece_idx)
  801.       else:
  802.         if __debug_file_location_in_torrent:
  803.           print('UNMATCHED  {0}'.format(file_name))
  804.  
  805.   # DEBUG info
  806.   print('File           {0}'.format(fileName))
  807.   print('Internal name  {0}'.format(fileName_search))
  808.   print('File spans {0} pieces'.format(len(pieces_list)))
  809.   if __debug_file_location_in_torrent:
  810.     print('List of pieces')
  811.     for i in range(len(pieces_list)):
  812.       print(' #{0:6}'.format(pieces_list[i]))
  813.  
  814.   if len(pieces_list) < 1:
  815.     print('ERROR File not found in torrent list of files. Exiting.')
  816.     sys.exit(1)
  817.  
  818.   # --- Check pieces in list only
  819.   print('piece#  file# HStatus  FStatus     Actual Bytes    Torrent Bytes  File name')
  820.   print('------ ------ -------- -------- ---------------- ----------------  --------------')
  821.   piece_counter = 0
  822.   good_pieces = 0
  823.   bad_pieces = 0
  824.   for piece, file_idx_list, piece_index in pieces_generator(torrent, pieces_list):
  825.     # --- Compare piece hash with expected hash
  826.     piece_hash = hashlib.sha1(piece).digest()
  827.     if piece_hash != torrent.pieces_hash_list[piece_index]:
  828.       hash_status = 'BAD_SHA'
  829.       bad_pieces += 1
  830.       ret_value = 1
  831.     else:
  832.       hash_status = 'GOOD_SHA'
  833.       good_pieces += 1
  834.  
  835.     # --- Print information
  836.     for i in range(len(file_idx_list)):
  837.       file_idx = file_idx_list[i]
  838.       path = os.path.join(torrent.dir_data, torrent.file_name_list[file_idx])
  839.       file_exists = os.path.isfile(path)
  840.       if file_exists:
  841.         file_size = os.path.getsize(path)
  842.         if file_size == torrent.file_length_list[file_idx]:
  843.           file_status = 'OK'
  844.         else:
  845.           file_status = 'BAD_SIZE'
  846.           ret_value = 1
  847.       else:
  848.         file_size = -1
  849.         file_status = 'MISSING'
  850.         ret_value = 1
  851.       # --- Print odd/even pieces with different colors
  852.       text_size = 7+7+9+9+17+17+1
  853.       if piece_index % 2:
  854.         print('{0:06d} {1:6} {2:>8} {3:>8} {4:16,} {5:16,}  {6}'
  855.           .format(piece_index+1, file_idx+1, hash_status, file_status,
  856.                   file_size, torrent.file_length_list[file_idx],
  857.                   limit_string_lentgh(torrent.file_name_list[file_idx], __cols -text_size)))
  858.       else:
  859.         print('\033[0;97m{0:06d} {1:6} {2:>8} {3:>8} {4:16,} {5:16,}  {6}\033[0m'
  860.           .format(piece_index+1, file_idx+1, hash_status, file_status,
  861.                   file_size, torrent.file_length_list[file_idx],
  862.                   limit_string_lentgh(torrent.file_name_list[file_idx], __cols -text_size)))
  863.     # --- Increment piece counter
  864.     piece_counter += 1
  865.  
  866.   # --- Print torrent metadata
  867.   print('')
  868.   print('Torrent file        : {0}'.format(torrent.torrent_file))
  869.   print('Pieces info         : {0:10,} pieces, {1:16,} bytes/piece'.format(torrent.num_pieces, torrent.piece_length))
  870.   print('Files info          : {0:10,} files,  {1:16,} total bytes'.format(torrent.num_files, torrent.total_bytes))
  871.   print('Torrent directory   : {0}'.format(torrent.dir_name))
  872.   print('Download directory  : {0}'.format(torrent.dir_download))
  873.   print('Data directory      : {0}'.format(torrent.dir_data))
  874.   print('# of pieces checked : {0:12,}'.format(piece_counter))
  875.   print('Good pieces         : {0:12,}'.format(good_pieces))
  876.   print('Bad pieces          : {0:12,}'.format(bad_pieces))
  877.  
  878.   return ret_value
  879.  
  880. def do_printHelp():
  881.   print("""\033[32mUsage: torrentverify.py -t file.torrent [-d /download_dir/] [options]\033[0m
  882.  
  883. If only the torrent file is input with -t file.torrent then torrent file contents
  884. are listed but no other action is performed.
  885.  
  886. \033[32mOptions:
  887. \033[35m-t\033[0m \033[31mfile.torrent\033[0m             Torrent filename.
  888. \033[35m-d\033[0m \033[31m/download_dir/\033[0m           Directory where torrent is downloaded.
  889. \033[35m--otd\033[0m                       Override torrent data directory.
  890. \033[35m--check\033[0m                     Quick check of downloaded files.
  891. \033[35m--truncateWrongSizeFiles\033[0m    Truncate files whose size is bigger than it should be.
  892. \033[35m--deleteWrongSizeFiles\033[0m      Delete files whose size is incorrect.
  893. \033[35m--checkUnneeded\033[0m             Finds unneeded files in data directory.
  894. \033[35m--deleteUnneeded\033[0m            Deletes unneeded files in the data directory.
  895. \033[35m--checkHash\033[0m                 Checks Torrent data using SHA1 hash.
  896. \033[35m--checkFile\033[0m \033[31mfile\033[0m            Checks a single downloaded file against the SHA1 checksum.""")
  897.  
  898. # -----------------------------------------------------------------------------
  899. # main function
  900. #
  901. # Program returns
  902. # 0 everything OK
  903. # 1 error found with torrent files
  904. # 2 error in program arguments or no arguments given
  905. # 3 torrent file does not found
  906. # 4 data directory not found
  907. # -----------------------------------------------------------------------------
  908. print('\033[36mTorrentVerify\033[0m' + ' version ' + __software_version)
  909.  
  910. # --- Command line parser
  911. p = argparse.ArgumentParser()
  912. p.add_argument('-t', help="Torrent file", nargs = 1)
  913. p.add_argument("-d", help="Data directory", nargs = 1)
  914. p.add_argument("--otd", help="Override torrent directory", action="store_true")
  915. g = p.add_mutually_exclusive_group()
  916. g.add_argument("--check", help="Do a basic torrent check: files there or not and size", action="store_true")
  917. g.add_argument("--checkUnneeded", help="Write me", action="store_true")
  918. g.add_argument("--checkHash", help="Full check with SHA1 hash", action="store_true")
  919. g.add_argument("--checkFile", help="Check single file with SHA1 hash", nargs = 1)
  920. d = p.add_mutually_exclusive_group()
  921. d.add_argument("--deleteWrongSizeFiles", help="Delete files having wrong size", action="store_true")
  922. d.add_argument("--truncateWrongSizeFiles", help="Chop files with incorrect size to right one", action="store_true")
  923. p.add_argument("--deleteUnneeded", help="Write me", action="store_true")
  924. args = p.parse_args();
  925.  
  926. # --- Read arguments
  927. torrentFileName = data_directory = None
  928. check = checkUnneeded = checkHash = 0
  929.  
  930. if args.t:
  931.   torrentFileName = args.t[0];
  932. if args.d:
  933.   data_directory = args.d[0];
  934.  
  935. # Optional arguments
  936. if args.otd:
  937.   __prog_options_override_torrent_dir = 1
  938.  
  939. if args.deleteWrongSizeFiles:
  940.   __prog_options_deleteWrongSizeFiles = 1
  941.  
  942. if args.truncateWrongSizeFiles:
  943.   __prog_options_truncateWrongSizeFiles = 1
  944.  
  945. if args.deleteUnneeded:
  946.   __prog_options_deleteUnneeded = 1
  947.  
  948. # --- Extrant torrent metadata
  949. if not torrentFileName:
  950.   do_printHelp()
  951.   sys.exit(2)
  952.  
  953. if (args.check or args.checkUnneeded or args.checkHash or args.checkFile) \
  954.     and data_directory == None:
  955.   do_printHelp()
  956.   sys.exit(2)
  957.  
  958. # --- Check for torrent file existence
  959. if not os.path.isfile(torrentFileName):
  960.   print('Torrent file not found: {0}'.format(torrentFileName))
  961.   sys.exit(3)
  962.  
  963. # --- Read torrent file metadata  
  964. torrent_obj = extract_torrent_metadata(torrentFileName)
  965.  
  966. # --- Get torrent data directory and check it exists
  967. if data_directory != None:
  968.   torrent_obj.dir_download = data_directory
  969.   # User wants to override torrent data directory
  970.   if __prog_options_override_torrent_dir:
  971.     torrent_obj.dir_data = torrent_obj.dir_download
  972.   # Normal mode of operation
  973.   else:
  974.     torrent_obj.dir_data = os.path.join(data_directory, torrent_obj.dir_name)
  975.   # Check that data directory exists
  976.   if not os.path.isdir(torrent_obj.dir_data):
  977.     print('Data directory not found: {0}'.format(torrent_obj.dir_data))
  978.     exit(4)
  979.  
  980. # --- Decide what to do based on arguments
  981. ret_value = 0
  982. if args.check:
  983.   ret_value = check_torrent_files_only(torrent_obj)
  984. elif args.checkUnneeded:
  985.   ret_value = check_torrent_unneeded_files(torrent_obj)
  986. elif args.checkHash:
  987.   ret_value = check_torrent_files_hash(torrent_obj)
  988. elif args.checkFile:
  989.   ret_value = check_torrent_files_single_hash(torrent_obj, args.checkFile[0])
  990. else:
  991.   ret_value = list_torrent_contents(torrent_obj)
  992. sys.exit(ret_value)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement