SHARE
TWEET

torrent parser

ikwtif Oct 19th, 2019 (edited) 96 Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
  1. import difflib
  2. import glob
  3. import json
  4. import logging
  5. import os
  6. from pathlib import Path
  7. import re
  8. import shutil
  9. import structlog
  10. import sys
  11. import time
  12. import torrent_parser
  13.  
  14. '''
  15. requirements to install
  16. six==1.12.0
  17. structlog==19.2.0
  18. torrent-parser==0.3.0
  19. '''
  20.  
  21. ##########################VARIABELS##############################################
  22.  
  23. # set to True if you want to move the files (set to False so you can test first)
  24. move_files = False
  25. # confidence level == used for not matching filenames, set threshold level for accepting modified filename  
  26. confidence_level = 0.9
  27. # set this to True if you want to clean filenames, which removes one of [] and one of () if it's present in the file name -- does not remove multiple, only first appearance
  28. # used for matching filenames and determining confidence level
  29. cleaning = False
  30. # path to top folder with torrent files to load -- example: "C:\\torrents\\"
  31. torrents_path = ""
  32. extension = ".torrent"
  33. # list of all top folder paths to folders with video files to check -- example: ["C:\\Documents\\files\\", "C:\\myfiles\\myvids\\"]
  34. video_path = []
  35. # path to move matched video files to -- example: "C:\\Documents\\\\moved\\"
  36. move_path = ""
  37.  
  38.  
  39.  
  40. ##########################LOGGER##############################################
  41. runtime = time.localtime()
  42. current_time = time.strftime('%m-%d-%H-%M-%S', runtime)
  43. log_mode = 'standard'
  44. if log_mode == 'standard':
  45.     log_formatter = logging.Formatter('%(asctime)s - %(levelname)s - %(message)s')
  46. handler = logging.StreamHandler(sys.stdout)
  47. handler.setFormatter(log_formatter)
  48. fh = logging.FileHandler(f'{current_time} - log.log')
  49. fh.setFormatter(log_formatter)
  50. logger = logging.getLogger()
  51. logger.addHandler(fh)
  52. logger.addHandler(handler)
  53. logger.setLevel(logging.INFO)
  54.  
  55.  
  56. ##########################FUNCTIONS##############################################
  57.  
  58. def remove_nested_parens(input_str, char):
  59.     """Returns a copy of 'input_str' with any text removed between specified characters. Nested parentheses are handled."""
  60.     result = ''
  61.     paren_level = 0
  62.     for ch in input_str:
  63.         if ch == char[0]:
  64.             paren_level += 1
  65.         elif (ch == char[1]) and paren_level:
  66.             paren_level -= 1
  67.         elif not paren_level:
  68.             result += ch
  69.     return result
  70.  
  71.  
  72. def clean_confidence(file_name, torrent_file_name):
  73.     file_name_clean = file_name
  74.     name_clean = torrent_file_name
  75.     if cleaning:
  76.         for char in [['[',']'],['(',')']]:  #remove text between these characters [] and ()
  77.             file_name_clean = remove_nested_parens(file_name_clean, char)
  78.             name_clean = remove_nested_parens(name_clean, char)
  79.     # test similarity based on torrent name and file name
  80.     confidence = difflib.SequenceMatcher(None,file_name_clean, name_clean).ratio()
  81.     return confidence
  82.    
  83.  
  84.  
  85. ##############################CODE##########################################
  86.  
  87. #grab all torrent files inside torrents_path folder and subfolders
  88. torrent_files = list()
  89. for filename in Path(torrents_path).glob('**/*{}'.format(extension)):
  90.     torrent_files.append(filename)
  91.  
  92. #read info from torrent files
  93. files = dict()
  94. for file_path in torrent_files:
  95.     filename = os.path.basename(file_path)
  96.     files[filename] = dict()
  97.     try:
  98.         data = torrent_parser.parse_torrent_file(file_path)
  99.     except Exception:
  100.         logger.warning(f'torrent filed to load data at {file_path}')
  101.         continue
  102.     try:
  103.         names = data['info']['files']
  104.         for file in names:
  105.             if file['path'][0].lower() == 'screens':
  106.                 pass
  107.             elif file['path'][0].lower() == 'videos':
  108.                 files[filename][file['path'][1]] = file['length']
  109.             else:
  110.                 if len(file['path']) == 1:
  111.                     files[filename][file['path'][0]] = file['length']
  112.     except Exception:
  113.         name = data['info']['name']
  114.         length = data['info']['length']
  115.         files[filename] = {name: length}
  116.  
  117. #grab all files from video_path folder and subfolders
  118. #key = size
  119. #value = list of videopath, name
  120. video_files = dict()
  121. for path in video_path:
  122.     for video in Path(path).glob('**/*'):
  123.         name = os.path.basename(video)
  124.         size = os.path.getsize(video)
  125.         if os.path.isdir(video):
  126.             pass
  127.         elif name.endswith('.jpg'):
  128.             pass
  129.         elif name.endswith('.jpeg'):
  130.             pass
  131.         elif name.endswith('.JPG'):
  132.             pass
  133.         elif name.endswith('.zip'):
  134.             pass
  135.         elif name.endswith('.png'):
  136.             pass
  137.         elif name.endswith('.gif'):
  138.             pass
  139.         elif name.endswith('.mp3'):
  140.             pass
  141.         else:
  142.             if size in video_files:
  143.                 value = video_files[size]
  144.                 value.extend([str(video)])
  145.                 video_files[size] = value
  146.             else:
  147.                 #TODO -> remove string? And do this on saving??
  148.                 video_files[size] = [str(video)]
  149.  
  150. #torrent = torrentname
  151. #files = dictionary of files in torrent
  152. torrent_del = list()
  153. for torrentz, torrent_files in files.items():
  154.     logger.debug(f'------------> starting new torrent: {torrentz}')
  155.     for name, size in torrent_files.items():
  156.         if not name.endswith('.jpeg') and not name.endswith('.gif'):
  157.             if size in video_files:
  158.                 all_names = list()
  159.                 for names in video_files[size]:
  160.                     all_names.append(os.path.basename(names))
  161.  
  162.                 if name in all_names:   # check for name match
  163.                     index = all_names.index(name)
  164.                     new_path = move_path + torrentz + '\\' + os.path.basename(video_files[size][index])
  165.  
  166.                     if move_files:
  167.                         # move video file to new folder
  168.                         try:
  169.                             os.makedirs(os.path.dirname(new_path), exist_ok=True)
  170.                             shutil.move(video_files[size][index], new_path)
  171.                         except FileNotFoundError:
  172.                             logger.warning(f'{file_name} not found')
  173.                             pass
  174.                    
  175.                     # check for multiple files
  176.                     if index == 0:
  177.                         del video_files[size]
  178.                     else:
  179.                         value = video_files[size]
  180.                         del value[index]
  181.                         video_files[size] = value
  182.                     # add matched torrent file info to list
  183.                     torrent_del.append([torrentz, name])
  184.                        
  185.                
  186.                 else:                           #if name does not match -- check for many files exist for this size
  187.                     if len(video_files[size]) == 1:     # if there is only one file at this size
  188.                        
  189.                         file_name = os.path.basename(video_files[size][0])
  190.                         #check confidence level for name match
  191.                         confidence = clean_confidence(file_name=file_name, torrent_file_name= name)
  192.                         if  confidence > confidence_level:     # assume file match at X confidence level
  193.                             # create new path
  194.                             new_path = move_path + torrentz + '\\' + file_name
  195.                             if move_files:
  196.                                 # move video file to new folder
  197.                                 try:
  198.                                     os.makedirs(os.path.dirname(new_path), exist_ok=True)
  199.                                     shutil.move(video_files[size][0], new_path)
  200.                                 except FileNotFoundError:
  201.                                     logger.warning(f'{file_name} not found')
  202.                                     pass
  203.                            
  204.  
  205.                             # remove file from dictionary key
  206.                             del video_files[size]
  207.                             # add matched torrent file info to list
  208.                             torrent_del.append([torrentz, name])
  209.                             logging.info(f'found 1 file for size: {size} \n    {file_name} \n    confidence level {confidence} \n    name to match: {name}')
  210.                            
  211.                         else:                   # assume wrong file with low confidence level
  212.                             pass
  213.  
  214.                     else:   # multiple files at this size exist
  215.                         start = 0
  216.                         confidence_levels = list()
  217.                         # check for confidence levels for each video file
  218.                         number_files = len(video_files[size])
  219.                         while start < len(video_files[size]):
  220.                             file_name = os.path.basename(video_files[size][start])
  221.                             confidence = clean_confidence(file_name=file_name, torrent_file_name=name)
  222.                             confidence_levels.append(confidence)
  223.                             start += 1
  224.  
  225.                         #get file with highest confidence level
  226.                         max_confidence = max(confidence_levels)
  227.                         if max_confidence > confidence_level:   # assume file match at X confidence level
  228.                             max_index = confidence_levels.index(max_confidence)
  229.                             # create new path
  230.                             new_path = move_path + torrentz + '\\' + file_name
  231.                             if move_files:
  232.                                 #move video file to new folder
  233.                                 try:
  234.                                     os.makedirs(os.path.dirname(new_path), exist_ok=True)
  235.                                     shutil.move(video_files[size][max_index], new_path)
  236.                                 except FileNotFoundError:
  237.                                     logger.warning(f'{file_name} not found')
  238.                                     pass
  239.    
  240.                             value = video_files[size]
  241.                             del value[max_index]
  242.                             video_files[size] = value
  243.                             # add matched torrent file info to list
  244.                             torrent_del.append([torrentz, name])
  245.  
  246.                            
  247.                                
  248.                             logging.info(f'{len(confidence_levels)} files found \n    {confidence_levels} \n    {file_name} \n    confidence level {max_confidence} \n    name to match: {name}')
  249.                         else:               # assume wrong file with low confidence level
  250.                             logging.warning(f'confidence level too low \n    {len(confidence_levels)} files found \n    {confidence_levels} \n    {video_files[size]} \n    {name}')
  251.                             pass
  252.             else:
  253.                 pass
  254.                 #logging.info(f'nothing found for file \n    {name}')
  255.                 # assume wrong file
  256.         else:
  257.             #file is jpeg or gif#
  258.             pass
  259.  
  260.  
  261. # cleaning torrent information based on found files
  262. for list_item in torrent_del:
  263.     del files[list_item[0]][list_item[1]]
  264.  
  265. # delete keys for empty values
  266. for k, v in list(files.items()):
  267.     if bool(v) == False:
  268.         del files[k]
  269.  
  270. # save dictionary with torrent info
  271. # all files that not have been found per torrent
  272. with open(f'{current_time} - torrent_files.json', 'w') as fp:
  273.     json.dump(files, fp, indent=4)
  274.    
  275. # save dictionary with video file info
  276. # all files that not have been found per size
  277. with open(f'{current_time} - video_files.json', 'w') as fp:
  278.     json.dump(video_files, fp, indent=4)
RAW Paste Data
We use cookies for various purposes including analytics. By continuing to use Pastebin, you agree to our use of cookies as described in the Cookies Policy. OK, I Understand
Not a member of Pastebin yet?
Sign Up, it unlocks many cool features!
 
Top