MeowalsoMeow

duplicate_files_stage3

Oct 3rd, 2021 (edited)
261
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 3.83 KB | None | 0 0
  1. import sys
  2. import os
  3. import pathlib
  4. import hashlib
  5.  
  6.  
  7. class DuplicateFileHandler:
  8.  
  9. def __init__(self):
  10. self.file_dict = {}
  11. self._format = ''
  12. self.option = ''
  13. self.sorted_file_dict = {}
  14.  
  15. def get_dict(self):
  16. if len(sys.argv) < 2:
  17. print('Directory is not specified')
  18. sys.exit()
  19. else:
  20. # print(sys.argv)
  21. for (root, dirs, files) in os.walk(sys.argv[1], topdown=True):
  22. # print(root,dirs, files)
  23. for name in files:
  24. # path = os.path.abspath(os.path.join(root, name))
  25. path = os.path.join(root, name)
  26. size = os.path.getsize(path)
  27. with open(path, 'rb') as f:
  28. # print(path)
  29. _bytes = f.read()
  30. _hash = hashlib.md5(_bytes).hexdigest()
  31. # print(_hash)
  32. try:
  33. temp = self.file_dict[size][_hash]
  34. except:
  35. self.file_dict.update({size: {_hash: [path]}})
  36. else:
  37. temp.append(path)
  38. self.file_dict.update({size: {_hash: temp}})
  39. # _dict.update({size: {_hash: []}})
  40. # _dict.setdefault(size[_hash], []).append(path)
  41. # _dict[size][_hash].append(path)
  42. # print('name', name)
  43.  
  44. def sort_dict(self, _reverse=True):
  45. for i in sorted(self.file_dict, reverse=_reverse):
  46. for j in self.file_dict[i]:
  47. for z in self.file_dict[i][j]:
  48. if len(self.file_dict[i][j]) > 1:
  49. # self.sorted_file_dict[i][j] = z
  50. if pathlib.Path(z).suffix[1:] == self._format or self._format == '':
  51. self.sorted_file_dict[i] = self.file_dict[i]
  52. # print(i, 'bytes')
  53. # for x in _dict[i]:
  54. # print(x)
  55. print(f'{i} bytes', *self.file_dict[i][j], sep='\n', end='\n\n')
  56. break
  57. self.check_dup()
  58.  
  59. def check_dup(self='Descending'):
  60. while True:
  61. print('Check for duplicates?')
  62. dup_choice = input()
  63. if dup_choice == 'yes':
  64. print()
  65. n = 0
  66. for x in self.sorted_file_dict: # need to sort here
  67. for y in self.sorted_file_dict[x]:
  68. if len(self.sorted_file_dict[x][y]) > 1:
  69. print(f'{x} bytes', f'Hash: {y}', sep='\n')
  70. for _z in self.sorted_file_dict[x][y]:
  71. n += 1
  72. # print(y, z)
  73. print(f'{n}.', _z)
  74.  
  75. print('\n')
  76. break
  77.  
  78. elif dup_choice == 'no':
  79. break
  80. else:
  81. print('Wrong option\n')
  82.  
  83. def operate(self):
  84. self.get_dict()
  85. # print(self.file_dict)
  86. self._format = input('Enter file format:')
  87. print('''
  88. Size sorting options:
  89. 1. Descending
  90. 2. Ascending
  91. ''')
  92. while True:
  93. print('Enter a sorting option:')
  94. self.option = input()
  95. if self.option in ['1', '2']:
  96. print()
  97. self.option = int(self.option)
  98. if self.option == 1:
  99. self.sort_dict()
  100. elif self.option == 2:
  101. self.sort_dict(_reverse=False)
  102. break
  103. else:
  104. print('Wrong option\n')
  105.  
  106.  
  107. handler = DuplicateFileHandler()
  108. handler.operate()
  109.  
Add Comment
Please, Sign In to add comment