Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- import sys
- import os
- import pathlib
- import hashlib
class DuplicateFileHandler:
    """Interactive finder of duplicate files below a directory.

    The directory to scan is taken from ``sys.argv[1]``.  Files are grouped
    first by size in bytes and then by the MD5 digest of their content, so
    every list of paths stored under one ``(size, digest)`` pair holds files
    with identical content.

    Attributes are plain instance variables:
        file_dict: ``{size: {md5_hex: [path, ...]}}`` for every scanned file.
        _format: file extension (without the dot) used as a filter; the
            empty string disables filtering.
        option: the sorting choice entered by the user (1 or 2 once valid).
        sorted_file_dict: same shape as ``file_dict`` but restricted to the
            groups reported by ``sort_dict``, in the order they were printed.
    """

    def __init__(self):
        self.file_dict = {}
        self._format = ''
        self.option = ''
        self.sorted_file_dict = {}

    @staticmethod
    def _hash_file(path, chunk_size=65536):
        """Return the MD5 hex digest of the file at *path*, read in chunks."""
        # MD5 is used here only to compare file contents, not for security.
        digest = hashlib.md5()
        with open(path, 'rb') as f:
            for chunk in iter(lambda: f.read(chunk_size), b''):
                digest.update(chunk)
        return digest.hexdigest()

    def _matches_format(self, paths):
        """Return True if the format filter is empty or any path has that extension."""
        if self._format == '':
            return True
        return any(pathlib.Path(p).suffix[1:] == self._format for p in paths)

    def get_dict(self):
        """Walk ``sys.argv[1]`` and fill ``file_dict`` with every file found.

        Prints a message and exits the program when no directory argument
        was given on the command line.
        """
        if len(sys.argv) < 2:
            print('Directory is not specified')
            sys.exit()

        for root, _dirs, files in os.walk(sys.argv[1], topdown=True):
            for name in files:
                path = os.path.join(root, name)
                size = os.path.getsize(path)
                _hash = self._hash_file(path)
                # setdefault keeps the other digests already recorded for
                # this size; replacing the whole size bucket would drop them.
                self.file_dict.setdefault(size, {}).setdefault(_hash, []).append(path)

    def sort_dict(self, _reverse=True):
        """Print groups of identical files ordered by size, then ask about duplicates.

        A group is reported when it holds more than one file and passes the
        extension filter stored in ``_format``.  Reported groups are also
        recorded in ``sorted_file_dict`` in the order they are printed.

        Args:
            _reverse: True for descending size order, False for ascending.
        """
        # Start from scratch so a repeated call cannot keep a stale ordering.
        self.sorted_file_dict = {}
        for size in sorted(self.file_dict, reverse=_reverse):
            for digest, paths in self.file_dict[size].items():
                if len(paths) > 1 and self._matches_format(paths):
                    # Record only the reported group, so check_dup lists
                    # exactly what was shown here.
                    self.sorted_file_dict.setdefault(size, {})[digest] = paths
                    print(f'{size} bytes', *paths, sep='\n', end='\n\n')
        self.check_dup()

    def check_dup(self):
        """Ask whether to list duplicates and, on 'yes', print them numbered.

        Keeps prompting until the answer is exactly 'yes' or 'no'.  File
        numbers run continuously across all groups.
        """
        while True:
            print('Check for duplicates?')
            dup_choice = input()
            if dup_choice == 'yes':
                print()
                n = 0
                for size, groups in self.sorted_file_dict.items():
                    for digest, paths in groups.items():
                        if len(paths) > 1:
                            print(f'{size} bytes', f'Hash: {digest}', sep='\n')
                            for path in paths:
                                n += 1
                                print(f'{n}.', path)
                            print('\n')
                break
            elif dup_choice == 'no':
                break
            else:
                print('Wrong option\n')

    def operate(self):
        """Run the whole interactive session: scan, ask for filter and order, report."""
        self.get_dict()
        self._format = input('Enter file format:')
        print('\nSize sorting options:\n1. Descending\n2. Ascending\n')
        while True:
            print('Enter a sorting option:')
            self.option = input()
            if self.option in ['1', '2']:
                print()
                self.option = int(self.option)
                # Option 1 means descending sizes, option 2 ascending.
                self.sort_dict(_reverse=(self.option == 1))
                break
            else:
                print('Wrong option\n')
if __name__ == '__main__':
    # Run the interactive session only when executed as a script, so that
    # importing this module neither reads sys.argv nor blocks on input().
    handler = DuplicateFileHandler()
    handler.operate()
Add Comment
Please, Sign In to add comment