"""Sort files into per-directory year/month folders and quarantine duplicates.

Usage: pass one or more directory paths as command-line arguments.
"""

import sys
import os
import hashlib
import time
import shutil


def chunk_reader(fobj, chunk_size=1024):
    """Yield successive chunks read from *fobj* until it is exhausted.

    Args:
        fobj: Any object with a ``read(size)`` method.
        chunk_size: Maximum number of bytes/characters requested per read.

    Yields:
        Each non-empty chunk returned by ``fobj.read(chunk_size)``.
    """
    while True:
        chunk = fobj.read(chunk_size)
        if not chunk:
            return
        yield chunk


# Retained only so that existing ``from <module> import ...`` statements keep
# working. check_for_duplicates() tracks its work in local state instead, so
# repeated calls never replay moves recorded by an earlier call.
dub_list = []
mv_list_src = []
mv_list_dst = []


def _file_digest(full_path, hash):
    """Return the digest of the file at *full_path* using the *hash* factory."""
    hashobj = hash()
    # The context manager guarantees the handle is closed before the file is
    # moved later on (an open handle blocks moves on Windows).
    with open(full_path, 'rb') as fobj:
        for chunk in chunk_reader(fobj):
            hashobj.update(chunk)
    return hashobj.digest()


def _move_into(src, dst_dir):
    """Move *src* into *dst_dir*, creating it and avoiding name clashes."""
    os.makedirs(dst_dir, exist_ok=True)
    name = os.path.basename(src)
    stem, ext = os.path.splitext(name)
    target = os.path.join(dst_dir, name)
    counter = 1
    # shutil.move() raises when the destination name already exists; pick a
    # free "<stem>_<n><ext>" name instead of aborting half-way through.
    while os.path.exists(target):
        target = os.path.join(dst_dir, "%s_%d%s" % (stem, counter, ext))
        counter += 1
    shutil.move(src, target)


def check_for_duplicates(paths, hash=hashlib.sha1):
    """Sort files by modification month and move duplicate copies aside.

    Every directory in *paths* is walked recursively. Files are identified by
    ``(digest, size)``; identities are shared across all given paths. For each
    identity the copy with the newest modification time is kept (on a tie the
    copy seen first wins) and is moved to ``<its directory>/<YYYY>/<MM>``.
    All other copies are moved to ``<root>/duplicate``, where ``<root>`` is
    the entry of *paths* under which that copy was found.

    Nothing is moved until every path has been scanned, so an error while
    reading a file leaves the tree untouched.

    Args:
        paths: Iterable of directory paths to process.
        hash: Zero-argument factory returning a hashlib-style object
            (``update``/``digest``). The name shadows the builtin but is kept
            because it is part of the public signature.

    Raises:
        OSError: If a root directory does not exist or a file cannot be
            read or moved.
    """
    kept = {}        # file_id -> (path, mtime, dated_dir, duplicate_dir)
    duplicates = []  # (path, duplicate_dir) pairs to quarantine

    for path in paths:
        duplicate_dir = os.path.join(path, 'duplicate')
        if not os.path.isdir(duplicate_dir):
            os.mkdir(duplicate_dir)

        for dirpath, dirnames, filenames in os.walk(path):
            # Prune the quarantine folder so files parked there by an earlier
            # run are not scanned again, and sort for a deterministic order.
            dirnames[:] = sorted(
                name for name in dirnames
                if os.path.join(dirpath, name) != duplicate_dir
            )
            for filename in sorted(filenames):
                full_path = os.path.join(dirpath, filename)
                mtime = os.path.getmtime(full_path)
                label = time.strftime("Ostatnia modyfikacja: %b/%Y",
                                      time.localtime(mtime))
                print("File:", filename, label)

                # NOTE(review): the folder name is derived from UTC while the
                # printed label uses local time, as in the original code.
                # NOTE(review): re-running on an already sorted tree nests a
                # further <YYYY>/<MM> level below the existing one.
                stamp = time.gmtime(mtime)
                dst_dir = os.path.join(dirpath,
                                       time.strftime("%Y", stamp),
                                       time.strftime("%m", stamp))

                file_id = (_file_digest(full_path, hash),
                           os.path.getsize(full_path))
                candidate = (full_path, mtime, dst_dir, duplicate_dir)
                previous = kept.get(file_id)
                if previous is None:
                    kept[file_id] = candidate
                    continue

                print("Znalezione duplikaty: %s and %s"
                      % (full_path, previous[0]))
                if mtime > previous[1]:
                    # The new copy is newer: it replaces the recorded one,
                    # which becomes the duplicate.
                    duplicates.append((previous[0], previous[3]))
                    kept[file_id] = candidate
                else:
                    duplicates.append((full_path, duplicate_dir))

    for src, dst_dir in duplicates:
        _move_into(src, dst_dir)
    for src, _mtime, dst_dir, _dup_dir in kept.values():
        _move_into(src, dst_dir)


if __name__ == "__main__":
    if sys.argv[1:]:
        check_for_duplicates(sys.argv[1:])
    else:
        print("Podaj sciezke jako argument")