Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- import glob
- import hashlib
- import os
- import pickle
- import shutil
- from os.path import basename, join
def get_sha1_from_path(path, buf_size=200000):
    """Return the SHA-1 hex digest of the file at *path*.

    The file is opened in binary mode and fed to the hash in chunks, so at
    most *buf_size* bytes are requested per read.

    Args:
        path: Path of the file to hash.
        buf_size: Number of bytes to request per read. Defaults to 200000,
            the chunk size this function previously hard-coded. The digest
            does not depend on this value.

    Returns:
        The digest as a hexadecimal string.

    Raises:
        ValueError: If *buf_size* is smaller than 1.
        OSError: If the file cannot be opened or read.
    """
    if buf_size < 1:
        # read(0) would return b"" immediately and hash nothing.
        raise ValueError("buf_size must be a positive number of bytes")

    sha1 = hashlib.sha1()
    with open(path, 'rb') as f:
        # iter(callable, sentinel) keeps reading until read() returns b"",
        # which is how a binary file signals end-of-file.
        for chunk in iter(lambda: f.read(buf_size), b""):
            sha1.update(chunk)
    # hexdigest() already returns a str; no extra conversion is needed.
    return sha1.hexdigest()
def get_hashes_path(path):
    """Map each hashable file in *path* to its SHA-1 hex digest.

    Despite the singular name, *path* is a sized collection of filesystem
    paths (it is iterated and passed to ``len``). Directories are skipped,
    and so are entries whose base name has nothing before its first dot
    (for example ``.gitignore``).

    A ``done/total`` progress line is printed after each file is hashed.
    ``total`` is the length of *path*, including entries that get skipped,
    so the final line may show less than ``total/total``.

    Args:
        path: Collection of file and directory paths to examine.

    Returns:
        A dict mapping each hashed file path to its SHA-1 hex digest.
    """
    total = len(path)
    hashes = {}
    # Count from 1 so the progress line reads "1/N" after the first entry
    # instead of "0/N", and can actually reach "N/N".
    for position, file_path in enumerate(path, start=1):
        if os.path.isdir(file_path):
            continue
        # An empty first segment means the name starts with a dot (or is
        # empty); such entries are deliberately left out.
        if not basename(file_path).split(".")[0]:
            continue
        hashes[file_path] = get_sha1_from_path(file_path)
        print(f"{position}/{total}")
    return hashes
# Directories involved in the comparison. Files found under SOURCE_DIR whose
# content hash is absent from TARGET_DIR are later copied to DIFF_DIR_COPY.
TARGET_DIR = r"H:\FolderOne"
DIFF_DIR_COPY = r"H:\OutputFolder"
SOURCE_DIR = r"J:\FolderTwo"

# Recursive listing of everything below each root (files and directories).
files_in_target = glob.glob(TARGET_DIR + r"\**\*", recursive=True)
files_in_source = glob.glob(SOURCE_DIR + r"\**\*", recursive=True)
files_in_target_len = len(files_in_target)
files_in_source_len = len(files_in_source)


def _cached_hashes(cache_file, candidates):
    """Return the path-to-hash mapping stored in *cache_file*, building it if absent.

    When *cache_file* exists it is unpickled and returned as-is; it is not
    checked against *candidates*, so delete the cache file to force a rescan.
    Otherwise *candidates* are hashed and the result is pickled to
    *cache_file* before being returned.

    NOTE(review): pickle.load runs on a file taken from the current working
    directory; only use cache files this script wrote itself.
    """
    if not os.path.exists(cache_file):
        fresh = get_hashes_path(candidates)
        with open(cache_file, 'wb+') as sink:
            pickle.dump(fresh, sink)
        return fresh
    with open(cache_file, 'rb') as stored:
        return pickle.load(stored)


TARGET_DIR_FILES_HASHED = _cached_hashes('target_pickle', files_in_target)
SOURCE_DIR_FILES_HASHED = _cached_hashes('source_pickle', files_in_source)
# Copy every source file whose content hash does not occur anywhere in the
# target tree into DIFF_DIR_COPY.

# Build the set of target hashes once; testing membership against
# dict.values() would rescan every target entry for each source file.
target_hashes = set(TARGET_DIR_FILES_HASHED.values())

# Work out the real job list up front so the progress total is the number of
# files that will be copied, not the difference between the two file counts
# (which can be negative or otherwise unrelated to the work to do).
pending = [
    (key, val)
    for key, val in SOURCE_DIR_FILES_HASHED.items()
    if val not in target_hashes
]
to_copy = len(pending)
copied = 0

if pending:
    # shutil.copy does not create missing parent directories.
    os.makedirs(DIFF_DIR_COPY, exist_ok=True)

for key, val in pending:
    # Insert the first three hash characters before the first dot of the
    # file name. partition() keeps the remainder intact, so "a.tar.gz"
    # becomes "aXXX.tar.gz" and a name without an extension gets no
    # trailing dot.
    # NOTE(review): three hex characters is a short tag; two different files
    # with the same base name and the same hash prefix would overwrite each
    # other in the output directory.
    stem, dot, extension = basename(key).partition(".")
    diff_name = f"{stem}{val[:3]}{dot}{extension}"
    shutil.copy(key, join(DIFF_DIR_COPY, diff_name))
    copied += 1
    print(f"Copied {copied} / {to_copy}")
Add Comment
Please, Sign In to add comment