Guest User

Untitled

a guest
Feb 16th, 2019
91
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 2.03 KB | None | 0 0
  1. import glob
  2. import hashlib
  3. import os
  4. import pickle
  5. import shutil
  6. from os.path import basename, join
  7.  
  8.  
  9. def get_sha1_from_path(path):
  10. BUF_SIZE = 200000 # lets read stuff in 64kb chunks!
  11.  
  12. sha1 = hashlib.sha1()
  13.  
  14. with open(path, 'rb') as f:
  15. while True:
  16. data = f.read(BUF_SIZE)
  17. if not data:
  18. break
  19. sha1.update(data)
  20.  
  21. return str(sha1.hexdigest())
  22.  
  23.  
  24. def get_hashes_path(path):
  25. _dict = {}
  26. files_len = len(path)
  27. for i, file in enumerate(path):
  28. if os.path.isdir(file):
  29. continue
  30. if not basename(file).split(".")[0]:
  31. continue
  32. _dict[file] = get_sha1_from_path(file)
  33. print(f"{i}/{files_len}")
  34. return _dict
  35.  
  36.  
  37. TARGET_DIR = r"H:\FolderOne"
  38. DIFF_DIR_COPY = r"H:\OutputFolder"
  39. SOURCE_DIR = r"J:\FolderTwo"
  40.  
  41. files_in_target = glob.glob(TARGET_DIR + r"\**\*", recursive=True)
  42. files_in_source = glob.glob(SOURCE_DIR + r"\**\*", recursive=True)
  43.  
  44. files_in_target_len = len(files_in_target)
  45. files_in_source_len = len(files_in_source)
  46.  
  47. if os.path.exists('target_pickle'):
  48. with open('target_pickle', 'rb') as f:
  49. TARGET_DIR_FILES_HASHED = pickle.load(f)
  50. else:
  51. TARGET_DIR_FILES_HASHED = get_hashes_path(files_in_target)
  52. with open('target_pickle', 'wb+') as f:
  53. pickle.dump(TARGET_DIR_FILES_HASHED, f)
  54.  
  55. if os.path.exists('source_pickle'):
  56. with open('source_pickle', 'rb') as f:
  57. SOURCE_DIR_FILES_HASHED = pickle.load(f)
  58. else:
  59. SOURCE_DIR_FILES_HASHED = get_hashes_path(files_in_source)
  60. with open('source_pickle', 'wb+') as f:
  61. pickle.dump(SOURCE_DIR_FILES_HASHED, f)
  62.  
  63. to_copy = len(SOURCE_DIR_FILES_HASHED.keys()) - len(TARGET_DIR_FILES_HASHED.keys())
  64. copied = 0
  65. for key, val in SOURCE_DIR_FILES_HASHED.items():
  66. if val not in TARGET_DIR_FILES_HASHED.values():
  67. split_base = basename(key).split(".")
  68. diff_name = f"{split_base[0] + val[:3]}.{''.join(split_base[1:])}"
  69. shutil.copy(key, join(DIFF_DIR_COPY, diff_name))
  70. copied += 1
  71. print(f"Copied {copied} / {to_copy}")
Add Comment
Please, Sign In to add comment