Advertisement
DeaD_EyE

file_copy + progress + hash generation + check

Jul 28th, 2019
257
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 4.33 KB | None | 0 0
  1. #!/usr/bin/env python3
  2. """
  3. Program to copy a file from source to destination.
  4. During the copy operation the hash value is calculated.
  5. A progress bar is also supported.
  6.  
  7. If tqdm is not installed, it will install via
  8. pip as a user site-package.
  9. """
  10.  
  11. import hashlib
  12. import sys
  13. import mmap
  14. from argparse import ArgumentParser
  15. from argparse import ArgumentDefaultsHelpFormatter as HelpFormatter
  16. from pathlib import Path
  17. from subprocess import call
  18.  
  19.  
  20. def chunker(file_size, chunk_size):
  21.     pos = 0
  22.     while pos < file_size:
  23.         next_pos = min(pos + chunk_size, file_size)
  24.         yield slice(pos, next_pos)
  25.         pos += chunk_size
  26.  
  27.  
  28. def copy(src, dst, buffer_size, hash_algo, progress=False, check=False):
  29.     file_size = src.stat().st_size
  30.     if progress:
  31.         progress_bar = tqdm(
  32.             total=file_size,
  33.             desc=f'Copy {src.name} > {dst.name}',
  34.             unit="B", unit_scale=True,
  35.             unit_divisor=1024,
  36.             )
  37.     hasher = hashlib.new(hash_algo)
  38.     with src.open('rb') as src_fd:
  39.         mm_src = mmap.mmap(src_fd.fileno(), 0, access=mmap.ACCESS_READ)
  40.         with dst.open('r+b') as dst_fd:
  41.             for data_slice in chunker(file_size, buffer_size):
  42.                 dst_fd.write(mm_src[data_slice])
  43.                 hasher.update(mm_src[data_slice])
  44.                 if progress:
  45.                     progress_bar.update(data_slice.stop - data_slice.start)
  46.             progress_bar.close()
  47.             if check:
  48.                 dst_fd.seek(0)
  49.                 dst_hasher = hashlib.new(hash_algo)
  50.                 if progress:
  51.                     progress_bar = tqdm(
  52.                         total=file_size,
  53.                         desc=f'Hashing {dst.name}',
  54.                         unit="B", unit_scale=True,
  55.                         unit_divisor=1024,
  56.                     )
  57.                 while True:
  58.                     chunk = dst_fd.read(buffer_size)
  59.                     if not chunk:
  60.                         break
  61.                     if progress:
  62.                         progress_bar.update(len(chunk))
  63.                     dst_hasher.update(chunk)
  64.                 if progress:
  65.                     progress_bar.close()
  66.                 check_result = True
  67.                 if dst_hasher.digest() == hasher.digest():
  68.                     print('Written file is ok', file=sys.stderr)
  69.                 else:
  70.                     print('Written file is different', file=sys.stderr)
  71.                     check_result = False
  72.     return hasher.hexdigest(), check_result
  73.  
  74.  
  75. if __name__ == '__main__':
  76.     hashes = hashlib.algorithms_available
  77.     parser = ArgumentParser(description=__doc__, formatter_class=HelpFormatter)
  78.     parser.add_argument('src', type=Path, help='Source file to copy')
  79.     parser.add_argument('dst', type=Path, help='Destination file')
  80.     parser.add_argument('hash', choices=hashes, help='Hash algorithm')
  81.     parser.add_argument('--buffer', type=int, default=64 * 1024, help='Buffer size in bytes')
  82.     parser.add_argument('--overwrite', action='store_true', help='Allow overwriting of destination file')
  83.     parser.add_argument('--progress', action='store_true', help='Show a progress bar')
  84.     parser.add_argument('--check', action='store_true', help='Read the destination file again and calculate the hash value')
  85.     args = parser.parse_args()
  86.     if not args.src.exists():
  87.         print(f'Source file {args.src.name} does not exist.', file=sys.stderr)
  88.         sys.exit(2)
  89.     if not args.overwrite and args.dst.exists():
  90.         print(f'Destination file {args.dst.name} exists.', file=sys.stderr)
  91.         sys.exit(3)
  92.     if args.progress:
  93.         try:
  94.             from tqdm import tqdm
  95.         except ImportError:
  96.             print('Python-Module tqdm is not installed, installing it now.', file=sys.stderr)
  97.             call([sys.executable, '-m', 'pip', 'install', 'tqdm', '--user'])
  98.             try:
  99.                 from tqdm import tqdm
  100.             except ImportError:
  101.                 args.progress = False
  102.             else:
  103.                 args.progress = True
  104.         else:
  105.             args.progress = True
  106.     hex_digest, check_result = copy(args.src, args.dst, args.buffer, args.hash, args.progress, args.check)
  107.     args.dst.with_suffix('.' + args.hash).write_text(f'{hex_digest}  {args.dst.name}\n')
  108.     if not check_result:
  109.         sys.exit(10)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement