Advertisement
Guest User

Untitled

a guest
Oct 24th, 2016
87
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 3.52 KB | None | 0 0
  1. #!/usr/bin/env python
  2.  
  3. """Hash files, like md5sum or shasum, only faster. Works with all
  4. hash functions Python's hashlib supports."""
  5.  
  6. from __future__ import print_function
  7. from argparse import ArgumentParser, ArgumentDefaultsHelpFormatter
  8. from hashlib import algorithms_available
  9. from hashlib import new as new_hash
  10. from io import open
  11. from multiprocessing import Pool, cpu_count
  12. from multiprocessing.dummy import Pool as ThreadPool
  13. from sys import stderr
  14.  
  15.  
  16. class HashFile(object):
  17. def __init__(self, algo='sha1'):
  18. self.algo = algo
  19.  
  20. def __call__(self, file_path):
  21. return self.hash_file(file_path)
  22.  
  23. def hash_file(self, file_path):
  24. file_hash = new_hash(self.algo)
  25. one_mb = 1024 * 1024
  26. # Protect against zero or no block_size
  27. block_size = max(file_hash.block_size, 1)
  28. # Feed ~1 MiB at a time to the hash algo, as a multiple of block_size
  29. buf_size = block_size * (one_mb // block_size)
  30. try:
  31. with open(file_path, 'rb') as f:
  32. f.seek(0)
  33. while True:
  34. buf = f.read(buf_size)
  35. if buf:
  36. file_hash.update(buf)
  37. else:
  38. break
  39. except IOError as e:
  40. print(e, file=stderr)
  41. return None, file_path
  42.  
  43. return file_hash.hexdigest(), file_path
  44.  
  45.  
  46. class FileHashes(object):
  47. def __init__(
  48. self, files=None, thread_count=None, use_real_procs=False,
  49. use_openssl=False, algo='sha1'
  50. ):
  51. self.files = files if files else []
  52. self.thread_count = thread_count if thread_count else max(cpu_count() // 2, 1)
  53. self.use_real_procs = use_real_procs
  54. self.use_openssl = use_openssl
  55. self.algo = algo
  56.  
  57. def hashes(self):
  58. if self.use_real_procs:
  59. pool = Pool(self.thread_count)
  60. else:
  61. pool = ThreadPool(self.thread_count)
  62.  
  63. # Pool.imap can't pickle functions with non-dummy subprocesses,
  64. # but it can pickle instances of classes.
  65. # That is why the hash function is implemented as callabled class instead
  66. # of class methods.
  67. hasher = HashFile(self.algo)
  68.  
  69. for file_hash, file_path in pool.imap(hasher, self.files):
  70. if file_hash:
  71. yield file_hash, file_path
  72.  
  73.  
  74. class CommandLine(object):
  75. def __call__(self):
  76. self.args = self._args()
  77. file_hashes = FileHashes(
  78. files=self.args.files,
  79. thread_count=self.args.thread_count,
  80. use_real_procs=self.args.use_real_procs,
  81. algo=self.args.algo,
  82. )
  83.  
  84. for file_hash, file_path in file_hashes.hashes():
  85. print(file_path, '=', file_hash)
  86.  
  87. def _args(self):
  88. parser = ArgumentParser(formatter_class=ArgumentDefaultsHelpFormatter)
  89.  
  90. parser.add_argument('files', nargs='+')
  91. parser.add_argument(
  92. '-a', '--algorithm', dest='algo',
  93. default='sha1', choices=sorted(algorithms_available),
  94. help='Hash algorithm to use'
  95. )
  96. parser.add_argument(
  97. '-t', '--threads', dest='thread_count',
  98. type=int, default=max(cpu_count() // 2, 1),
  99. help='Number of threads or processes to use'
  100. )
  101. parser.add_argument(
  102. '-p', '--real-processes', dest='use_real_procs', action='store_true',
  103. help='Use full processes instead of threads'
  104. )
  105.  
  106. args = parser.parse_args()
  107. return args
  108.  
  109.  
  110. if __name__ == '__main__':
  111. cmd = CommandLine()
  112. cmd()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement