Advertisement
Guest User

benchmark_hashlib_py3.py

a guest
May 9th, 2023
189
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 3.44 KB | None | 0 0
  1. #!/usr/bin/env python
  2.  
  3. import contextlib
  4. import hashlib
  5. import os
  6. import random
  7. import string
  8. import tempfile
  9. import timeit
  10.  
  11.  
  12. @contextlib.contextmanager
  13. def createdummyfiles():
  14.     """
  15.    Create a set of files at targetpath with random strings
  16.    Outer for loop decides number of files with range specifying file size
  17.    """
  18.     with tempfile.TemporaryDirectory() as dir:
  19.  
  20.         def _():
  21.             for sizectr in range(5, 25):
  22.                 filename = "file" + str(sizectr) + ".txt"
  23.                 fullfilename = os.path.join(dir, filename)
  24.                 with open(fullfilename, "w") as f:
  25.                     for ctr in range(2**sizectr):
  26.                         randomstring = "".join([random.choice(string.ascii_letters) for i in range(128)])
  27.                         f.write(randomstring)
  28.                     print("File created: " + filename + " Size: " + str(os.path.getsize(fullfilename)))
  29.                     yield filename, fullfilename
  30.  
  31.         yield _
  32.  
  33.  
  34. def hashchunks(testfile, blk_size):
  35.     filehash = hashlib.md5()
  36.     with open(testfile, "rb") as f:
  37.         while True:
  38.             read_data = f.read(blk_size)
  39.             if not read_data:
  40.                 break
  41.             filehash.update(read_data)
  42.     filehash.digest()
  43.  
  44.  
  45. def hashcomplete(testfile):
  46.     filehash = hashlib.md5()
  47.     with open(testfile, "rb") as f:
  48.         read_data = f.read()
  49.         filehash.update(read_data)
  50.     filehash.digest()
  51.  
  52.  
  53. if __name__ == "__main__":
  54.     result_list = []  # list (of lists) to record file stats
  55.  
  56.     with createdummyfiles() as dummy_files:
  57.         for filename, fullfilename in dummy_files():
  58.             result = []  # list to record stats of the file
  59.             filesize = os.path.getsize(fullfilename)
  60.  
  61.             # initialize counters
  62.             least_time = 1000
  63.             least_blk_size = 0
  64.  
  65.             num_iter = 100
  66.  
  67.             print(
  68.                 "File: {} Size: {} Number of iterations for timing: {}".format(
  69.                     filename, filesize, num_iter
  70.                 )
  71.             )
  72.             result.append(filename)
  73.             result.append(filesize)
  74.             result.append(num_iter)
  75.             # first try the hashing file by breaking it up into smaller chunks
  76.             for ctr in range(6, 21):
  77.                 blk_size = 2**ctr
  78.                 funcstr = "hashchunks('{}', {})".format(fullfilename, str(blk_size))
  79.                 exec_time = timeit.timeit(
  80.                     funcstr, setup="from __main__ import hashchunks", number=num_iter
  81.                 )
  82.                 if exec_time < least_time:
  83.                     least_time = exec_time
  84.                     least_blk_size = blk_size
  85.             print("+++ Most efficient Chunk Size: {} Time taken: {}".format(least_blk_size, least_time))
  86.             result.append(least_blk_size)
  87.             result.append(least_time)
  88.  
  89.             # now try to hash the file all in one go
  90.             funcstr = "hashcomplete('{}')".format(fullfilename)
  91.             timetaken_complete = timeit.timeit(
  92.                 funcstr, setup="from __main__ import hashcomplete", number=num_iter
  93.             )
  94.             print("+++ Time taken for hashing complete file: {}".format(timetaken_complete))
  95.             result.append(timetaken_complete)
  96.             print("====================================================================")
  97.             result_list.append(result)
  98.  
  99.     for res in result_list:
  100.         print(res)
  101.  
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement