Advertisement
Guest User

Untitled

a guest
Sep 2nd, 2015
99
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
C++ 2.84 KB | None | 0 0
  1. __author__ = 'noway'
  2.  
  3. import hashlib
  4. import os
  5. import shelve
  6. import unicodedata
  7.  
  8. #Size of the chunks read when hashing file contents.
  9. #Reading in chunks is necessary for huge files,
  10. #which would be problematic to hold in memory all at once.
  11. CHUNK_SIZE = 4096
  12.  
  13.  
  14. def md5(file_name):
  15. #Compute md5 of file CONTENTS
  16. #   file_name - path to file (ex. /Users/123/Desktop/file.txt)
  17. #Result:
  18. #   128-bit hex string = md5 hash
  19.  
  20.     hash_md5 = hashlib.md5()
  21.     with open(file_name) as f:
  22.         for chunk in iter(lambda: f.read(CHUNK_SIZE), ''):
  23.             hash_md5.update(chunk)
  24.     return hash_md5.hexdigest()
  25.  
  26.  
  27. def update_hash_names(path):
  28. #Produce SHELVE-database with dictionary [HASH:PATH]
  29. #   path - path to the dictionary where all files and subdir should be hashed
  30. #Result:
  31. #   Database named 'names_db'
  32.     names_db = shelve.open('names_db')
  33.     for root, dirs, files in os.walk(path):
  34.         for name in files:
  35.             file = root + "\\" + name
  36.             hash_name = hashlib.md5(file).hexdigest()
  37.             if not names_db.has_key(hash_name):
  38.                 names_db[hash_name] = file
  39.     names_db.close()
  40.  
  41.  
  42. def compute_dir_hash(path, progress_bar=False):
  43. #Produce SHELVE-database with dictionary [HASH1:HASH2]
  44. #Where HASH1 is hash of path to file, HASH2 is hash of file contents
  45. #   path - path to the directory to be hashed
  46. #   (optional)progress_bar - set True if you want to see
  47. #       approximate progress (MAY USE ADDITIONAL TIME)
  48. #Result:
  49. #   Database named [path to directory]_db
  50.     counter = 0
  51.     counter_saved = 0
  52.     if progress_bar:
  53.         print "Computing all files in directory " + path + " and subdir..."
  54.         for root, dirs, files in os.walk(path):
  55.             for name in files:
  56.                 counter += 1
  57.         print "There are " + str(counter) + " files there"
  58.         counter_saved = counter
  59.         counter = 0
  60.  
  61.     print "Hashing directory " + path + " and all subdir contents"
  62.     dir_db = shelve.open(str(path + '_db').replace(":", '').replace("\\", ''))
  63.  
  64.     for root, dirs, files in os.walk(path):
  65.         for name in files:
  66.             if progress_bar:
  67.                 counter += 1
  68.                 print "\r{0} of {1}".format(counter, counter_saved),
  69.  
  70.             file_path = root + "/" + name        #MAC OS
  71.             #file_name = root + "\\" + name      #WIN
  72.  
  73.             hash_name = hashlib.md5(file_path).hexdigest()
  74.  
  75.             try:
  76.                 hash_value = md5(file_path)
  77.             except IOError:
  78.                 print "Permission Denied to compute md5 hash of " + file_path
  79.             else:
  80.                 if not hash_name in dir_db:
  81.                     dir_db[hash_name] = hash_value
  82.     dir_db.close()
  83.  
  84. #update_hash_names('C:\Windows')
  85. #compute_dir_hash('C:\Windows')
  86. compute_dir_hash('/Users/pontifik/Desktop/Work', progress_bar=True)
  87.  
  88. print "DONE"
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement