Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- __author__ = 'noway'
import hashlib
import os
import shelve
import sys
import unicodedata
# Size (in bytes) of the chunks read when hashing file contents.
# Reading chunk by chunk keeps memory use bounded when hashing huge files,
# which would be problematic to hold in memory all at once.
CHUNK_SIZE = 4096


def md5(file_name):
    """Compute the MD5 hash of a file's CONTENTS.

    Args:
        file_name: path to the file (e.g. /Users/123/Desktop/file.txt).

    Returns:
        The 128-bit MD5 digest as a hexadecimal string.

    Raises:
        IOError (OSError on Python 3): if the file cannot be opened or read.
    """
    hash_md5 = hashlib.md5()
    # Binary mode is required: text mode rewrites line endings on Windows
    # (changing the digest of binary files) and yields str objects on
    # Python 3, which hashlib refuses to hash.
    with open(file_name, 'rb') as f:
        # iter() with a sentinel keeps calling read() until it returns the
        # empty bytes object that marks end of file.
        for chunk in iter(lambda: f.read(CHUNK_SIZE), b''):
            hash_md5.update(chunk)
    return hash_md5.hexdigest()
def update_hash_names(path):
    """Produce a SHELVE database holding the dictionary [HASH: PATH].

    Walks *path* recursively and, for every file found, stores the file's
    full path under the MD5 hash of that path. Entries that already exist
    in the database are left untouched.

    Args:
        path: directory whose files (including those in all
            subdirectories) should be recorded.

    Result:
        A database named 'names_db', created in (or reused from) the
        current working directory.
    """
    names_db = shelve.open('names_db')
    try:
        for root, dirs, files in os.walk(path):
            for name in files:
                # os.path.join picks the right separator for the platform
                # instead of the previously hard-coded backslash.
                file_path = os.path.join(root, name)
                # hashlib only accepts bytes; encode text paths first.
                if isinstance(file_path, bytes):
                    raw_path = file_path
                else:
                    raw_path = file_path.encode('utf-8', 'surrogateescape')
                hash_name = hashlib.md5(raw_path).hexdigest()
                if hash_name not in names_db:
                    names_db[hash_name] = file_path
    finally:
        # Always release the database, even if the walk fails part-way.
        names_db.close()
def compute_dir_hash(path, progress_bar=False):
    """Produce a SHELVE database holding the dictionary [HASH1: HASH2].

    HASH1 is the MD5 hash of a file's path and HASH2 is the MD5 hash of
    that file's contents. Entries already present in the database are not
    overwritten, and files that cannot be read are reported and skipped.

    Args:
        path: directory to be hashed (all subdirectories are included).
        progress_bar: set True to see approximate progress. This walks the
            tree an extra time to count the files first, so it MAY USE
            ADDITIONAL TIME.

    Result:
        A database named [path to directory]_db, with ':' and '\\'
        characters removed from the name.
    """
    total_files = 0
    if progress_bar:
        print("Computing all files in directory " + path + " and subdir...")
        total_files = sum(len(files) for _, _, files in os.walk(path))
        print("There are " + str(total_files) + " files there")
    print("Hashing directory " + path + " and all subdir contents")

    db_name = str(path + '_db').replace(":", '').replace("\\", '')
    dir_db = shelve.open(db_name)
    try:
        processed = 0
        for root, dirs, files in os.walk(path):
            for name in files:
                if progress_bar:
                    processed += 1
                    # "\r" rewinds to the start of the line so the counter
                    # is redrawn in place; flush so it shows immediately.
                    sys.stdout.write(
                        "\r{0} of {1}".format(processed, total_files))
                    sys.stdout.flush()
                # os.path.join replaces the hard-coded "/" (Mac OS) and
                # "\\" (Windows) variants with the platform's separator.
                file_path = os.path.join(root, name)
                # hashlib only accepts bytes; encode text paths first.
                if isinstance(file_path, bytes):
                    raw_path = file_path
                else:
                    raw_path = file_path.encode('utf-8', 'surrogateescape')
                hash_name = hashlib.md5(raw_path).hexdigest()
                try:
                    hash_value = md5(file_path)
                except IOError:
                    print("Permission Denied to compute md5 hash of "
                          + file_path)
                else:
                    if hash_name not in dir_db:
                        dir_db[hash_name] = hash_value
    finally:
        # Always release the database, even if hashing fails part-way.
        dir_db.close()
    if progress_bar:
        # Finish the in-place progress line so later output starts cleanly.
        sys.stdout.write("\n")
if __name__ == "__main__":
    # Script entry point, guarded so that importing this module for its
    # helpers does not start hashing a directory as a side effect.
    # Usage: pass the directory to hash as the first command-line argument
    # (for example C:\Windows); without one, the original hard-coded
    # working directory is used. update_hash_names() can be called the
    # same way to build the HASH -> PATH lookup database.
    if len(sys.argv) > 1:
        target_dir = sys.argv[1]
    else:
        target_dir = '/Users/pontifik/Desktop/Work'
    compute_dir_hash(target_dir, progress_bar=True)
    print("DONE")
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement