Guest User

Untitled

a guest
Jun 23rd, 2018
92
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 2.00 KB | None | 0 0
  1. import sys
  2. import os
  3. import hashlib
  4.  
  5. def chunk_reader(fobj, chunk_size=1024):
  6. """Generator that reads a file in chunks of bytes"""
  7. while True:
  8. chunk = fobj.read(chunk_size)
  9. if not chunk:
  10. return
  11. yield chunk
  12.  
  13. def check_for_duplicates(paths, hash=hashlib.sha1):
  14. hashes = {}
  15. for path in paths:
  16. for dirpath, dirnames, filenames in os.walk(path):
  17. for filename in filenames:
  18. full_path = os.path.join(dirpath, filename)
  19. hashobj = hash()
  20. for chunk in chunk_reader(open(full_path, 'rb')):
  21. hashobj.update(chunk)
  22. file_id = (hashobj.digest(), os.path.getsize(full_path))
  23. duplicate = hashes.get(file_id, None)
  24. if duplicate:
  25. print "Duplicate found: %s and %s" % (full_path, duplicate)
  26. else:
  27. hashes[file_id] = full_path
  28.  
  29. if sys.argv[1:]:
  30. check_for_duplicates(sys.argv[1:])
  31. else:
  32. print "Please pass the paths to check as parameters to the script"
  33.  
  34. import sys
  35. import os
  36. import hashlib
  37.  
  38. check_path = (lambda filepath, hashes, p = sys.stdout.write:
  39. (lambda hash = hashlib.sha1 (file (filepath).read ()).hexdigest ():
  40. ((hash in hashes) and (p ('DUPLICATE FILEn'
  41. ' %sn'
  42. 'of %sn' % (filepath, hashes[hash])))
  43. or hashes.setdefault (hash, filepath)))())
  44.  
  45. scan = (lambda dirpath, hashes = {}:
  46. map (lambda (root, dirs, files):
  47. map (lambda filename: check_path (os.path.join (root, filename), hashes), files), os.walk (dirpath)))
  48.  
  49. ((len (sys.argv) > 1) and scan (sys.argv[1]))
  50.  
  51. def remove_duplicates(dir):
  52. unique = []
  53. for filename in os.listdir(dir)
  54. if os.path.isfile(filename):
  55. filehash = md5.md5(file(filename).read()).hexdigest()
  56. if filehash not in unique: unique.append(filehash)
  57. else: os.remove(filename)
Add Comment
Please, Sign In to add comment