SHARE
TWEET

Untitled

a guest Dec 19th, 2015 58 Never
  1. #USAGE: md5-convert.py "directory_with_files" ".csv file"
  2.  
  3. import hashlib
  4. import codecs
  5. import sys
  6. import os
  7. import re
  8. import time
  9.  
  10. DIR = sys.argv[1]
  11. CSV_FILE= sys.argv[2]
  12. count = 0;
  13.  
  14. #gets md5 hash of file
  15. def md5(fname):
  16.     hash = hashlib.md5()
  17.     with open(fname, "rb") as f:
  18.         for chunk in iter(lambda: f.read(4096), b""):
  19.             hash.update(chunk)
  20.     return hash.hexdigest()
  21.  
  22. f = codecs.open(CSV_FILE, encoding='utf-8', mode='r')
  23. data = f.read()
  24. f.close()
  25.  
  26. with codecs.open(CSV_FILE, encoding='utf-8', mode='r') as f:
  27.   for line in f:
  28.  
  29.     line = line[:-1]
  30.     line = line.split('\t')
  31.     ext = re.search("\.jpg|\.jpeg|\.png|\.gif|\.webm", line[2]).group(0)
  32.     hash = line[1]
  33.    
  34.     #construct absolute path for the md5 function
  35.     path = sys.argv[1] + '\\'+ hash + ext
  36.    
  37.     #check to make sure the file exists
  38.     if os.path.isfile(path) and os.access(path,os.R_OK):
  39.         filehash = md5(path)
  40.         newdata = data.replace(hash,filehash)
  41.        
  42.         #write data to temp file
  43.         f2 = codecs.open('temp.csv',encoding='utf-8', mode='w')
  44.         f2.write(newdata)
  45.         f2.close()
  46.        
  47.         #set old data to the newdata
  48.         data = newdata
  49.         count+=1       
  50.         print "line #{count}  converted to md5.".format(count = count)
  51.         #time.sleep(.250)
  52.        
  53. print "All hashes have been converted."
RAW Paste Data
Top