Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
import hashlib
import os
import sys
# Chunk size, in bytes, intended for reading files piece by piece (64 KiB).
BUF_SIZE = 65536

# The stash file holds one previously seen fingerprint per line. It is opened
# in 'r+' and deliberately left open: after the full read below, the file
# position sits at the end, so later writes through this handle append.
hashStash = open('E:\\thread scraping\\hashStash.txt', 'r+')

# In-memory copy of the known fingerprints, trailing whitespace/newlines removed.
stashListed = [rawLine.rstrip() for rawLine in hashStash.readlines()]

# Echo every known fingerprint so the operator can see what was loaded.
for knownEntry in stashListed:
    print(knownEntry)
def check(checkValue, currentFilename):
    """Delete or archive one file from the "to sort" folder by its fingerprint.

    If ``checkValue`` is already present in the module-level ``stashListed``
    list, the file is treated as a duplicate and deleted. Otherwise it is
    moved to "the pit" folder and its fingerprint is recorded, both in
    ``stashListed`` and in the open ``hashStash`` file.

    Args:
        checkValue: Fingerprint string for the file (the caller builds it as
            ``<md5 hex>___<size in bytes>``).
        currentFilename: Name of the file, relative to the "to sort" folder.

    Raises:
        OSError: If the file cannot be removed or moved (for example, the
            destination already exists on Windows).
    """
    sourcePath = 'E:\\thread scraping\\to sort\\' + currentFilename

    # Substring matching is kept for compatibility with existing stash files.
    # any() stops at the first hit, so the file is removed exactly once even
    # when the stash contains the same fingerprint on several lines.
    if any(checkValue in knownEntry for knownEntry in stashListed):
        os.remove(sourcePath)
        print('File removed.')
        return

    # Move first and record afterwards: if the move fails, no fingerprint is
    # stored, so a later run cannot mistake the only copy for a duplicate.
    os.rename(sourcePath, 'E:\\thread scraping\\the pit\\' + currentFilename)
    hashStash.write(checkValue + '\n')
    hashStash.flush()
    # Remember the fingerprint for the rest of this run as well, so a second
    # identical file in the same batch is recognised as a duplicate.
    stashListed.append(checkValue)
    print('File moved.')
# Folder whose files are fingerprinted and then handed to check().
TO_SORT_DIR = 'E:\\thread scraping\\to sort\\'

# Collect the names of the files sitting directly in the "to sort" folder.
# Only the first os.walk() result (the top level) is used: every path below,
# and in check(), is built as TO_SORT_DIR + filename, so a name found in a
# subfolder could not be opened or would hit a same-named top-level file.
fileList = []
for dirname, dirnames, filenames in os.walk(TO_SORT_DIR):
    fileList.extend(filenames)
    break
print(range(len(fileList)))

for currentFilename in fileList:
    currentPath = TO_SORT_DIR + currentFilename
    md5 = hashlib.md5()
    sha1 = hashlib.sha1()
    with open(currentPath, 'rb') as f:
        # Feed the hashes BUF_SIZE bytes at a time so large files are never
        # loaded into memory in one piece.
        while True:
            data = f.read(BUF_SIZE)
            if not data:
                break
            md5.update(data)
            sha1.update(data)
    size = os.path.getsize(currentPath)
    md5Value = md5.hexdigest()
    print("MD5: {0}".format(md5Value))
    print("SHA1: {0}".format(sha1.hexdigest()))
    print(size)
    # Fingerprint format stored in the stash: "<md5 hex>___<size in bytes>".
    # The SHA-1 digest is only printed; it is not part of the fingerprint.
    checkValue = md5Value + '___' + str(size)
    print('Current hash/size values: ' + checkValue)
    check(checkValue, currentFilename)

# Wait for Enter before exiting so the output stays visible.
input('Press any key to continue...')
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement