Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- #!/usr/bin/env python3
- import hashlib
- import os
- import sys
def _md5_of_file(path, chunk_size=1 << 20):
    """Return the hexadecimal MD5 digest of the file at *path*.

    The file is read in pieces of ``chunk_size`` bytes so that a large file
    is never held in memory in one go.
    """
    digest = hashlib.md5()
    with open(path, 'rb') as stream:
        for chunk in iter(lambda: stream.read(chunk_size), b''):
            digest.update(chunk)
    return digest.hexdigest()


def main():
    """Delete duplicate files found under the current directory.

    Every file below ``os.curdir`` is hashed with MD5.  Files that share a
    checksum are treated as copies of each other: all but the last one
    found (in ``os.walk`` order) are removed with ``os.remove``.

    Files that cannot be read are reported and skipped; they are never
    deleted.
    """
    # checksum -> paths having that checksum, in the order os.walk found them
    paths_by_checksum = {}
    for current_dir, _subdirs, filenames in os.walk(os.curdir):
        print("check_sim is now checking the folder", current_dir)
        for filename in filenames:
            path = os.path.join(current_dir, filename)
            try:
                checksum = _md5_of_file(path)
            except OSError as err:
                # A file we cannot hash is left out of every group, so it
                # can never be picked for deletion.
                print("skipping unreadable file", path, err)
                continue
            paths_by_checksum.setdefault(checksum, []).append(path)

    # Grouping by checksum handles every group, including the one whose
    # checksum sorts last.  Iterating in checksum order keeps the report
    # order deterministic for a given walk.
    for checksum in sorted(paths_by_checksum):
        same_files = paths_by_checksum[checksum]
        if len(same_files) < 2:
            continue
        print("This files are same", same_files)
        *extra_copies, kept = same_files
        for path in extra_copies:
            print("deleting", path)
            os.remove(path)
        print("keeping", kept)


# The entry guard sits below main() so the name exists when it is called.
if __name__ == '__main__':
    print("this script will remove some files on your computer")
    print("are you SURE you want to continue ? y for yes and n for no")
    response = input()
    if response != 'y':
        print("Ok, now exiting")
        sys.exit(1)
    print("Ok, now searching for duplicata")
    main()
    sys.exit(0)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement