Advertisement
Guest User

dkremer

a guest
Dec 20th, 2008
525
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 2.67 KB | None | 0 0
  1. #!/usr/bin/env python3                                
  2.  
  3. import hashlib
  4. import os    
  5. import sys    
  6.  
  7. if __name__='__main__':
  8.     print("this script will remove some files on your computer")
  9.     print("are you SURE you want to continue ? y for yes and n for no")
  10.     response = input()                                                
  11.     if response != 'y' :                                              
  12.         print("Ok, now exiting")                                      
  13.         sys.exit(1)                                                    
  14.     else :                                                            
  15.         print("Ok, now searching for duplicata")                      
  16.         main()                                                        
  17.         sys.exit(0)                                                    
  18.                                                                        
  19. def main():                                                            
  20.  
  21.     file_tree = os.walk(os.curdir)  # path iterator
  22.     checksum_list = list()  # list files with including checksum
  23.  
  24.     for a_folder_tuple in file_tree : #  doing the checksum list
  25.  
  26.         list_files_folder = a_folder_tuple[2]
  27.         current_dir = a_folder_tuple[0]
  28.         print("check_sim is now checking the folder",current_dir)
  29.  
  30.         for a_pic in list_files_folder :
  31.  
  32.             string_filename = current_dir+os.sep+a_pic
  33.             fd_pic = open(string_filename,'rb')  #read only, binary mode
  34.             input_buffer = fd_pic.read()  # checksum on the whole file
  35.             fd_pic.close()  # throw this file descriptor and close the file
  36.             my_checksum = hashlib.md5(input_buffer)  # compute checksum
  37.             #keep filename and checksum in this list
  38.             checksum_list.append( ( my_checksum.hexdigest(), string_filename ) )
  39.  
  40.     checksum_list.sort( key = lambda x : x[0] )  # alphabetically sorted by filename
  41.  
  42.     same_files = [ ]  # the same files stored
  43.  
  44.     for i in range(len(checksum_list)-1) :  # checking for duplicate files
  45.         same_files.append( checksum_list[i] )  # and deleting them
  46.  
  47.         # Match different files ? Remove the identic files previous
  48.         if checksum_list[i+1][0] != checksum_list[i][0] :
  49.             if len(same_files) > 1 :
  50.                 files_to_delete = [ k[1] for k in same_files ]
  51.                 print("This files are same",files_to_delete)
  52.                 for k in range(len(files_to_delete)-1):
  53.                     print("deleting",files_to_delete[k])
  54.                     os.remove(files_to_delete[k])
  55.                 print("keeping",files_to_delete[k+1])
  56.             same_files = [ ]  # erase duplicata list
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement