Advertisement
Guest User

remove common words from filenames

a guest
Sep 15th, 2013
301
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 2.41 KB | None | 0 0
  1. #!/usr/bin/python
  2.  
  3. #import libraries to be used
  4. import sys
  5. import os
  6. import collections
  7. import itertools
  8.  
  9. #create a list of the files whose names I'd like to change
  10. old_file_names = os.listdir(".")
  11. num_files=len(old_file_names)
  12.  
  13. #this next threshold value signifies the minimum number of files that a word must appear in to be deleted
  14. repetition_threshold = num_files-4
  15.  
  16. #start with the list of files, extract the prefix and suffix
  17. #convert different kinds of word breaks such as '_' '-' ',' '.' and convert them all to spaces
  18. #tidy up the file names, removing trailing and extra white spaces
  19. #at the moment no renaming is done.  Simply a list of names has been tidied
  20. file_suffixes=[x[-4:] for x in old_file_names]
  21. new_file_prefixes=[x[:-4] for x in old_file_names]
  22. new_file_prefixes=[x.replace("_"," ") for x in new_file_prefixes]
  23. new_file_prefixes=[x.replace("-"," ") for x in new_file_prefixes]
  24. new_file_prefixes=[x.replace("."," ") for x in new_file_prefixes]
  25. new_file_prefixes=[x.replace("  "," ") for x in new_file_prefixes]
  26. new_file_prefixes=[x.replace("  "," ") for x in new_file_prefixes]
  27. new_file_prefixes=[x.replace("  "," ") for x in new_file_prefixes]
  28. new_file_prefixes=[x.replace("  "," ") for x in new_file_prefixes]
  29. new_file_prefixes=[x.strip() for x in new_file_prefixes]
  30.  
  31. #Now need to make a list of all the words used in all the files.
  32. #From this we will determine if any words are repeated.  These are the words we will remove.
  33. words_in_file_names = [file_prefix.split() for file_prefix in new_file_prefixes]
  34. #now flatten this list
  35. words_in_file_names = list(itertools.chain.from_iterable(words_in_file_names))
  36. #this next list dict contains all the words which have been repeated a lot.
  37. dict= [x for x, y in collections.Counter(words_in_file_names).items() if y > repetition_threshold]
  38.  
  39. #if there are common (ie repeated) words, delete them from the list of tidied file names
  40. #if there are no common words, I would still like the tidied names to be applied to the files, so the script continues below
  41. if len(dict)>0:
  42.     for word in dict:
  43.         new_file_prefixes=[x.replace(word,"") for x in new_file_prefixes]
  44.  
  45. #reconstruct the new file names as prefix + suffix
  46. new_file_names=[new_file_prefixes+file_suffixes for new_file_prefixes,file_suffixes in zip(new_file_prefixes,file_suffixes)]
  47.  
  48. #finally, perform the renaming
  49. [os.rename(old_file_names[i], new_file_names[i]) for i in range(num_files)]
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement