Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
#!/usr/bin/python
"""Tidy the names of the files in the current directory.

For every regular, non-hidden file in the current working directory:

1. split the name into a stem and an extension,
2. turn the word-break characters in the stem ('_', '-', ',', '.') into
   single spaces and trim surrounding whitespace,
3. drop words that appear in almost every file name (series titles, camera
   prefixes and similar boilerplate),
4. rename the file to the tidied stem plus its original extension.

A rename is skipped, with a note on stderr, whenever the target name already
exists, so no file is ever overwritten.
"""
import sys
import os
import collections
import itertools

# Characters that are treated as word breaks and converted to spaces.
WORD_BREAKS = "_-,."

# A word is considered boilerplate when it appears in MORE than
# (number of files - REPETITION_SLACK) file names.
REPETITION_SLACK = 4


def tidy_prefix(prefix):
    """Return *prefix* with word breaks turned into single spaces.

    Runs of whitespace are collapsed to one space and leading/trailing
    whitespace is removed.
    """
    for separator in WORD_BREAKS:
        prefix = prefix.replace(separator, " ")
    # split()/join collapses whitespace runs of any length in one pass.
    return " ".join(prefix.split())


def find_common_words(prefixes, repetition_threshold):
    """Return the set of words found in more than *repetition_threshold* names.

    Each word is counted at most once per name, so a word repeated inside a
    single file name does not look "common". The threshold is clamped to at
    least 1: without the clamp a small directory yields a zero or negative
    threshold and every single word would qualify for deletion.
    """
    threshold = max(1, repetition_threshold)
    words_per_name = (set(prefix.split()) for prefix in prefixes)
    occurrences = collections.Counter(
        itertools.chain.from_iterable(words_per_name))
    return set(word for word, count in occurrences.items()
               if count > threshold)


def remove_words(prefix, words):
    """Return *prefix* without any whole word contained in *words*.

    Only complete, space-separated words are dropped; a common word that is
    merely a substring of a longer word is left alone.
    """
    return " ".join(word for word in prefix.split() if word not in words)


def plan_new_names(old_file_names, repetition_slack=REPETITION_SLACK):
    """Return the tidied names, in the same order as *old_file_names*.

    Nothing is renamed here; this only computes the target names.
    """
    # splitext copes with extensions of any length and with names that have
    # no extension at all.
    name_parts = [os.path.splitext(name) for name in old_file_names]
    # Fall back to the untouched stem if tidying would leave nothing.
    prefixes = [tidy_prefix(stem) or stem for stem, _ in name_parts]
    common_words = find_common_words(
        prefixes, len(old_file_names) - repetition_slack)

    new_file_names = []
    for prefix, (_, suffix) in zip(prefixes, name_parts):
        stripped = remove_words(prefix, common_words)
        # If every word was "common", keep the tidied prefix rather than
        # producing a name that consists of the extension only.
        new_file_names.append((stripped or prefix) + suffix)
    return new_file_names


def rename_files(old_file_names, new_file_names):
    """Rename each old name to its new name and return how many were renamed.

    Names that are already tidy are left untouched. A rename whose target
    already exists is skipped, because os.rename may silently replace the
    existing file.
    """
    renamed = 0
    for old_name, new_name in zip(old_file_names, new_file_names):
        if old_name == new_name:
            continue
        if os.path.lexists(new_name):
            sys.stderr.write("skipping %r: %r already exists\n"
                             % (old_name, new_name))
            continue
        os.rename(old_name, new_name)
        renamed += 1
    return renamed


def list_candidate_files():
    """Return the sorted names of the files in the current directory to tidy.

    Directories, hidden files (leading '.') and this script itself are left
    out: none of them has a stem/extension that is meant to be rewritten.
    """
    script_path = os.path.abspath(sys.argv[0])
    candidates = []
    for name in sorted(os.listdir(".")):
        if name.startswith("."):
            continue
        if not os.path.isfile(name):
            continue
        if os.path.abspath(name) == script_path:
            continue
        candidates.append(name)
    return candidates


def main():
    """Tidy the names of all candidate files in the current directory."""
    old_file_names = list_candidate_files()
    new_file_names = plan_new_names(old_file_names)
    rename_files(old_file_names, new_file_names)


if __name__ == "__main__":
    main()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement