Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
#-------------------------------------------------------------------------------
# Name: organizePDFs
# Purpose: creates folders for batch-organizing PDF files by filename prefix
# Author: noah.huntington
# Created: 28/09/2015
# Copyright: (c) noah.huntington 2015
# Licence: <your licence>
#-------------------------------------------------------------------------------
import os
import re
import shutil
# Root folder scanned when main() is called without an argument.
# Please substitute your own path.
DEFAULT_PATH = r'R:\2012 Scanned Documents'

# Only these two spellings of the extension are treated as PDFs.
PDF_SUFFIXES = ('.pdf', '.PDF')

# Matches the (possibly empty) run of digits at the start of a filename.
_LEADING_DIGITS = re.compile(r'\d*')

_FOLDER_BANNER = '########################################################'
_REPORT_BANNER = '###############################################################'
_REPORT_RULE = '---------------------------------------------------------'


def _is_pdf(filename):
    """Return True when *filename* ends in one of PDF_SUFFIXES."""
    return filename.endswith(PDF_SUFFIXES)


def _numeric_prefix(filename):
    """Return the digits that precede the first non-digit of *filename*.

    The result is '' when the name does not start with a digit.
    """
    return _LEADING_DIGITS.match(filename).group()


def _create_prefix_folders(path):
    """Create one folder per numeric PDF prefix, beside the PDFs carrying it.

    Every directory below *path* is considered except *path* itself and any
    directory whose own name starts with a digit (such names are what this
    function creates, so they are treated as already-organised output).
    """
    planned = []
    walker = os.walk(path)
    # The first item yielded by os.walk is the root itself; its own files are
    # not organised, only those of the directories below it.
    next(walker, None)
    for dirpath, _dirnames, filenames in walker:
        if os.path.basename(dirpath)[:1].isdigit():
            continue
        print(_FOLDER_BANNER)
        print(dirpath)
        print(_FOLDER_BANNER)
        prefixes = set(_numeric_prefix(name) for name in filenames
                       if _is_pdf(name))
        # A PDF whose name has no leading digits yields '', which names no
        # folder.
        prefixes.discard('')
        planned.extend(os.path.join(dirpath, prefix)
                       for prefix in sorted(prefixes))

    # Folders are created only after the walk has finished, so the traversal
    # never has to deal with directories appearing underneath it.
    for folder in planned:
        if os.path.exists(folder):
            continue
        try:
            os.mkdir(folder)
        except OSError as err:
            # Best effort: report the failure and carry on with the rest.
            print("could not create folder %s: %s" % (folder, err))


def _report_planned_copies(path):
    """Print, for every PDF under *path*, the folder a copy would go into.

    This is a dry run: nothing is copied or moved.
    """
    for root, _dirs, files in os.walk(path):
        print(_REPORT_BANNER)
        print(root)
        print(_REPORT_BANNER)
        for filename in files:
            if not _is_pdf(filename):
                continue
            src = os.path.join(root, filename)
            print("src = " + src)
            print("dstBase = " + os.path.dirname(src))
            print("dstTail = " + _numeric_prefix(filename))
            print(_REPORT_RULE)
            # NOTE: the actual copy, shutil.copy(src, <dstBase>/<dstTail>),
            # is intentionally not performed here; only the plan is printed.
            print(" ")
            print(" ")
            print(" ")


def main(path=DEFAULT_PATH):
    """Prepare prefix folders for the PDFs under *path* and print a copy plan.

    Step 1 creates, inside each directory below *path* whose name does not
    start with a digit, one sub-folder per distinct leading-digit prefix
    found among that directory's PDF files (e.g. ``12345_a.pdf`` -> folder
    ``12345``).  Step 2 walks the whole tree, root included, and prints the
    source, destination base and destination folder name for every PDF.

    The process working directory is left unchanged.

    Args:
        path: root folder to scan; defaults to DEFAULT_PATH.

    Raises:
        OSError: if *path* is not an existing directory.
    """
    if not os.path.isdir(path):
        raise OSError("scan root is not a directory: %r" % (path,))
    _create_prefix_folders(path)
    _report_planned_copies(path)


if __name__ == '__main__':
    main()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement