daily pastebin goal
78%
SHARE
TWEET

organizePDFs

a guest Oct 1st, 2015 114 Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
  1. #-------------------------------------------------------------------------------
  2. # Name:        organizePDFs
  3. # Purpose:     creates folders for batch organizing pdf files by filename prefix
  4. # Author:      noah.huntington
  5. # Created:     28/09/2015
  6. # Copyright:   (c) noah.huntington 2015
  7. # Licence:     <your licence>
  8. #-------------------------------------------------------------------------------
  9. import os, shutil, re
  10.  
  11. def main():
  12.         uniqueSet = set()
  13.         # pls substitute with your own path
  14.         path = r'R:\2012 Scanned Documents'
  15.         os.chdir(path)
  16.         directoryList = []
  17.         count = 1
  18.  
  19.  
  20.         for dirpath, dirnames, filenames in os.walk(path):
  21.             for dirname in dirnames:
  22.                 if not dirname[:1].isdigit():
  23.                     directoryList.append(os.path.join(path, dirname))
  24.         for directory in directoryList:
  25. ##            print directory
  26.  
  27.             os.chdir(directory)
  28.             for dirpath, dirnames, filenames in os.walk(os.getcwd()):
  29.                 print '########################################################'
  30.                 current = os.getcwd()
  31.                 print current
  32.                 print '########################################################'
  33.                 for filename in filenames:
  34.                     # Search only pdfs
  35.                     if filename.endswith(('.pdf','.PDF')):
  36.                         # Find the index position of first non-numeric character
  37.                         m = re.search("\D", filename)
  38.                         s = m.start()
  39.                         # Walk files looking for unique values of filename[5:]
  40.                         if filename[s:] == filename[s:]:
  41.                              # add them to set
  42.                             uniqueSet.add(filename[:s])
  43.                 for item in uniqueSet:
  44.                     if not os.path.exists(os.path.join(current,item)):
  45.                         try:
  46.                             os.mkdir(os.path.join(current,item))
  47.                         except:
  48.                             pass
  49.  
  50.                 uniqueSet = set()
  51.  
  52.  
  53. #-------------------------------------------------------------------------------
  54. # Purpose:     this portion copies pdf documents into folder w/ same prefix
  55. #-------------------------------------------------------------------------------
  56.         os.chdir(path)
  57.  
  58.         for root, dirs, files in os.walk(path):
  59.             print '###############################################################'
  60.             current = root
  61.             print current
  62.             print '###############################################################'
  63.             for filename in files:
  64.                 # Search only pdfs
  65.                 if filename.endswith(('.pdf','.PDF')):
  66.                     # Find the index position of first non-numeric character
  67.                     m = re.search("\D", filename)
  68.                     s = m.start()
  69.                     # Walk files looking for unique values of filename[s:]
  70.                     if filename[s:] == filename[s:]:
  71.                         src = os.path.join(current, filename)
  72.                         print "src = " + src
  73.                         dstBase = os.path.split(src)[0]
  74.                         print "dstBase = " + dstBase
  75.                         dstTail = filename[:s]
  76.                         print "dstTail = " + dstTail
  77.                         print '---------------------------------------------------------'
  78. ##                        try:
  79. ##                            shutil.copy(src, os.path.join(dstBase,dstTail))
  80. ##                        except:
  81. ##                            pass
  82.  
  83.             print " "
  84.             print " "
  85.             print " "
  86.  
  87.  
  88. if __name__ == '__main__':
  89.     main()
RAW Paste Data
Top