Guest User

organizePDFs

a guest
Oct 1st, 2015
248
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
  1. #-------------------------------------------------------------------------------
  2. # Name: organizePDFs
  3. # Purpose: creates folders for batch organizing pdf files by filename prefix
  4. # Author: noah.huntington
  5. # Created: 28/09/2015
  6. # Copyright: (c) noah.huntington 2015
  7. # Licence: <your licence>
  8. #-------------------------------------------------------------------------------
  9. import os, shutil, re
  10.  
  11. def main():
  12. uniqueSet = set()
  13. # pls substitute with your own path
  14. path = r'R:\2012 Scanned Documents'
  15. os.chdir(path)
  16. directoryList = []
  17. count = 1
  18.  
  19.  
  20. for dirpath, dirnames, filenames in os.walk(path):
  21. for dirname in dirnames:
  22. if not dirname[:1].isdigit():
  23. directoryList.append(os.path.join(path, dirname))
  24. for directory in directoryList:
  25. ## print directory
  26.  
  27. os.chdir(directory)
  28. for dirpath, dirnames, filenames in os.walk(os.getcwd()):
  29. print '########################################################'
  30. current = os.getcwd()
  31. print current
  32. print '########################################################'
  33. for filename in filenames:
  34. # Search only pdfs
  35. if filename.endswith(('.pdf','.PDF')):
  36. # Find the index position of first non-numeric character
  37. m = re.search("\D", filename)
  38. s = m.start()
  39. # Walk files looking for unique values of filename[5:]
  40. if filename[s:] == filename[s:]:
  41. # add them to set
  42. uniqueSet.add(filename[:s])
  43. for item in uniqueSet:
  44. if not os.path.exists(os.path.join(current,item)):
  45. try:
  46. os.mkdir(os.path.join(current,item))
  47. except:
  48. pass
  49.  
  50. uniqueSet = set()
  51.  
  52.  
  53. #-------------------------------------------------------------------------------
  54. # Purpose: this portion copies pdf documents into folder w/ same prefix
  55. #-------------------------------------------------------------------------------
  56. os.chdir(path)
  57.  
  58. for root, dirs, files in os.walk(path):
  59. print '###############################################################'
  60. current = root
  61. print current
  62. print '###############################################################'
  63. for filename in files:
  64. # Search only pdfs
  65. if filename.endswith(('.pdf','.PDF')):
  66. # Find the index position of first non-numeric character
  67. m = re.search("\D", filename)
  68. s = m.start()
  69. # Walk files looking for unique values of filename[s:]
  70. if filename[s:] == filename[s:]:
  71. src = os.path.join(current, filename)
  72. print "src = " + src
  73. dstBase = os.path.split(src)[0]
  74. print "dstBase = " + dstBase
  75. dstTail = filename[:s]
  76. print "dstTail = " + dstTail
  77. print '---------------------------------------------------------'
  78. ## try:
  79. ## shutil.copy(src, os.path.join(dstBase,dstTail))
  80. ## except:
  81. ## pass
  82.  
  83. print " "
  84. print " "
  85. print " "
  86.  
  87.  
  88. if __name__ == '__main__':
  89. main()
RAW Paste Data