Guest User

Untitled

a guest
Apr 4th, 2016
131
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 4.90 KB | None | 0 0
  1. #!/volume1/@appstore/python/bin/python2
  2. #-*- coding: iso-8859-15 -*-
  3. #
  4. # If a file has been archived under an ISO-8859 environment and unarchived
  5. # under a UTF-8 environment, then you will get an encoding format problem.
  6. # The file will not be readable through SAMBA.
  7. #
  8. # To fix this problem, you must convert the encoding format
  9. # to the UTF8 (default Synology encoding)
  10. #
  11. # NB: in all cases, files will be readable through Samba, even if the detection
  12. # fails; in that case the converted characters may be wrong.
  13. #
  14. # Remark: I guess it should work for any other encoding. Just replace
  15. # ISO-8859-15 (Western Europe) by the one corresponding to your country:
  16. # http://en.wikipedia.org/wiki/Character_encoding#Common_character_encodings
  17. #
  18.  
  19. # get library modules
  20. import sys, os
  21.  
  22. ########################
  23. # ----- Functions ---- #
  24. ########################
  25.  
  26. # Special character hex range:
  27. # CP850: 0x80-0xA5 (fortunately not used in ISO-8859-15)
  28. # UTF-8: 1st hex code 0xC2-0xC3 followed by a 2nd hex code 0xA1-0xFF
  29. # ISO-8859-15: 0xA6-0xFF
  30. # The function will detect if fileDirName contains a special character
  31. # If there is special character, detects if it is a UTF-8, CP850 or ISO-8859-15 encoding
  32. def renameFunc(fullPath, fileDirName):
  33. encodingDetected = False
  34. Count = 1
  35. # parsing all files/directories in order to detect if CP850 is used
  36. for Idx in range(len(fileDirName)):
  37. # /!\ detection is done 2char by 2char for UTF-8 special character
  38. if (len(fileDirName) != 1) & (Idx < (len(fileDirName) - 1)):
  39. # Detect UTF-8
  40. if ((fileDirName[Idx] == '\xC2') | (fileDirName[Idx] == '\xC3')) & ((fileDirName[Idx+1] >= '\xA0') & (fileDirName[Idx+1] <= '\xFF')):
  41. print os.path.join(fullPath, fileDirName) + " -> UTF-8 detected: Nothing to be done"
  42. encodingDetected = True
  43. break;
  44. # Detect CP850
  45. elif ((fileDirName[Idx] >= '\x80') & (fileDirName[Idx] <= '\xA5')):
  46. utf8Name = fileDirName.decode('cp850')
  47. utf8Name = utf8Name.encode('utf-8')
  48. # os.rename(os.path.join(fullPath, fileDirName), os.path.join(fullPath, utf8Name))
  49. print os.path.join(fullPath, fileDirName)
  50. print os.path.join(fullPath, utf8Name) + " -> CP850 detected: Renamed"
  51. encodingDetected = True
  52. break;
  53. # Detect ISO-8859-15
  54. elif (fileDirName[Idx] >= '\xA6') & (fileDirName[Idx] <= '\xFF'):
  55. utf8Name = fileDirName.decode('iso-8859-15')
  56. utf8Name = utf8Name.encode('utf-8')
  57. # os.rename(os.path.join(fullPath, fileDirName), os.path.join(fullPath, utf8Name))
  58. print os.path.join(fullPath, fileDirName)
  59. print os.path.join(fullPath, utf8Name) + " -> ISO-8859-15 detected: Renamed"
  60. encodingDetected = True
  61. break;
  62. else:
  63. # Detect CP850
  64. if ((fileDirName[Idx] >= '\x80') & (fileDirName[Idx] <= '\xA5')):
  65. utf8Name = fileDirName.decode('cp850')
  66. utf8Name = utf8Name.encode('utf-8')
  67. # os.rename(os.path.join(fullPath, fileDirName), os.path.join(fullPath, utf8Name))
  68. print os.path.join(fullPath, fileDirName)
  69. print os.path.join(fullPath, utf8Name) + " -> CP850 detected: Renamed"
  70. encodingDetected = True
  71. break;
  72. # Detect ISO-8859-15
  73. elif (fileDirName[Idx] >= '\xA6') & (fileDirName[Idx] <= '\xFF'):
  74. utf8Name = fileDirName.decode('iso-8859-15')
  75. utf8Name = utf8Name.encode('utf-8')
  76. # os.rename(os.path.join(fullPath, fileDirName), os.path.join(fullPath, utf8Name))
  77. print os.path.join(fullPath, fileDirName)
  78. print os.path.join(fullPath, utf8Name) + " -> ISO-8859-15 detected: Renamed"
  79. encodingDetected = True
  80. break;
  81. if (encodingDetected == False):
  82. print os.path.join(fullPath, fileDirName) + " -> No special characters detected: Nothing to be done"
  83. return
  84.  
  85.  
  86. ###########################
  87. # ----- Main Program ---- #
  88. ###########################
  89.  
  90. StartFolder = "/media/usenet/TV"
  91.  
  92. # process each sub-folders starting from the deepest level
  93. print 80*'-'
  94. print "Renaming folders to UTF-8 format..."
  95. for dirname, dirnames, filenames in os.walk(StartFolder, topdown=False):
  96. for subdirname in dirnames:
  97. renameFunc(dirname, subdirname)
  98. print "Folder renaming Done !"
  99. print 80*'-'
  100. print ""
  101.  
  102. # process each file recursively
  103. print 80*'-'
  104. print "Renaming files to UTF-8 format..."
  105. for dirname, dirnames, filenames in os.walk(StartFolder):
  106. for filename in filenames:
  107. renameFunc(dirname, filename)
  108. print "Files renaming Done !"
  109. print 80*'-'
  110. print ""
  111.  
  112.  
  113. print ""
  114. print "Character encoding translation done!"
Add Comment
Please, Sign In to add comment