Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- #!/volume1/@appstore/python/bin/python2
- #-*- coding: iso-8859-15 -*-
- #
- # If a file has been archived under an ISO-8859 environment and unarchived
- # under a UTF-8 environment, then you will get an encoding format problem.
- # The file will not be readable through SAMBA.
- #
- # To fix this problem, you must convert the encoding format
- # to the UTF8 (default Synology encoding)
- #
- # NB: in all cases, files will be readable through Samba, even if the detection
- # fails; in that case the converted characters will simply be wrong.
- #
- # Remark: I guess it should work for any other encoding style. Just replace
- # ISO-8859-15 (Western Europe) by the one corresponding to your country:
- # http://en.wikipedia.org/wiki/Character_encoding#Common_character_encodings
- #
- # get library modules
- import sys, os
- ########################
- # ----- Functions ---- #
- ########################
# How the encoding of a file/directory name is recognised:
#   ASCII only:  no byte >= 0x80 -> nothing to do
#   UTF-8:       the whole name decodes as strict UTF-8 (lead bytes 0xC2-0xF4,
#                continuation bytes 0x80-0xBF)
#   CP850:       otherwise, first special byte is in 0x80-0xA5
#   ISO-8859-15: otherwise, first special byte is in 0xA6-0xFF
# The CP850 / ISO-8859-15 split is a heuristic: both are single-byte encodings
# in which every byte is valid, so only the byte range can tell them apart.
_LEGACY_LABELS = {'cp850': 'CP850', 'iso-8859-15': 'ISO-8859-15'}


def _raw_bytes(path):
    """Return path as the raw byte string stored on disk."""
    if isinstance(path, bytes):
        # Python 2 `str` or Python 3 `bytes`: already raw.
        return path
    # Python 3 text path, e.g. from os.walk() on a text start folder.
    return os.fsencode(path)


def _printable(path):
    """Return a byte path in a form that can be concatenated and printed."""
    if isinstance(path, str):
        # Python 2: byte strings are `str`; print them untouched.
        return path
    # Python 3: show undecodable bytes as U+FFFD instead of failing.
    return path.decode('utf-8', 'replace')


def _detect_encoding(rawName):
    """Return 'utf-8', 'cp850', 'iso-8859-15', or None for a pure-ASCII name."""
    special = [byte for byte in bytearray(rawName) if byte >= 0x80]
    if not special:
        return None
    try:
        # Strict validation of the whole name. Checking only a 0xC2/0xC3 lead
        # byte misses valid sequences such as C3 89 or E2 82 AC, which would
        # then be converted a second time.
        rawName.decode('utf-8')
    except UnicodeDecodeError:
        pass
    else:
        return 'utf-8'
    # Not UTF-8: the first special byte decides between the legacy encodings.
    if special[0] <= 0xA5:
        return 'cp850'
    return 'iso-8859-15'


def renameFunc(fullPath, fileDirName, dryRun=True):
    """Report the encoding of one directory entry and convert its name to UTF-8.

    One status line is printed for names that need no change; two lines (old
    path, then new path plus status) are printed for names in a legacy
    encoding.

    Args:
        fullPath: directory that contains the entry.
        fileDirName: name of the file or directory inside fullPath. Byte
            strings are used as-is; Python 3 text is converted back to its
            on-disk bytes with os.fsencode().
        dryRun: when True (the default) nothing on disk is modified and the
            status line says so; when False the entry is renamed with
            os.rename(), unless the target name already exists.

    Returns:
        None.

    Raises:
        OSError: propagated from os.rename() when dryRun is False.
    """
    directory = _raw_bytes(fullPath)
    rawName = _raw_bytes(fileDirName)
    oldPath = os.path.join(directory, rawName)

    encoding = _detect_encoding(rawName)
    if encoding is None:
        print(_printable(oldPath) + " -> No special characters detected: Nothing to be done")
        return
    if encoding == 'utf-8':
        print(_printable(oldPath) + " -> UTF-8 detected: Nothing to be done")
        return

    utf8Name = rawName.decode(encoding).encode('utf-8')
    newPath = os.path.join(directory, utf8Name)
    if dryRun:
        outcome = "Would be renamed (dry run)"
    elif os.path.lexists(newPath):
        # os.rename() may silently replace an existing entry; never do that.
        outcome = "Not renamed (target already exists)"
    else:
        os.rename(oldPath, newPath)
        outcome = "Renamed"

    print(_printable(oldPath))
    print(_printable(newPath) + " -> " + _LEGACY_LABELS[encoding] + " detected: " + outcome)
    return
- ###########################
- # ----- Main Program ---- #
- ###########################
# Root of the tree whose entries are checked.
StartFolder = "/media/usenet/TV"
separator = 80 * '-'

# Pass 1: directories, walked bottom-up (topdown=False) so the deepest
# sub-folders are handled before their parents.
print(separator)
print("Renaming folders to UTF-8 format...")
for parentDir, childDirs, _unusedFiles in os.walk(StartFolder, topdown=False):
    for childDir in childDirs:
        renameFunc(parentDir, childDir)
print("Folder renaming Done !")
print(separator)
print("")

# Pass 2: every file, walked recursively from the start folder.
print(separator)
print("Renaming files to UTF-8 format...")
for parentDir, _unusedDirs, childFiles in os.walk(StartFolder):
    for childFile in childFiles:
        renameFunc(parentDir, childFile)
print("Files renaming Done !")
print(separator)
print("")
print("")
print("Character encoding translation done!")
Add Comment
Please, Sign In to add comment