Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
#!/bin/bash
#
# Convert text files to UTF-8 in place.
#
# Every regular file under NON_UTF_FILE_DIR whose name matches
# PATTERN_FILE_NAME is passed to `chardetect`. Files reported as utf-8 are
# left untouched; every other file is re-encoded with `iconv` and the result
# replaces the original.
#
# Usage:    <script> [DIR [NAME_PATTERN]]
#             DIR           directory to scan     (default: wiki)
#             NAME_PATTERN  `find -name` pattern  (default: *.md)
# Requires: chardetect, iconv, find
# Exit:     non-zero as soon as a prerequisite is missing or a command fails.
#           Files whose encoding cannot be guessed are skipped with a warning
#           on stderr and do not affect the exit status.

set -euo pipefail # Exit script immediately on first error.
#set -x # Print commands and their arguments as they are executed.

NON_UTF_FILE_DIR="${1:-wiki}"
PATTERN_FILE_NAME="${2:-*.md}"

# Print a message on stderr and abort.
die() {
  printf '%s\n' "$*" >&2
  exit 1
}

# Path of the conversion output currently being written; empty when none.
newname=""

# Remove a half-written conversion output if the script stops mid-file.
cleanup() {
  if [[ -n "$newname" ]]; then
    rm -f -- "$newname"
  fi
}
trap cleanup EXIT

for tool in chardetect iconv; do
  command -v "$tool" >/dev/null || die "required tool not found: $tool"
done

# find runs inside a process substitution below, where its exit status is not
# seen by `set -e`, so a missing directory is rejected up front instead.
[[ -d "$NON_UTF_FILE_DIR" ]] || die "not a directory: $NON_UTF_FILE_DIR"

# File names arrive NUL-delimited on fd 3: this keeps names with spaces or
# newlines intact and leaves stdin free for the commands in the loop body.
while IFS= read -r -d '' file <&3; do
  detected="$(chardetect "$file")"

  # chardetect is expected to answer "<file>: <charset> with confidence <n>",
  # or "<file>: no result" when it cannot guess (e.g. for an empty file).
  if [[ "$detected" == *": no result" ]]; then
    printf 'skipping %s: encoding could not be detected\n' "$file" >&2
    continue
  fi

  # Drop everything up to the last ": " (the file name may itself contain
  # spaces or colons), then keep only the first word of what is left.
  CURRENT_CHARSET="${detected##*: }"
  CURRENT_CHARSET="${CURRENT_CHARSET%% *}"

  if [[ "$CURRENT_CHARSET" == utf-8 ]]; then
    continue
  fi

  # Files detected as GB2312 are decoded as GB18030 instead.
  # NOTE(review): presumably because GB18030 is a superset that also covers
  # characters outside plain GB2312 - confirm.
  if [[ "$CURRENT_CHARSET" == GB2312 ]]; then
    CURRENT_CHARSET="GB18030"
  fi

  # Write next to the original so the final mv stays on the same file system.
  newname="${file}.utf8"
  printf 'converting file (%s) to utf-8 %s => %s\n' \
    "$CURRENT_CHARSET" "$file" "$newname"
  #iconv -f ISO-8859-1 -t utf8 $file > $newname
  iconv -f "$CURRENT_CHARSET" -t UTF-8 "$file" > "$newname"
  mv -- "$newname" "$file"
  newname=""
done 3< <(find "$NON_UTF_FILE_DIR" -type f -name "$PATTERN_FILE_NAME" -print0)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement