Advertisement
Guest User

Untitled

a guest
Oct 17th, 2019
88
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 0.79 KB | None | 0 0
  1. #! /bin/bash
  2.  
  3. set -e # Exit script immediately on first error.
  4. #set -x # Print commands and their arguments as they are executed.
  5.  
  6. NON_UTF_FILE_DIR="wiki"
  7. PATTERN_FILE_NAME="*.md"
  8.  
  9. find $NON_UTF_FILE_DIR -type f -name $PATTERN_FILE_NAME > utf8list
  10. iconv utf8list > asciilist
  11. i=1
  12. for file in $(cat utf8list); do
  13.  
  14. CURRENT_CHARSET="$(chardetect "$file" | awk '{print $2}')"
  15. if [ "$CURRENT_CHARSET" == utf-8 ]; then
  16. let i++
  17. continue
  18. fi
  19. if [ "$CURRENT_CHARSET" == GB2312 ]; then
  20. CURRENT_CHARSET="GB18030"
  21. fi
  22.  
  23. newname=$(head -$i asciilist | tail -1 | tr -d '\n').utf8
  24. echo "converting file ($CURRENT_CHARSET) to utf-8 $file => $newname"
  25. #iconv -f ISO-8859-1 -t utf8 $file > $newname
  26. iconv -f "$CURRENT_CHARSET" -t utf8 $file > $newname
  27. mv $newname $file
  28. let i++
  29. done
  30.  
  31. rm utf8list asciilist
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement