Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- ### Code "Exists.java"
- ### Originally taken from http://stackoverflow.com/a/3835183/1440785
- import java.io.*;
- import java.nio.charset.*;
/**
 * Small experiment for observing how the JVM turns a non-ASCII file name
 * into bytes when it talks to the file system.
 *
 * <p>The first command-line argument selects the mode:
 * <ul>
 *   <li>{@code stat} - calls {@link File#exists()} on {@code "bbbääccc"} and
 *       discards the result; the call is only made so that it can be observed
 *       from outside the process (for example with {@code strace}).</li>
 *   <li>{@code new} - creates an empty file named {@code "bbbääccc"}.</li>
 *   <li>{@code list} - for every entry of the current directory whose name
 *       starts with {@code "bb"}, prints the name and its hex dump and creates
 *       two empty copies below {@code new/}.</li>
 * </ul>
 * Any other (or missing) argument does nothing.
 */
public class Exists {

    /** File name containing two umlauts, used by the "stat" and "new" modes. */
    private static final String UMLAUT_NAME = "bbbääccc";

    /** Directory that receives the files created by the "list" mode. */
    private static final String OUTPUT_DIR = "new";

    /**
     * Entry point; dispatches on the first argument as described on the class.
     *
     * @param args command-line arguments; only {@code args[0]} is inspected
     * @throws Exception if a file-system operation fails
     */
    public static void main(String[] args) throws Exception {
        String mode = args.length > 0 ? args[0] : "";
        if (mode.equals("stat")) {
            // Result intentionally ignored: only the call itself is of interest.
            new File(UMLAUT_NAME).exists();
        } else if (mode.equals("new")) {
            new File(UMLAUT_NAME).createNewFile();
        } else if (mode.equals("list")) {
            listAndCopy();
        }
    }

    /**
     * Implements the "list" mode: prints every "bb*" entry of the current
     * directory with its hex dumps and creates the two copies below
     * {@code new/}.
     *
     * @throws IOException if the current directory cannot be listed, the
     *         output directory cannot be created, or a file cannot be created
     */
    private static void listAndCopy() throws IOException {
        File[] files = new File(".").listFiles();
        if (files == null) {
            // listFiles() returns null instead of throwing on I/O errors.
            throw new IOException("Cannot list the current directory");
        }

        Charset utf8 = Charset.forName("UTF-8");
        File outputDir = new File(OUTPUT_DIR);

        for (File file : files) {
            String name = file.getName();
            if (!name.startsWith("bb")) {
                continue;
            }

            System.out.println(file);
            // Deliberately uses the platform default charset: this line shows
            // the bytes produced by the "current encoding".
            System.out.println("Hex (current encoding): " + toHex(name.getBytes()) + " --" + name);

            // Round-trips the name through UTF-8 bytes and back into a String.
            byte[] utf8Bytes = name.getBytes(utf8);
            String utf8Str = new String(utf8Bytes, utf8);
            System.out.println("Hex (UTF8): " + toHex(utf8Bytes) + " --" + utf8Str);

            // createNewFile() fails if the parent directory is missing, so
            // make sure it exists before the first file is written.
            if (!outputDir.isDirectory() && !outputDir.mkdirs()) {
                throw new IOException("Cannot create output directory: " + outputDir);
            }
            new File(outputDir, "as-is-" + name).createNewFile();
            new File(outputDir, "utf8-" + utf8Str).createNewFile();
        }
    }

    /**
     * Renders a byte array as lowercase hexadecimal, two digits per byte and
     * without separators.
     *
     * @param bytes the bytes to render; must not be {@code null}
     * @return the hex string, empty for an empty array
     */
    public static String toHex(byte[] bytes) {
        StringBuilder hex = new StringBuilder(bytes.length * 2);
        for (byte b : bytes) {
            // Mask to 4 bits so negative bytes map to the digits 8..f.
            hex.append(Character.forDigit((b >> 4) & 0xf, 16));
            hex.append(Character.forDigit(b & 0xf, 16));
        }
        return hex.toString();
    }
}
- ### stat() output for new File("bbbääccc").exists()
- $ LC_CTYPE="en_US.iso88591" strace -f -o out java Exists stat
- From file "out": 30989 stat64("bbb\344\344ccc", 0xb7884f2c) = -1 ENOENT (No such file or directory)
- $ LC_CTYPE="en_US.utf8" strace -f -o out java Exists stat
- From file "out": 31096 stat64("bbb\303\244\303\244ccc", 0xb77a9f2c) = -1 ENOENT (No such file or directory)
- ### Java Property file.encoding=.. has no impact on filenames
- $ LC_CTYPE="en_US.utf8" strace -f -o out java -Dfile.encoding=ISO-8859-1 Exists stat
- From file "out": 31121 stat64("bbb\303\244\303\244ccc", 0xb7864f2c) = -1 ENOENT (No such file or directory)
- $ LC_CTYPE="en_US.utf8" strace -f -o out java -Dfile.encoding=UTF-8 Exists stat
- From file "out": 31367 stat64("bbb\303\244\303\244ccc", 0xb76e1f2c) = -1 ENOENT (No such file or directory)
- $ LC_CTYPE="en_US.iso88591" strace -f -o out java -Dfile.encoding=ISO-8859-1 Exists stat
- From file "out": 31158 stat64("bbb\344\344ccc", 0xb7759f2c) = -1 ENOENT (No such file or directory)
- $ LC_CTYPE="en_US.iso88591" strace -f -o out java -Dfile.encoding=UTF-8 Exists stat
- From file "out": 31176 stat64("bbb\344\344ccc", 0xb7851f2c) = -1 ENOENT (No such file or directory)
- ### Creating a file with umlaut (ä)
- $ LC_CTYPE="en_US.iso88591" strace -f -o out java -Dfile.encoding=ISO-8859-1 Exists new
- $ ls -al bb*
- -rw-rw-r-- 1 user user 0 2013-12-14 15:27 bbb??ccc
- $ LC_CTYPE="en_US.iso88591" ls -al bb*
- -rw-rw-r-- 1 user user 0 2013-12-14 15:27 bbb��ccc
- $ convmv -f iso-8859-1 -t utf8 bbbääccc
- Your Perl version has fleas #37757 #49830
- Starting a dry run without changes...
- mv "./bbb��ccc" "./bbbääccc"
- ### Create and list a file with umlaut (ä)
- $ LC_CTYPE="en_US.iso88591" strace -f -o out java -Dfile.encoding=ISO-8859-1 Exists new
- // creates new file 'bbbääccc'
- $ ls bb*
- bbb??ccc
- $ LC_CTYPE="en_US.iso88591" strace -f -o out java -Dfile.encoding=ISO-8859-1 Exists list
- ./bbb��ccc
- $ LC_CTYPE="en_US.iso88591" strace -f -o out java -Dfile.encoding=utf8 Exists list
- ./bbbääccc
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement