Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- private String[] getCodepage(String strText) {
- String numChars = "1234567890";
- String engChars = "qwertyuioplkjhgfdsazxcvbnmQWERTYUIOPLKJHGFDSAZXCVBNM";
- String rusChars = "йцукенгшщзхъэждлорпавыфячсмитьбюёЙЦУКЕНГШЩЗХЪЭЖДЛОРПАВЫФЯЧСМИТЬБЮЁ";
- String char1 = "";
- int[] rates = new int[9];
- for (int i = 0; i < strText.length(); i++) {
- // cp1251 -> cp1251
- char1 = new String(strText.substring(i, i + 1).getBytes(
- Charset.forName("cp1251")), Charset.forName("cp1251"));
- if (engChars.contains(char1) | rusChars.contains(char1)
- | numChars.contains(char1)) {
- rates[0]++;
- }
- // cp1252 -> cp1252
- char1 = new String(strText.substring(i, i + 1).getBytes(
- Charset.forName("cp1252")), Charset.forName("cp1252"));
- if (engChars.contains(char1) | rusChars.contains(char1)
- | numChars.contains(char1)) {
- rates[1]++;
- }
- // utf8 -> utf8
- char1 = new String(strText.substring(i, i + 1).getBytes(
- Charset.forName("UTF-8")), Charset.forName("UTF-8"));
- if (engChars.contains(char1) | rusChars.contains(char1)
- | numChars.contains(char1)) {
- rates[2]++;
- }
- // cp1251 -> cp1252
- char1 = new String(strText.substring(i, i + 1).getBytes(
- Charset.forName("cp1251")), Charset.forName("cp1252"));
- if (engChars.contains(char1) | rusChars.contains(char1)
- | numChars.contains(char1)) {
- rates[3]++;
- }
- // cp1252 -> cp1251
- char1 = new String(strText.substring(i, i + 1).getBytes(
- Charset.forName("cp1252")), Charset.forName("cp1251"));
- if (engChars.contains(char1) | rusChars.contains(char1)
- | numChars.contains(char1)) {
- rates[4]++;
- }
- // cp1251 -> utf8
- char1 = new String(strText.substring(i, i + 1).getBytes(
- Charset.forName("cp1251")), Charset.forName("UTF-8"));
- if (engChars.contains(char1) | rusChars.contains(char1)
- | numChars.contains(char1)) {
- rates[5]++;
- }
- // utf8 -> cp1251
- char1 = new String(strText.substring(i, i + 1).getBytes(
- Charset.forName("UTF-8")), Charset.forName("cp1251"));
- if (engChars.contains(char1) | rusChars.contains(char1)
- | numChars.contains(char1)) {
- rates[6]++;
- }
- // cp1252 -> utf8
- char1 = new String(strText.substring(i, i + 1).getBytes(
- Charset.forName("cp1252")), Charset.forName("UTF-8"));
- if (engChars.contains(char1) | rusChars.contains(char1)
- | numChars.contains(char1)) {
- rates[7]++;
- }
- // utf8 -> cp1252
- char1 = new String(strText.substring(i, i + 1).getBytes(
- Charset.forName("UTF-8")), Charset.forName("cp1252"));
- if (engChars.contains(char1) | rusChars.contains(char1)
- | numChars.contains(char1)) {
- rates[8]++;
- }
- int indexMax = 0;
- for (i = 0; i < 9; i++) {
- if (rates[i] > rates[indexMax]) {
- indexMax = i;
- }
- }
- switch (indexMax) {
- case 0:
- return new String[] { "cp1251", "cp1251" };
- case 1:
- return new String[] { "cp1252", "cp1252" };
- case 2:
- return new String[] { "utf8", "utf8" };
- case 3:
- return new String[] { "cp1251", "cp1252" };
- case 4:
- return new String[] { "cp1252", "cp1251" };
- case 5:
- return new String[] { "cp1251", "utf8" };
- case 6:
- return new String[] { "utf8", "cp1251" };
- case 7:
- return new String[] { "cp1252", "utf8" };
- case 8:
- return new String[] { "utf8", "cp1252" };
- }
- }
- return new String[] { "utf8", "utf8" };
- }
Advertisement
Add Comment
Please, Sign In to add comment