Advertisement
Guest User

Expanded Diacritics Removal

a guest
Mar 5th, 2013
771
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 1.23 KB | None | 0 0
  1. /**
  2. * Mirror of the unicode table from 00c0 to 024f without diacritics.
  3. */
  4. //Latin 1 - Latin Extended-B
  5. private static final String tab00c0 = "aaaaaaaceeeeiiii" +
  6. "DNOOOOO\u00d7\u00d8UUUUYI\u00df" +
  7. "aaaaaaaceeeeiiii" +
  8. "\u00f0nooooo\u00f7\u00f8uuuuy\u00fey" +
  9. "aaaaaaccccccccdd" +
  10. "ddeeeeeeeeeegggg" +
  11. "gggghhhhiiiiiiii" +
  12. "iijjjjkkklllllll" +
  13. "lllnnnnnnnnnoooo" +
  14. "oooorrrrrrssssss" +
  15. "ssttttttuuuuuuuu" +
  16. "uuuuwwyyyzzzzzzf" +
  17. "bbbbbboccddddoee" +
  18. "effgyhltikklawnn" +
  19. "ooooopprsseltttt" +
  20. "uuuuyyzz3ee3255t" +
  21. "plll!dddjjjnnnaa" +
  22. "iioouuuuuuuuuuea" +
  23. "aaaaaggggkkoooo3" +
  24. "3jdddgghpnnaaaao" +
  25. "oaaaaeeeeiiiiooo" +
  26. "orrrruuuusstt33h" +
  27. "hnd88zzaaeeooooo" +
  28. "oooyybnbjbpacclt" +
  29. "sz??buaeejjqrrryy";
  30.  
  31. /**
  32. * Returns char without diacritics, converting to lowercase - 7 bit approximation.
  33. *
  34. * @param source char to convert
  35. * @return corresponding char without diacritics
  36. */
  37. private static char removeDiacritic(char source) {
  38. if (source >= '\u00c0' && source <= '\u024f') {
  39. source = tab00c0.charAt((int) source - '\u00c0');
  40. }
  41. return source;
  42. }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement