Pastebin launched a little side project called VERYVIRAL.com, check it out ;-) Want more features on Pastebin? Sign Up, it's FREE!
Guest

Expanded Diacritics Removal

By: a guest on Mar 5th, 2013  |  syntax: None  |  size: 1.23 KB  |  views: 88  |  expires: Never
download  |  raw  |  embed  |  report abuse  |  print
Text below is selected. Please press Ctrl+C to copy to your clipboard. (⌘+C on Mac)
  1. /**
  2.          * Mirror of the unicode table from 00c0 to 024f without diacritics.
  3.          */
  4.         //Latin 1 - Latin Extended-B
  5.         private static final String tab00c0 = "aaaaaaaceeeeiiii" +
  6.             "DNOOOOO\u00d7\u00d8UUUUYI\u00df" +
  7.             "aaaaaaaceeeeiiii" +
  8.             "\u00f0nooooo\u00f7\u00f8uuuuy\u00fey" +
  9.             "aaaaaaccccccccdd" +
  10.             "ddeeeeeeeeeegggg" +
  11.             "gggghhhhiiiiiiii" +
  12.             "iijjjjkkklllllll" +
  13.             "lllnnnnnnnnnoooo" +
  14.             "oooorrrrrrssssss" +
  15.             "ssttttttuuuuuuuu" +
  16.             "uuuuwwyyyzzzzzzf" +
  17.             "bbbbbboccddddoee" +
  18.             "effgyhltikklawnn" +
  19.             "ooooopprsseltttt" +
  20.             "uuuuyyzz3ee3255t" +
  21.             "plll!dddjjjnnnaa" +
  22.             "iioouuuuuuuuuuea" +
  23.             "aaaaaggggkkoooo3" +
  24.             "3jdddgghpnnaaaao" +
  25.             "oaaaaeeeeiiiiooo" +
  26.             "orrrruuuusstt33h" +
  27.             "hnd88zzaaeeooooo" +
  28.             "oooyybnbjbpacclt" +
  29.             "sz??buaeejjqrrryy";
  30.  
  31. /**
  32.          * Returns char without diacritics, converting to lowercase - 7 bit approximation.
  33.          *
  34.          * @param source char to convert
  35.          * @return corresponding char without diacritics
  36.          */
  37.         private static char removeDiacritic(char source) {
  38.                 if (source >= '\u00c0' && source <= '\u024f') {
  39.                     source = tab00c0.charAt((int) source - '\u00c0');
  40.                 }
  41.             return source;
  42.         }