Advertisement
rg443

javascript utf

Jan 19th, 2013
138
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
  1. /* utf.js - UTF-8 <=> UTF-16 conversion
  2.  *
  3.  * http://www.onicos.com/staff/iz/amuse/javascript/expert/utf.txt
  4.  * Version: 1.0
  5.  * LastModified: Dec 25 1999
  6.  * This library is free.  You can redistribute it and/or modify it.
  7.  */
  8.  
  9. /*
  10.  * Interfaces:
  11.  * utf8 = utf16to8(utf16);
  12.  * utf16 = utf8to16(utf8);
  13.  */
  14.  
  /*
   * utf16to8 - encode a JavaScript (UTF-16) string as UTF-8.
   *
   * str:     the string to encode.
   * returns: a string in which every character holds one UTF-8 byte
   *          (char codes 0x00..0xFF).
   *
   * A high surrogate immediately followed by a low surrogate is combined
   * into a single code point and written as one 4-byte sequence.
   * A surrogate that is not part of such a pair is written as a 3-byte
   * sequence on its own, exactly as before.
   * U+0000 is written as the two bytes 0xC0 0x80 (it does not satisfy the
   * c >= 0x0001 test below); this is kept so existing output is unchanged.
   */
  function utf16to8(str) {
      var out, i, len, c, next;

      out = "";
      len = str.length;
      for (i = 0; i < len; i++) {
          c = str.charCodeAt(i);

          // Combine a surrogate pair into one code point above U+FFFF.
          if (c >= 0xD800 && c <= 0xDBFF && i + 1 < len) {
              next = str.charCodeAt(i + 1);
              if (next >= 0xDC00 && next <= 0xDFFF) {
                  c = 0x10000 + ((c - 0xD800) << 10) + (next - 0xDC00);
                  i++; // the low surrogate has been consumed
              }
          }

          if ((c >= 0x0001) && (c <= 0x007F)) {
              // 0xxxxxxx
              out += String.fromCharCode(c);
          } else if (c > 0xFFFF) {
              // 11110xxx  10xxxxxx  10xxxxxx  10xxxxxx
              out += String.fromCharCode(0xF0 | ((c >> 18) & 0x07));
              out += String.fromCharCode(0x80 | ((c >> 12) & 0x3F));
              out += String.fromCharCode(0x80 | ((c >>  6) & 0x3F));
              out += String.fromCharCode(0x80 | ((c >>  0) & 0x3F));
          } else if (c > 0x07FF) {
              // 1110xxxx  10xxxxxx  10xxxxxx
              out += String.fromCharCode(0xE0 | ((c >> 12) & 0x0F));
              out += String.fromCharCode(0x80 | ((c >>  6) & 0x3F));
              out += String.fromCharCode(0x80 | ((c >>  0) & 0x3F));
          } else {
              // 110xxxxx  10xxxxxx  (also reached for c == 0)
              out += String.fromCharCode(0xC0 | ((c >>  6) & 0x1F));
              out += String.fromCharCode(0x80 | ((c >>  0) & 0x3F));
          }
      }
      return out;
  }
  35.  
  /*
   * utf8to16 - decode a UTF-8 byte string into a JavaScript (UTF-16) string.
   *
   * str:     a string in which every character holds one UTF-8 byte.
   * returns: the decoded string.
   *
   * 1-, 2-, 3- and 4-byte sequences are decoded. A 4-byte sequence that
   * yields a code point above U+FFFF is returned as a surrogate pair; one
   * that yields a value above U+10FFFF is replaced by U+FFFD.
   * Characters that match no lead-byte case (stray continuation bytes
   * 0x80..0xBF, lead bytes 0xF8..0xFF, char codes above 0xFF) are skipped.
   * Continuation bytes are not validated, and bytes missing at the end of
   * the input contribute zero bits (charCodeAt returns NaN there).
   */
  function utf8to16(str) {
      var out, i, len, c;
      var char2, char3, char4, cp;

      out = "";
      len = str.length;
      i = 0;
      while (i < len) {
          c = str.charCodeAt(i++);
          switch (c >> 4) {
            case 0: case 1: case 2: case 3: case 4: case 5: case 6: case 7:
              // 0xxxxxxx
              out += str.charAt(i - 1);
              break;
            case 12: case 13:
              // 110x xxxx   10xx xxxx
              char2 = str.charCodeAt(i++);
              out += String.fromCharCode(((c & 0x1F) << 6) | (char2 & 0x3F));
              break;
            case 14:
              // 1110 xxxx  10xx xxxx  10xx xxxx
              char2 = str.charCodeAt(i++);
              char3 = str.charCodeAt(i++);
              out += String.fromCharCode(((c & 0x0F) << 12) |
                                         ((char2 & 0x3F) << 6) |
                                         ((char3 & 0x3F) << 0));
              break;
            case 15:
              // 1111 0xxx  10xx xxxx  10xx xxxx  10xx xxxx
              if (c & 0x08) {
                  // 0xF8..0xFF is not a 4-byte lead byte: skip it
                  break;
              }
              char2 = str.charCodeAt(i++);
              char3 = str.charCodeAt(i++);
              char4 = str.charCodeAt(i++);
              cp = ((c & 0x07) << 18) |
                   ((char2 & 0x3F) << 12) |
                   ((char3 & 0x3F) << 6) |
                   ((char4 & 0x3F) << 0);
              if (cp > 0x10FFFF) {
                  // beyond the Unicode range
                  out += String.fromCharCode(0xFFFD);
              } else if (cp > 0xFFFF) {
                  // split into a high/low surrogate pair
                  cp -= 0x10000;
                  out += String.fromCharCode(0xD800 | (cp >> 10),
                                             0xDC00 | (cp & 0x3FF));
              } else {
                  out += String.fromCharCode(cp);
              }
              break;
          }
      }

      return out;
  }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement