Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- # scott nicholas (neutron@scottn.us) 2011. public domain.
- # take input, show UTF-8 and try to display it.
# Main rule: runs once per input record. The whole record ($0) is handed to
# conv_unicode_to_utf8() as a hex code point; the record, the UTF-8 bytes in
# hex (delivered through arr[0]) and the encoded character string are then
# printed, each wrapped in square brackets.
{
    chr = conv_unicode_to_utf8($0, arr)
    printf "[%s]=[%s]=[%s]\n", $0, arr[0], chr
}
# conv_init() -- build the global lookup tables used by the conv_* functions
# and set __conv_init so callers can skip the work next time.
#
# Tables filled in:
#   __hextab[0..15]  hex digit for each nibble value (0-9, A-F)
#   __bintab[0..15]  4-character binary string for each nibble value
#   __hex2bin[]      hex digit          -> 4-character binary string
#   __bin2hex[]      4-character binary -> hex digit
#   __bin2dec[]      4-character binary -> nibble value 0..15
#   __chr2hex[]      character produced by sprintf("%c", n), n = 1..255
#                    -> two-digit uppercase hex of n
#
# The names after the gap in the parameter list are awk-style locals.
function conv_init(    i, j, c, h, a)
{
    # split() empties its target array and numbers the pieces from 1, so the
    # entries for nibble value 0 have to be stored after the split() calls.
    split("0001 0010 0011 0100 0101 0110 0111 1000 1001 1010 1011 1100 1101 1110 1111", __bintab, " ")
    split("1 2 3 4 5 6 7 8 9 A B C D E F", __hextab, " ")
    __bintab[0] = "0000"
    __hextab[0] = 0

    # Cross-reference the two nibble tables.
    i = 0
    while (i < 16) {
        __bin2hex[__bintab[i]] = __hextab[i]
        __bin2dec[__bintab[i]] = i
        __hex2bin[__hextab[i]] = __bintab[i]
        # __hex2dec[__hextab[i]] = i
        i++
    }

    # Character -> hex table; code 0 is intentionally not included.
    i = 1
    while (i <= 255) {
        __chr2hex[sprintf("%c", i)""] = sprintf("%02X", i)
        ++i
    }

    __conv_init = 1
}
# conv_hex2bin(hex) -- expand a string of hex digits into a string of bits.
#
#   hex  hex digits in either case; each digit is upper-cased before lookup
#
# Returns four binary characters per digit found in __hex2bin, concatenated
# in input order. A character with no table entry contributes nothing.
# Initialises the lookup tables on first use.
function conv_hex2bin(hex,    pos, bits)
{
    if (!__conv_init)
        conv_init()

    # "bits" is left unassigned on purpose so that an empty input returns the
    # same uninitialised value the loop-less path always produced.
    pos = 1
    while (pos <= length(hex)) {
        bits = bits __hex2bin[toupper(substr(hex, pos, 1))]
        pos++
    }
    return bits
}
# conv_bin2chr(bin) -- turn a string of bits into a string of characters.
#
#   bin  binary digits, consumed in groups of eight (two 4-bit halves)
#
# Each group is converted to the number 16 * high-nibble + low-nibble via
# __bin2dec and appended with sprintf("%c", number). A half that has no
# __bin2dec entry (for example a short trailing group) counts as 0.
# Initialises the lookup tables on first use.
#
# NOTE(review): what "%c" emits for values above 127 depends on the awk
# implementation and locale; gawk in a multibyte locale treats the number as
# a code point rather than a raw byte. Run under LC_ALL=C (or gawk -b) if raw
# bytes are required -- confirm against the awk actually in use.
function conv_bin2chr(bin,    pos, out)
{
    if (!__conv_init)
        conv_init()

    pos = 1
    while (pos <= length(bin)) {
        out = out sprintf("%c", __bin2dec[substr(bin, pos, 4)] * 16 + __bin2dec[substr(bin, pos + 4, 4)])
        pos += 8
    }
    return out
}
# conv_bin2hex(bin) -- compress a string of bits into hex digits.
#
#   bin  binary digits, consumed four at a time from the left
#
# Returns one hex digit (uppercase for A-F) per 4-bit group found in
# __bin2hex, concatenated in input order. A group with no table entry
# contributes nothing. Initialises the lookup tables on first use.
function conv_bin2hex(bin,    pos, digits)
{
    if (!__conv_init)
        conv_init()

    pos = 1
    while (pos <= length(bin)) {
        digits = digits __bin2hex[substr(bin, pos, 4)]
        pos += 4
    }
    return digits
}
# conv_unicode_to_utf8(hex, arr) -- encode one 16-bit code point as UTF-8.
#
#   hex  exactly four hexadecimal digits in either case, e.g. "20AC"
#   arr  array used as a second return value: arr[0] receives the UTF-8
#        bytes as hex digits (e.g. "E282AC"), or "" when hex is rejected
#
# Returns the encoded bytes as a string built by conv_bin2chr(), or "" when
# hex is not exactly four hexadecimal digits.
#
# Only the 1-, 2- and 3-byte UTF-8 forms are produced, which is all a 16-bit
# value can need. Values in the surrogate range (D800-DFFF) are not treated
# specially; they are encoded like any other 16-bit value.
function conv_unicode_to_utf8(hex, arr,
        bin, blen, utf8)
{
    # Always overwrite arr[0] so a rejected input never leaves the result of
    # an earlier call behind for the caller to print.
    arr[0] = ""

    # Reject the wrong length and any non-hex character; conv_hex2bin()
    # silently drops characters it does not know, which would otherwise
    # yield a short bit string and a bogus encoding.
    if (length(hex) != 4 || hex ~ /[^0-9A-Fa-f]/)
        return ""

    # 16-character bit string for the code point.
    bin = conv_hex2bin(hex)

    # Number of significant bits. index() returns 0 when there is no "1" at
    # all (U+0000); that must count as zero bits, not seventeen, or the code
    # point would be emitted in the overlong three-byte form E0 80 80.
    blen = index(bin, "1")
    blen = blen ? 17 - blen : 0

    # Wrap the payload bits in the UTF-8 lead/continuation prefixes.
    if (blen > 11)
        # 3 bytes: 1110xxxx 10xxxxxx 10xxxxxx (all 16 bits)
        utf8 = "1110" substr(bin, 1, 4) "10" substr(bin, 5, 6) \
               "10" substr(bin, 11, 6)
    else if (blen > 7)
        # 2 bytes: 110xxxxx 10xxxxxx (low 11 bits)
        utf8 = "110" substr(bin, 6, 5) "10" substr(bin, 11, 6)
    else
        # 1 byte: the low 8 bits, whose top bit is already 0
        utf8 = substr(bin, 9)

    # Hand the hex form back through the array ...
    arr[0] = conv_bin2hex(utf8)

    # ... and the character form as the return value.
    return conv_bin2chr(utf8)
}
Add Comment
Please, Sign In to add comment