Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
/*
 * Checks whether a NUL-terminated string is well-formed UTF-8.
 *
 * Rejected inputs: stray continuation bytes, lead bytes 0xc0/0xc1 and
 * 0xf5-0xff, truncated sequences, continuation bytes outside 0x80-0xbf,
 * overlong encodings, UTF-16 surrogates (U+D800-U+DFFF) and values above
 * U+10FFFF.
 *
 * string: NUL-terminated byte string to validate (must not be NULL)
 *
 * Returns 1 if the whole string is valid UTF-8, 0 otherwise.
 */
int check_utf8 (const char * string) {
  // Inspect the bytes as unsigned: plain char may be signed, in which case
  // every byte >= 0x80 is negative and would compare as "less than 0x80".
  const unsigned char * current = (const unsigned char *) string;
  while (*current) {
    unsigned char lead = *current ++;
    unsigned codepoint; // payload bits accumulated so far
    unsigned minimum;   // smallest value that legitimately needs this length
    unsigned remaining; // continuation bytes still expected
    if (lead < 0x80) continue; // ASCII
    // 0x80-0xbf are continuation bytes with no lead byte before them;
    // 0xc0 and 0xc1 could only start overlong 2-byte encodings
    if (lead < 0xc2) return 0;
    if (lead < 0xe0) {
      remaining = 1;
      minimum = 0x80;
      codepoint = lead & 0x1f;
    } else if (lead < 0xf0) {
      remaining = 2;
      minimum = 0x800;
      codepoint = lead & 0x0f;
    } else if (lead < 0xf5) {
      remaining = 3;
      minimum = 0x10000;
      codepoint = lead & 0x07;
    } else {
      return 0; // 0xf5-0xff never appear in UTF-8
    }
    for (; remaining; remaining --, current ++) {
      // A continuation byte is 10xxxxxx. The terminating NUL fails this test
      // too, so a truncated sequence stops here and nothing is read past
      // the end of the string.
      if ((*current & 0xc0) != 0x80) return 0;
      codepoint = (codepoint << 6) | (*current & 0x3f);
    }
    if (codepoint < minimum) return 0; // overlong encoding
    if ((codepoint >= 0xd800) && (codepoint < 0xe000)) return 0; // surrogates
    if (codepoint > 0x10ffff) return 0; // beyond the Unicode range
  }
  return 1;
}
/*
 * Decodes the value of one UTF-8 style multi-byte sequence whose lead byte
 * has already been read by the caller.
 *
 * continuations: bytes that follow the lead byte; reading stops at the first
 *                byte that is not a continuation byte (0x80-0xbf)
 * initial:       the lead byte
 * readahead:     number of continuation bytes that must follow (0 to 5)
 *
 * Returns the decoded value, or -1 if readahead is out of range or one of the
 * expected continuation bytes is missing. With readahead == 0 the lead byte
 * is returned unchanged. Only the byte structure is checked here: overlong
 * forms, surrogates and out-of-range values are not rejected.
 */
int get_utf8_codepoint (const unsigned char * continuations, unsigned char initial, unsigned char readahead) {
  // Five continuation bytes plus one lead-byte bit give 31 payload bits, which
  // still fits in a non-negative int (assuming the usual 32-bit int). A sixth
  // continuation byte would push bits off the top and could make the return
  // value negative, colliding with the -1 error code.
  if (readahead > 5) return -1;
  if (!readahead) return initial;
  // the lead byte contributes its low (6 - readahead) bits
  unsigned result = initial & ((1u << (6 - readahead)) - 1);
  while (readahead --) {
    unsigned char byte = *continuations ++;
    if ((byte & 0xc0) != 0x80) return -1; // not of the form 10xxxxxx
    result = (result << 6) | (byte & 0x3f);
  }
  return (int) result;
}
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement