// Untitled: UTF-8 helper routines (codepoint counting, indexing, decoding).

// Counting Unicode codepoints in UTF-8:

#include <stdio.h>

// Returns how many codepoints the NUL-terminated UTF-8 string `s` holds.
//
// The scan visits every byte up to the terminator and tallies each byte that
// is NOT a continuation byte (bit pattern 10xxxxxx), i.e. each byte that
// starts a new encoded character. No validation of the encoding is performed.
int utf8_strlen(const char *s) {
    int total = 0;
    for (const char *p = s; *p != '\0'; ++p) {
        // The comparison yields 1 for a lead/ASCII byte and 0 for a
        // continuation byte, so it can be added to the tally directly.
        total += ((*p & 0xC0) != 0x80);
    }
    return total;
}

// Indexing (getting the N-th Unicode character):

// Locates the n-th (zero-based) codepoint of the NUL-terminated UTF-8
// string `s`.
//
// Returns a pointer to the first byte of that codepoint, or NULL when the
// terminator is reached before n codepoints have been skipped (this includes
// any negative n, which can never match the running index).
const char *utf8_nth(const char *s, int n) {
    int seen = 0; // codepoints whose first byte has already been passed
    for (; *s != '\0'; ++s) {
        // Continuation bytes (10xxxxxx) belong to the previous codepoint.
        if ((*s & 0xC0) == 0x80) {
            continue;
        }
        if (seen == n) {
            return s;
        }
        ++seen;
    }
    return NULL; // ran off the end of the string
}

// Decoding a single UTF-8 codepoint:

#include <stdint.h>

// Decodes the single UTF-8 encoded codepoint that starts at `s`.
//
// Parameters:
//   s     - pointer to the first byte of the sequence; must point into a
//           NUL-terminated buffer.
//   bytes - out-parameter; always written. Receives the number of bytes
//           consumed from `s` (1..4), so the caller can advance by it.
//
// Returns the decoded codepoint. A NUL byte decodes to 0 with *bytes == 1.
//
// Malformed input yields U+FFFD (REPLACEMENT CHARACTER):
//   - a lead byte that is a stray continuation byte (0x80..0xBF) or is in
//     0xF8..0xFF consumes exactly one byte;
//   - a sequence cut short by a non-continuation byte (including the NUL
//     terminator) consumes only the bytes before the offending one, so the
//     offending byte is decoded on the next call and the terminator is never
//     read past.
//
// Overlong encodings, surrogate codepoints and values above U+10FFFF are
// NOT rejected; they are returned as decoded.
uint32_t utf8_decode(const char *s, int *bytes) {
    const uint32_t replacement = 0xFFFD;
    unsigned char c = (unsigned char)s[0];
    uint32_t cp;
    int len;

    if (c < 0x80) {
        *bytes = 1;
        return c;
    } else if ((c & 0xE0) == 0xC0) {
        len = 2;
        cp = c & 0x1F;
    } else if ((c & 0xF0) == 0xE0) {
        len = 3;
        cp = c & 0x0F;
    } else if ((c & 0xF8) == 0xF0) {
        len = 4;
        cp = c & 0x07;
    } else {
        // Not a valid lead byte: report one consumed byte so callers that
        // advance by *bytes always make progress.
        *bytes = 1;
        return replacement;
    }

    for (int i = 1; i < len; i++) {
        unsigned char next = (unsigned char)s[i];
        // Stop at the first byte that is not 10xxxxxx. This check also
        // catches the NUL terminator, preventing reads beyond the string.
        if ((next & 0xC0) != 0x80) {
            *bytes = i;
            return replacement;
        }
        cp = (cp << 6) | (next & 0x3F);
    }

    *bytes = len;
    return cp;
}