Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- static int32_t *get_token(int32_t **buf, int *toklen) {
- int i = 0, datalen = 16;
- int32_t *b = *buf;
- int32_t *data;
- if ((data = (int32_t *) malloc(datalen * sizeof(int32_t))) == NULL)
- return NULL;
- for (;;) {
- int32_t uc = *b++;
- utf8proc_propval_t cat = utf8proc_get_property(uc).category;
- if (!uc) {
- *toklen = i;
- *buf = b;
- return data = realloc(data, i);
- }
- if (cat != UTF8PROC_CATEGORY_ZS) {
- data[i++] = uc;
- if (i >= datalen) {
- int32_t *t;
- if ((t = (int32_t *) realloc(data, datalen <<= 1)) == NULL) {
- free(data);
- return NULL;
- }
- data = t;
- }
- } else { // end of token; find start of next
- *toklen = i;
- data = realloc(data, i);
- for (;;) {
- uc = *b;
- if (!uc)
- return data;
- if (utf8proc_get_property(uc).category != UTF8PROC_CATEGORY_ZS) {
- *buf = b;
- return data;
- }
- b++;
- }
- }
- }
- return NULL; // unreachable
- }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement