Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
/*
 * spj.h -- data model and lexer interface of a small JSON parser.
 * (Author's note: this is my spj.h, pasted here together with the
 * sketches of the .c file.)
 */
/* NOTE(review): identifiers that begin with '_' followed by an uppercase
 * letter are reserved for the implementation; the guard name is kept as is
 * so that nothing testing it elsewhere breaks. */
#ifndef _SPJ_H
#define _SPJ_H

#include <stdlib.h>

struct jval;
#ifdef USE_OBJ_DICT
struct htab; // will be implemented sometime
#endif

/* Kind of value stored in a jValue node. */
typedef enum jValue_type {
    Unknown = 0,
    Object,
    Array,
    String,
    Number,
    Bool,
    Null
} jValue_type;

/* Payload of an Object node. */
typedef struct jObject {
    struct jval *data; // array
    int size;
#ifdef USE_OBJ_DICT
    struct htab *hash;
#endif
} jObject;

/* Payload of an Array node. */
typedef struct jArray {
    struct jval *data; // array
    int size;
} jArray;

/* Payload of a String node. */
typedef struct jString {
    char *data; // nil terminating
    int size;
} jString;

/* Storage shared by all value kinds; the active member is selected by the
 * type tag of the owning jValue (or by the current token in the lexer). */
union value {
    struct jObject obj;
    struct jArray arr;
    struct jString str;
    double number; // Bool, Null, integer and real (all in one)
};

/* One node of the parsed JSON tree. */
typedef struct jval {
    jValue_type type;    // try to do it without flags
    struct jval *parent; // calculated AFTER the filling of the PARENT's container
    char *name;          // name pairs (valid only for members of the object)
    union value value;
} jValue;

/* Tokens produced by the lexer, plus a few parser-side pseudo tokens. */
typedef enum jtoken {
    Jeof,
    Jnumber,
    Jstring,
    Jbool,     // true false
    Jnull,     // null
    Jobjstart, // {
    Jarrstart, // [
    Jobjend,   // }
    Jarrend,   // ]
    Jcolon,    // : =
    Jcomma,    // ,
    Jname,     // extension for name : value (unquoted name)
    Jerror,
    Janymember,
    Jnomem = -1
} jToken;

/* Cursor over the input text.
 * NOTE(review): the accessors (spj_getc / spj_seek) are not shown in this
 * file; dsize and curpos are presumably the buffer size and the read
 * offset -- confirm against their implementation. */
typedef struct spjiter {
    const char *data;
    size_t dsize, curpos;
} jIter;

/* Lexer state. */
typedef struct jlexer {
    jIter it;            // input cursor
    jToken tok;          // token most recently returned by spj_getoken()
    int charin, charpos; // first character of the current token and the
                         // cursor position taken right after reading it
    char *errmsg;        // heap-allocated error text built by the lexer
    union value lv;      // value of the current token (number or string)
    jToken errtok;       // token that was being read when an error message
                         // was built; appended last so the layout of the
                         // earlier members is unchanged
} jLexer;

/* Reads the next token from lex, stores it in lex->tok and returns it. */
jToken spj_getoken (jLexer *lex);

#endif
/*
 * spj.c
 * These are just sketches of the lexer code and of two parser functions
 * (the parser should be read as pseudocode).
 * Stanislav, at first I did not put some of the functions here -- you might
 * find it interesting to write them yourself (on 4/2/2014 I added them here).
 * Note that some of my own structures are slightly different.
 */
// helper functions for get_jutf8str, plus a few more for error messages
// these are the headers I use here
- #include <stdio.h>
- #include <string.h>
- #include <stdint.h>
- #include <ctype.h>
- #include <errno.h>
- #include <stdarg.h>
/*
 * Copies the text in [src, end) into buf[] (capacity bs bytes) and returns
 * buf. It is used to quote the piece of the JSON source that caused an
 * error while recognising true, false or null (e.g. "tru" or "trueeee").
 *
 * The result is always NUL-terminated when bs > 0. Text that does not fit
 * is cut and, when the buffer has room for it (bs >= 4), finished with
 * "...". With bs <= 0 the buffer is left untouched; an empty or reversed
 * range yields an empty string.
 */
static char *
stsubstr (const char *src, const char *end, char buf[], int bs)
{
    if (bs <= 0)
        return buf; /* no room even for the terminator */

    size_t cap = (size_t)bs;
    size_t len = (end > src) ? (size_t)(end - src) : 0;

    if (len < cap) {
        /* Everything fits together with the terminator. */
        memcpy(buf, src, len);
        buf[len] = '\0';
    } else if (cap >= 4) {
        /* Keep the head and mark the cut with an ellipsis. */
        memcpy(buf, src, cap - 4);
        memcpy(buf + cap - 4, "...", 4);
    } else {
        /* Buffer too small for an ellipsis: plain truncation. */
        memcpy(buf, src, cap - 1);
        buf[cap - 1] = '\0';
    }
    return buf;
}
- // like sprintf to dynamic string in jLexer
- static jToken
- spj_eprintf (jLexer *lex, jToken curtok, const char *fmt, ...)
- {
- lex->errtok = curtok;
- int retl;
- jToken rc = Jnomem;
- va_list ap;
- va_start(ap, fmt);
- retl = vsnprintf(lex->errmsg, 0, fmt, ap);
- va_end(ap);
- if (lex->errmsg = (char *)malloc(retl + 1)) {
- va_start(ap, fmt);
- vsnprintf(lex->errmsg, retl + 1, fmt, ap);
- va_end(ap);
- rc = Jerror;
- }
- return rc;
- }
- // like sprintf to concat string with lex->errmsg
- static jToken
- spj_emsgcat (jLexer *lex, const char *fmt, ...)
- {
- int retl, lemsg = lex->errmsg ? strlen(lex->errmsg) : 0;
- char *t = lex->errmsg;
- jToken rc = Jnomem;
- va_list ap;
- va_start(ap, fmt);
- retl = vsnprintf(t, 0, fmt, ap);
- va_end(ap);
- if (lex->errmsg = (char *)realloc(lex->errmsg, retl + lemsg + 1)) {
- va_start(ap, fmt);
- vsnprintf(lex->errmsg + lemsg, retl, fmt, ap);
- va_end(ap);
- rc = Jerror;
- } else {
- lex->errmsg = t;
- }
- return rc;
- }
- // check true,false,null
- static int
- check_tfn (jLexer *lex, const char *w)
- {
- int c;
- const char *cw = w;
- while ((c = spj_getc(&lex->it)) && *w == c)
- w++;
- if (c && *w == 0)
- if (isspace(c) || strchr("]},", c)) {
- spj_seek(&lex->it, -1);
- return 1;
- }
- char buf[10];
- if (c) {
- spj_eprintf (lex, lex->charin == 'n' ? Jnull : Jbool,
- "unexpected '%c%s%c...' instead of '%c%s' at pos: %d\n",
- lex->charin, stsubstr(cw, w, buf, sizeof(buf)), c,
- lex->charin, cw, lex->charpos);
- spj_seek(&lex->it, -1);
- } else {
- spj_eprintf (lex, lex->charin == 'n' ? Jnull : Jbool,
- "unexpected EOF instead of '%c%s' at pos: %d\n",
- lex->charin, cw, lex->charpos);
- }
- return 0;
- }
/*
 * Numeric value (0..15) of the hexadecimal digit c.
 * The argument is NOT validated: call it only for characters that are
 * already known to be hex digits.
 */
static inline int
hexdigit (int c)
{
    return (c <= '9') ? c - '0'
         : (c <= 'F') ? c - 'A' + 10
         :              c - 'a' + 10;
}
/*
 * Combines a UTF-16 surrogate pair into the code point it encodes.
 *
 * sp[0] must be a high surrogate (0xD800..0xDBFF) and sp[1] a low
 * surrogate (0xDC00..0xDFFF). Returns the code point
 * (0x10000..0x10FFFF), or 0 if the pair is not valid.
 */
int
surpair_to_ucs (int sp[2])
{
    const unsigned int hi = (unsigned int)sp[0];
    const unsigned int lo = (unsigned int)sp[1];

    if ((hi & 0xFFFFFC00u) != 0xD800u || (lo & 0xFFFFFC00u) != 0xDC00u)
        return 0;

    /* The two 10-bit payloads form an offset above the BMP, so 0x10000 has
     * to be added; without it D800/DC00 (U+10000) would come out as 0 and
     * be mistaken for an invalid pair. */
    return (int)(0x10000u + (((hi & 0x3FFu) << 10) | (lo & 0x3FFu)));
}
/*
 * Writes the UTF-8 encoding of the code point uc to b[] (no terminator is
 * added) and returns the number of bytes written, or 0 on error (uc < 0).
 *
 * b must have room for up to 6 bytes: values above 0x1FFFFF are still
 * encoded with the historical 5- and 6-byte forms, and surrogate values
 * are not rejected.
 */
int
ucs_to_utf8 (int uc, char *b)
{
    /* Indexed by sequence length: payload bits and tag of the lead byte. */
    static const unsigned char lead_bits[7] =
        { 0x00, 0x00, 0x1f, 0x0f, 0x07, 0x03, 0x01 };
    static const unsigned char lead_tag[7] =
        { 0x00, 0x00, 0xc0, 0xe0, 0xf0, 0xf8, 0xfc };

    if (uc < 0)
        return 0;

    unsigned int ucs = (unsigned int)uc;
    if (ucs < 0x80) {
        b[0] = (char)ucs;
        return 1;
    }

    int n;
    if (ucs < 0x800)
        n = 2;
    else if (ucs < 0x10000)
        n = 3;
    else if (ucs < 0x200000)
        n = 4;
    else if (ucs < 0x4000000)
        n = 5;
    else
        n = 6;

    /* Continuation bytes are filled from the last one backwards, six
     * payload bits at a time. */
    for (int i = n - 1; i > 0; i--, ucs >>= 6)
        b[i] = (char)((ucs & 0x3f) | 0x80);

    /* What is left of ucs goes into the lead byte. */
    b[0] = (char)((ucs & lead_bits[n]) | lead_tag[n]);
    return n;
}
- // 4 hex digits to integer (returns -1 on error)
- static int
- spj_gethex4 (jLexer *lex)
- {
- int i, ucs = 0, c;
- for (i = 0; i < 4; i++) {
- if (isxdigit(c = spj_getc(&lex->it)))
- ucs = (ucs << 4) | hexdigit(c);
- else {
- spj_eprintf (lex, Jstring,
- "invalid '%c' (0x02x) character in \\uHHHH in Jstring before pos: %d\n",
- c, c, lex->it.curpos);
- return -1;
- }
- }
- return ucs;
- }
- // json String encoded utf-8
- jToken
- get_jutf8str (jLexer *lex)
- {
- jToken rc = Jeof;
- int ucs32, c, lutf8, lstr = 0, capacity, incr = JSTR_INIT_INCREMENT;
- char utf8[8], *str = (char *)malloc(capacity = JSTR_INIT_SIZE);
- if (!str)
- return Jnomem;
- while (c = spj_getc(&lex->it)) {
- ucs32 = -1;
- if (c == '"') { // return result
- lex->lv.str.size = lstr;
- str[lstr++] = 0;
- lex->lv.str.data = (char *)realloc(str, lstr); // free unused bytes
- return Jstring;
- }
- if (c == '\\') {
- // c == 0 after switch() means any error is found
- switch (c = spj_getc(&lex->it)) {
- case 0: break;
- case 't': c = '\t'; break;
- case 'r': c = '\r'; break;
- case 'n': c = '\n'; break;
- case 'f': c = '\f'; break;
- case 'b': c = '\b'; break;
- case '\\': c = '\\'; break;
- case '/': c = '/'; break;
- case '"': c = '"'; break;
- case 'u':
- {
- int sp[2]; // assume the appearance of a surrogate pair
- if ((ucs32 = sp[0] = spj_gethex4(lex)) == -1) {
- rc = Jerror; c = 0; // c == 0 error flag
- break;
- }
- if ((ucs32 & 0xFC00) == 0xd800) { // surrogate pair
- if ((c = spj_getc(&lex->it)) == '\\' &&
- (c = spj_getc(&lex->it)) == 'u') {
- if (!(sp[1] = spj_gethex4(lex))) {
- rc = Jerror; c = 0; break;
- }
- if (!(ucs32 = surpair_to_ucs(sp))) {
- fprintf (stderr, "invalid surrogate pair in Jstring before pos: %ld\n",
- lex->it.curpos);
- rc = Jerror; c = 0; break;
- }
- } else {
- fprintf (stderr, "unexpected character '\\%c' (0x%02x) in Jstring at pos: %ld\n",
- c, c, lex->it.curpos);
- rc = Jerror; c = 0; break;
- }
- }
- }
- break;
- default:
- rc = Jerror;
- fprintf (stderr, "invalid escape sequence '\\%c' (0x%02x) in Jstring at pos: %ld\n",
- c, c, lex->it.curpos);
- c = 0;
- }
- if (!c)
- break; // break while () loop, free mem, returns error
- }
- /*
- We assume that JSON encoded UTF-8.
- Therefore, all symbols except encoded by \uHHHH
- copied into the result
- */
- if (ucs32 != -1) { // utf-8 value of \uHHHH is in utf8[]
- if (!(lutf8 = ucs_to_utf8(ucs32, utf8))) {
- rc = Jerror;
- fprintf (stderr, "invalid ucs to utf8 conversion near pos: %ld\n",
- lex->it.curpos);
- break;
- }
- } else {
- lutf8 = 1;
- utf8[0] = c;
- }
- if (lutf8 + 1 + lstr > capacity) {
- char *t = (char *)realloc(str, capacity += (lutf8 + incr));
- if (!t) {
- rc = Jnomem;
- break;
- }
- str = t;
- if ((incr <<= 1) > JSTR_MAX_INCREMENT)
- incr = JSTR_MAX_INCREMENT;
- }
- memcpy(str + lstr, utf8, lutf8);
- lstr += lutf8;
- }
- free(str);
- return rc;
- }
- jToken
- spj_getoken (jLexer *lex)
- {
- int c, i;
- while (isspace(c = spj_getc(&lex->it)));
- lex->charin = c;
- lex->charpos = spj_seek(&lex->it, 0); // one position more !!!
- switch (c) {
- case 0:
- return lex->tok = Jeof;
- case '{': return lex->tok = Jobjstart;
- case '}': return lex->tok = Jobjend;
- case '[': return lex->tok = Jarrstart;
- case ']': return lex->tok = Jarrend;
- case ':': case '=': return lex->tok = Jcolon;
- case ',': return lex->tok = Jcomma;
- case 't': case 'f': case 'n':
- lex->lv.number = (c == 't'); // false, null = 0
- {
- const char *w = "rue";
- if (c == 'f')
- w = "alse";
- else if (c == 'n')
- w = "ull";
- return !check_tfn(lex, w) ? lex->tok = Jerror :
- (c == 'n') ? (lex->tok = Jnull) : (lex->tok = Jbool);
- }
- case '"':
- return lex->tok = get_jutf8str(lex);
- default :
- if (isdigit(c) || c == '-') {
- char *t, s[22];
- s[0] = c;
- for (i = 1;
- (isdigit(c = spj_getc(&lex->it)) || strchr("eE+-.", c)) && i < 20;
- i++)
- s[i] = c;
- s[i++] = c;
- s[i] = 0;
- errno = 0;
- lex->lv.number = strtod(s, &t);
- if (errno || s == t ||
- !(isspace(*t) || *t == ',' || *t == '}' || *t == ']')) {
- fprintf (stderr, "invalid number '%s' at pos: %ld\n",
- s, lex->it.curpos);
- return lex->tok = Jerror;
- }
- spj_seek(&lex->it, -1);
- return lex->tok = Jnumber;
- }
- fprintf (stderr, "unexpected character '%c' (0x%02x) at pos: %ld\n",
- c, c, lex->it.curpos);
- return lex->tok = Jerror;
- }
- fprintf (stderr, "interror: spj_getoken() notreached\n");
- return lex->tok = Jerror;
- }
- /*
- это чисто заглушки, чтобы компилились spj_parse_object() и spj_parse_array()
- */
- static void complete_obj (jValue *obj) {
- }
- static void delete_member (jValue *obj) {
- }
- static void complete_arr (jValue *arr) {
- }
- static void add_arrmember (jValue *arr, jValue *member, int n) {
- }
- static void put_objmember (jValue *obj, jValue *member) {
- }
- static jToken struct_error (jToken rc, jToken expected, jLexer *lex, jValue *obj) {
- }
- static jValue_type do_jtype (jToken rc) {
- }
- jToken spj_parse_array (jLexer *lex, jValue *obj);
- jToken
- spj_parse_object (jLexer *lex, jValue *obj)
- {
- obj->type = Object;
- char *name;
- jToken rc;
- int n;
- for (n = 0;;n++) {
- if ((rc = spj_getoken(lex)) == Jobjend && n == 0)
- break;
- if (rc != Jstring)
- return struct_error(rc, Jstring, lex, obj);
- name = lex->lv.str.data;
- if ((rc = spj_getoken(lex)) != Jcolon)
- return struct_error(rc, Jcolon, lex, obj);
- jValue member = {0};
- member.name = name;
- switch (rc = spj_getoken(lex)) {
- case Jnumber: case Jstring: case Jbool: case Jnull:
- member.value = lex->lv;
- member.type = do_jtype(rc);
- break;
- case Jobjstart:
- if ((rc = spj_parse_object(lex, &member)) == Jobjend)
- break; // OK
- delete_member(&member);
- return rc;
- case Jarrstart:
- if ((rc = spj_parse_array(lex, &member)) == Jarrend)
- break; // OK
- delete_member(&member);
- return rc;
- default:
- return struct_error(rc, Janymember, lex, obj);
- }
- put_objmember(obj, &member);
- if ((rc = spj_getoken(lex)) == Jobjend)
- break;
- if (rc != Jcomma)
- return struct_error(rc, Jcomma, lex, obj);
- }
- complete_obj(obj);
- return rc;
- }
- jToken
- spj_parse_array (jLexer *lex, jValue *obj)
- {
- obj->type = Array;
- jToken rc;
- int n;
- for (n = 0;;n++) {
- if ((rc = spj_getoken(lex)) == Jarrend && n == 0)
- break;
- jValue member = {0};
- switch (rc) {
- case Jnumber: case Jstring: case Jbool: case Jnull:
- member.value = lex->lv;
- member.type = do_jtype(rc);
- break;
- case Jobjstart:
- if ((rc = spj_parse_object(lex, &member)) == Jobjend)
- break; // OK
- delete_member(&member);
- return rc;
- case Jarrstart:
- if ((rc = spj_parse_array(lex, &member)) == Jarrend)
- break; // OK
- delete_member(&member);
- return rc;
- default:
- return struct_error(rc, Janymember, lex, obj);
- }
- add_arrmember(obj, &member, n);
- if ((rc = spj_getoken(lex)) == Jarrend)
- break;
- if (rc != Jcomma)
- return struct_error(rc, Jcomma, lex, obj);
- }
- complete_arr(obj);
- return rc;
- }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement