Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- // Tokeniser. Read text and convert it to tokens for simple processing.
- enum e_TOKEN_TYPE // Token categories; this is the tag of the E_TOKEN_TYPE slot of a token.
- {
- e_TOKEN_TYPE_NONE, // No token; Parser_GetNextToken returns this type when it peeks '\0'.
- e_TOKEN_TYPE_STRING,
- e_TOKEN_TYPE_FLOAT,
- // Function and variable symbols get separate token types for simplicity.
- e_TOKEN_TYPE_FUNC,
- e_TOKEN_TYPE_VAR,
- e_TOKEN_TYPE_NUM, // Integer literal; Parser_GetNumber builds tokens of this type.
- e_TOKEN_TYPE_BOOL,
- e_TOKEN_TYPE_OP, // Operator or punctuation; Parser_GetOp builds tokens of this type.
- }
- enum e_TOKEN_OP // Operator and punctuation identifiers; every entry except NONE is matched in Parser_GetOp.
- {
- e_TOKEN_OP_NONE, // Value 0: no operator recognised.
- e_TOKEN_OP_ASSIGN, // '='
- e_TOKEN_OP_EQUALS, // '=='
- e_TOKEN_OP_LTE, // '<='
- e_TOKEN_OP_GTE, // '>='
- e_TOKEN_OP_LESS, // '<'
- e_TOKEN_OP_GREATER, // '>'
- e_TOKEN_OP_NOT, // '!'
- e_TOKEN_OP_NEQ, // '!='
- e_TOKEN_OP_INV, // '~'
- e_TOKEN_OP_INV_ASS, // '~='
- e_TOKEN_OP_ADD, // '+'
- e_TOKEN_OP_ADD_ASS, // '+='
- e_TOKEN_OP_SUB, // '-'
- e_TOKEN_OP_SUB_ASS, // '-='
- e_TOKEN_OP_MUL, // '*'
- e_TOKEN_OP_MUL_ASS, // '*='
- e_TOKEN_OP_DIV, // '/'
- e_TOKEN_OP_DIV_ASS, // '/='
- e_TOKEN_OP_MOD, // '%'
- e_TOKEN_OP_MOD_ASS, // '%='
- e_TOKEN_OP_XOR, // '^'
- e_TOKEN_OP_XOR_ASS, // '^='
- e_TOKEN_OP_LAND, // '&' (single ampersand; the doubled form is e_TOKEN_OP_AND)
- e_TOKEN_OP_LAND_ASS, // '&='
- e_TOKEN_OP_LOR, // '|' (single bar; the doubled form is e_TOKEN_OP_OR)
- e_TOKEN_OP_LOR_ASS, // '|='
- e_TOKEN_OP_RSHIFT, // '>>'
- e_TOKEN_OP_RSHIFT_ASS, // '>>='
- e_TOKEN_OP_SHIFT, // '>>>'
- e_TOKEN_OP_SHIFT_ASS, // '>>>='
- e_TOKEN_OP_LSHIFT, // '<<'
- e_TOKEN_OP_LSHIFT_ASS, // '<<='
- e_TOKEN_OP_AND, // '&&'
- e_TOKEN_OP_OR, // '||'
- e_TOKEN_OP_OP_BRACKET, // '('
- e_TOKEN_OP_CL_BRACKET, // ')'
- e_TOKEN_OP_OP_BRACE, // '{'
- e_TOKEN_OP_CL_BRACE, // '}'
- e_TOKEN_OP_OP_SQUARE, // '['
- e_TOKEN_OP_CL_SQUARE, // ']'
- e_TOKEN_OP_CL_INC, // '++' (NOTE(review): the CL_ prefix on this and the following entries does not denote a closing bracket)
- e_TOKEN_OP_CL_DEC, // '--'
- e_TOKEN_OP_CL_ELIPSIS, // '...' (ellipsis; identifier spelling kept as is)
- e_TOKEN_OP_CL_CONCAT, // '..'
- e_TOKEN_OP_CL_HASH, // '#'
- e_TOKEN_OP_CL_PAAMAYIM, // '::'
- e_TOKEN_OP_CL_DQUOTE, // '"'
- e_TOKEN_OP_CL_QUOTE, // '''
- }
- enum E_TOKEN // Layout of one token array: index 0 holds the type, index 1 holds the payload.
- {
- e_TOKEN_TYPE:E_TOKEN_TYPE, // Index 0: the token's category.
- Float:E_TOKEN_FLOAT_VAL = 1, // This and every entry below is pinned to index 1, so they all name the same cell under different tags.
- bool:E_TOKEN_BOOL_VAL = 1,
- e_TOKEN_OP:E_TOKEN_OP = 1, // Payload slot when viewed as an operator id.
- E_TOKEN_FUNC_PTR = 1,
- E_TOKEN_SYM_PTR = 1,
- E_TOKEN_NUM_VAL = 1 // Payload slot when viewed as an untagged integer.
- }
- // Tokeniser input state: the line being scanned, its length, and the
- // current read offset into that line.  All three belong to ONE declaration
- // list, so the entries must be separated by commas (a ';' after
- // ISI_gInputLen would end the list and leave ISI_gInputPtr undeclared).
- static stock
-     ISI_gInputLine[512],
-     ISI_gInputLen,
-     ISI_gInputPtr;
- static stock Parser_GetSymType(const name[], &e_TOKEN_TYPE:t, &ptr) // Unimplemented stub: takes a name plus two by-reference parameters (t, ptr).
- { // The body is empty, so neither t nor ptr is written and nothing is returned.
- } // NOTE(review): presumably meant to classify a symbol and report where it lives - confirm before relying on it.
- // Look at the character under the read cursor without consuming it.
- // Reads ISI_gInputLine at ISI_gInputPtr; the cursor is left untouched.
- static stock Parser_Peek()
- {
-     new current = ISI_gInputLine[ISI_gInputPtr];
-     return current;
- }
- // True for any character code from '\0' up to and including ' '.
- // NOTE(review): the pattern is written with a doubled pair of parentheses,
- // and unlike Parser_SkipWhitespace it counts '\0' as whitespace - confirm
- // both are intended.
- #define Parser_IsWhitespace((%0)) ('\0' <= (%0) <= ' ')
- // Advance ISI_gInputPtr past every character whose code is above '\0' and
- // at most ' '.  The scan stops on '\0', so it never runs past the
- // terminator of the input line.
- static stock Parser_SkipWhitespace()
- {
-     for ( ; ; )
-     {
-         new ch = ISI_gInputLine[ISI_gInputPtr];
-         // Stop on the terminator (or anything below it) and on the first
-         // printable character.
-         if (ch <= '\0' || ch > ' ') break;
-         ++ISI_gInputPtr;
-     }
- }
- // Operator matchers used only inside Parser_GetOp.  Each one expands to a
- // single "else if" arm that compares the look-ahead characters p0..p3 with
- // the given literals and, on a match, stores operator id %9 in the token's
- // payload slot and records the operator's length in "len".
- //
- // The store goes to ret[E_TOKEN_OP] (a field of E_TOKEN).  Indexing with
- // e_TOKEN_TYPE_OP instead would use a token-type constant as an array
- // index, which lies outside the two-cell token array.
- #define PARSER_DO_OP_1(%9,%0) else if (p0 == %0) ret[E_TOKEN_OP] = (%9), len = 1;
- #define PARSER_DO_OP_2(%9,%0,%1) else if (p0 == %0 && p1 == %1) ret[E_TOKEN_OP] = (%9), len = 2;
- #define PARSER_DO_OP_3(%9,%0,%1,%2) else if (p0 == %0 && p1 == %1 && p2 == %2) ret[E_TOKEN_OP] = (%9), len = 3;
- #define PARSER_DO_OP_4(%9,%0,%1,%2,%3) else if (p0 == %0 && p1 == %1 && p2 == %2 && p3 == %3) ret[E_TOKEN_OP] = (%9), len = 4;
- // Read one operator or punctuation token at the read cursor.
- //
- // Returns a token whose type slot is e_TOKEN_TYPE_OP.  The payload slot
- // holds the matched e_TOKEN_OP value, or stays 0 when nothing matched or no
- // input was left.
- //
- // Side effects:
- //  - on a match, ISI_gInputPtr advances by the operator's length;
- //  - on an unrecognised character, Parser_Error is called and
- //    ISI_gInputPtr advances by one so the caller can carry on;
- //  - with no input left, Parser_Error is called and the cursor stays put.
- static stock Parser_GetOp()
- {
-     new
-         ret[E_TOKEN] = {e_TOKEN_TYPE_OP, 0},
-         len = ISI_gInputLen - ISI_gInputPtr,
-         p0, p1, p2, p3;
-     // Fetch up to four look-ahead characters, never reading at or beyond
-     // ISI_gInputLen.  Characters that are not available stay 0.
-     if (len > 0)
-     {
-         p0 = ISI_gInputLine[ISI_gInputPtr + 0];
-         if (len > 1)
-         {
-             p1 = ISI_gInputLine[ISI_gInputPtr + 1];
-             if (len > 2)
-             {
-                 p2 = ISI_gInputLine[ISI_gInputPtr + 2];
-                 if (len > 3)
-                 {
-                     p3 = ISI_gInputLine[ISI_gInputPtr + 3];
-                 }
-             }
-         }
-     }
-     else
-     {
-         Parser_Error("Unexpected end of input.");
-         return ret;
-     }
-     // Should really use a "trie" here, but don't - too complex for now.
-     // The assignment below is deliberate: it resets "len" to 0 (meaning
-     // "nothing matched yet"), is never true, and gives the macro-generated
-     // "else if" arms an "if" to hang from.
-     if ((len = 0)) return ret; // Placeholder "if" - leave it!
-     // Longest operators first, so that e.g. ">>>=" wins over ">>" and ">".
-     PARSER_DO_OP_4(e_TOKEN_OP_SHIFT_ASS, '>', '>', '>', '=')
-     PARSER_DO_OP_3(e_TOKEN_OP_RSHIFT_ASS, '>', '>', '=')
-     PARSER_DO_OP_3(e_TOKEN_OP_SHIFT, '>', '>', '>')
-     PARSER_DO_OP_3(e_TOKEN_OP_LSHIFT_ASS, '<', '<', '=')
-     PARSER_DO_OP_3(e_TOKEN_OP_CL_ELIPSIS, '.', '.', '.')
-     PARSER_DO_OP_2(e_TOKEN_OP_EQUALS, '=', '=')
-     PARSER_DO_OP_2(e_TOKEN_OP_LTE, '<', '=')
-     PARSER_DO_OP_2(e_TOKEN_OP_GTE, '>', '=')
-     PARSER_DO_OP_2(e_TOKEN_OP_NEQ, '!', '=')
-     PARSER_DO_OP_2(e_TOKEN_OP_INV_ASS, '~', '=')
-     PARSER_DO_OP_2(e_TOKEN_OP_ADD_ASS, '+', '=')
-     PARSER_DO_OP_2(e_TOKEN_OP_SUB_ASS, '-', '=')
-     PARSER_DO_OP_2(e_TOKEN_OP_MUL_ASS, '*', '=')
-     PARSER_DO_OP_2(e_TOKEN_OP_DIV_ASS, '/', '=')
-     PARSER_DO_OP_2(e_TOKEN_OP_MOD_ASS, '%', '=')
-     PARSER_DO_OP_2(e_TOKEN_OP_XOR_ASS, '^', '=')
-     PARSER_DO_OP_2(e_TOKEN_OP_LAND_ASS, '&', '=')
-     PARSER_DO_OP_2(e_TOKEN_OP_LOR_ASS, '|', '=')
-     PARSER_DO_OP_2(e_TOKEN_OP_RSHIFT, '>', '>')
-     PARSER_DO_OP_2(e_TOKEN_OP_LSHIFT, '<', '<')
-     PARSER_DO_OP_2(e_TOKEN_OP_AND, '&', '&')
-     PARSER_DO_OP_2(e_TOKEN_OP_OR, '|', '|')
-     PARSER_DO_OP_2(e_TOKEN_OP_CL_PAAMAYIM, ':', ':')
-     PARSER_DO_OP_2(e_TOKEN_OP_CL_INC, '+', '+')
-     PARSER_DO_OP_2(e_TOKEN_OP_CL_DEC, '-', '-')
-     PARSER_DO_OP_2(e_TOKEN_OP_CL_CONCAT, '.', '.')
-     PARSER_DO_OP_1(e_TOKEN_OP_ASSIGN, '=')
-     PARSER_DO_OP_1(e_TOKEN_OP_LESS, '<')
-     PARSER_DO_OP_1(e_TOKEN_OP_GREATER, '>')
-     PARSER_DO_OP_1(e_TOKEN_OP_NOT, '!')
-     PARSER_DO_OP_1(e_TOKEN_OP_INV, '~')
-     PARSER_DO_OP_1(e_TOKEN_OP_ADD, '+')
-     PARSER_DO_OP_1(e_TOKEN_OP_SUB, '-')
-     PARSER_DO_OP_1(e_TOKEN_OP_MUL, '*')
-     PARSER_DO_OP_1(e_TOKEN_OP_DIV, '/')
-     PARSER_DO_OP_1(e_TOKEN_OP_MOD, '%')
-     PARSER_DO_OP_1(e_TOKEN_OP_XOR, '^')
-     PARSER_DO_OP_1(e_TOKEN_OP_LAND, '&')
-     PARSER_DO_OP_1(e_TOKEN_OP_LOR, '|')
-     PARSER_DO_OP_1(e_TOKEN_OP_OP_BRACKET, '(')
-     PARSER_DO_OP_1(e_TOKEN_OP_CL_BRACKET, ')')
-     PARSER_DO_OP_1(e_TOKEN_OP_OP_BRACE, '{')
-     PARSER_DO_OP_1(e_TOKEN_OP_CL_BRACE, '}')
-     PARSER_DO_OP_1(e_TOKEN_OP_OP_SQUARE, '[')
-     PARSER_DO_OP_1(e_TOKEN_OP_CL_SQUARE, ']')
-     PARSER_DO_OP_1(e_TOKEN_OP_CL_HASH, '#')
-     PARSER_DO_OP_1(e_TOKEN_OP_CL_DQUOTE, '"')
-     PARSER_DO_OP_1(e_TOKEN_OP_CL_QUOTE, '\'')
-     if (len)
-     {
-         // Found an op.
-         ISI_gInputPtr += len;
-     }
-     else
-     {
-         Parser_Error("Unexpected input at (%d).", ISI_gInputPtr);
-         // Skip one character and try again.
-         ++ISI_gInputPtr;
-     }
-     return ret;
- }
- #undef PARSER_DO_OP_1
- #undef PARSER_DO_OP_2
- #undef PARSER_DO_OP_3
- #undef PARSER_DO_OP_4
- // Produce the next token from the input line.
- //
- // Leading whitespace is skipped, then the first remaining character picks
- // the specialised reader:
- //   '\0'                   -> a token of type e_TOKEN_TYPE_NONE
- //   digit                  -> Parser_GetNumber
- //   letter, '_' or '@'     -> Parser_GetSymbol
- //   '"'                    -> Parser_GetString
- //   '\''                   -> Parser_GetChar
- //   anything else          -> Parser_GetOp
- static stock Parser_GetNextToken()
- {
-     new
-         none[E_TOKEN] = {e_TOKEN_TYPE_NONE, 0},
-         ch;
-     Parser_SkipWhitespace();
-     ch = Parser_Peek();
-     if (ch == '\0') return none;
-     if ('0' <= ch <= '9') return Parser_GetNumber();
-     if ('a' <= ch <= 'z' || 'A' <= ch <= 'Z' || ch == '_' || ch == '@') return Parser_GetSymbol();
-     if (ch == '"') return Parser_GetString();
-     if (ch == '\'') return Parser_GetChar();
-     return Parser_GetOp();
- }
- // Read a numeric literal starting at the read cursor.
- //
- // The function looks ahead using a private copy of the cursor.  When the
- // look-ahead finds a hex ("0x"), binary ("0b"), octal (leading 0 followed
- // only by digits 0-7) or float ('.', 'e', 'E') form, it hands over to
- // Parser_DoHex / Parser_DoBinary / Parser_DoOct / Parser_DoFloat WITHOUT
- // having moved ISI_gInputPtr.  Otherwise the digits are accumulated as a
- // decimal value.
- //
- // Returns, for the decimal case, a token of type e_TOKEN_TYPE_NUM whose
- // E_TOKEN_NUM_VAL slot holds the value, and leaves ISI_gInputPtr on the
- // first character after the digits.  No overflow check is made on the
- // accumulated value.
- static stock Parser_GetNumber()
- {
-     new
-         // Array initialisers must be constant, so the value slot starts at
-         // 0 and is filled in just before returning.
-         ret[E_TOKEN] = {e_TOKEN_TYPE_NUM, 0},
-         num = 0,
-         ch,
-         ptr = ISI_gInputPtr;
-     if (ISI_gInputLine[ptr] == '0')
-     {
-         // Hex, binary, or octal (or 0).
-         switch ((ch = ISI_gInputLine[++ptr]))
-         {
-             case 'x', 'X': return Parser_DoHex(); // Hex
-             case 'b', 'B': return Parser_DoBinary(); // Binary
-             case 'e', 'E', '.': return Parser_DoFloat(); // Float
-             case '0' .. '9':
-             {
-                 // Stays true only while every digit seen is in 0-7.
-                 new
-                     bool:oct = ('0' <= ch <= '7');
-                 num = ch - '0';
-                 for ( ; ; )
-                 {
-                     // "break" leaves the enclosing "for": a switch is not a
-                     // break target.
-                     switch ((ch = ISI_gInputLine[++ptr]))
-                     {
-                         case '0' .. '7': num = num * 10 + (ch - '0');
-                         case 'e', 'E', '.': return Parser_DoFloat(); // Float
-                         case '8', '9': num = num * 10 + (ch - '0'), oct = false;
-                         default: break;
-                     }
-                 }
-                 if (oct) return Parser_DoOct();
-             }
-         }
-     }
-     else
-     {
-         // Decimal or float.
-         // Lookahead.
-         num = ISI_gInputLine[ptr] - '0';
-         for ( ; ; )
-         {
-             switch ((ch = ISI_gInputLine[++ptr]))
-             {
-                 case '0' .. '9': num = num * 10 + (ch - '0');
-                 case 'e', 'E', '.': return Parser_DoFloat();
-                 default: break;
-             }
-         }
-     }
-     // Decimal (default).
-     ret[E_TOKEN_NUM_VAL] = num;
-     ISI_gInputPtr = ptr;
-     return ret;
- }
Advertisement
Add Comment
Please, Sign In to add comment