Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- #include "Lexer.h"
- int tour; // Debug counter: eatChar increments and prints it while NAML_DEBUG_MSG is enabled.
- bool Lexer::isAlphanum(char const& c)
- {
- if (isalpha(c) || isdigit(c))
- return 0;
- return 1;
- }
- Lexer::Lexer()
- {
- }
- Lexer::~Lexer()
- {
- }
- deque<Token>* Lexer::Tokenize(string const& str)
- {
- eatChar(str, 0);
- return toks;
- }
- deque<Token>* Lexer::returnTkDqPtr()
- {
- return toks;
- }
- Token Lexer::makeToken(string const& fragment, nToken::tTypes const& category)
- {
- Token rtr(fragment, category);
- return rtr;
- }
- void Lexer::flushToken()
- {
- Token push(curtok, nToken::tTypes::undefined);
- if (NAML_DEBUG_MSG)
- cout << "pushed " << push.getContent() << endl;
- toks->push_back(push);
- curtok = "";
- }
- void Lexer::flushToken(nToken::tTypes const& type)
- {
- Token push(curtok,type);
- if (NAML_DEBUG_MSG)
- cout << "pushed " << push.getContent() << endl;
- toks->push_back(push);
- curtok = "";
- }
- bool Lexer:: eatChar(string const& str, int const& pos)
- {
- if (NAML_DEBUG_MSG)
- {
- tour++;
- cout << "@" << tour << endl;
- }
- int npos = (pos + 1);
- char _now = str[pos];
- if (pos == str.length())
- {
- if (curtok != "")
- flushToken();
- return 1;
- }
- switch (nGettype(_now))
- {
- //
- case cTypes::quote:
- case cTypes::apos:
- if (isInStr)
- {
- isInStr = false;
- flushToken(nToken::tTypes::nstring);
- }
- else
- {
- isInStr = true;
- return eatChar(str, npos);
- }
- break;
- //
- case cTypes::alpha:
- if (curtok == "")
- {
- curtok += _now;
- return eatChar(str, npos);
- }
- else
- {
- if (isWord(curtok))
- {
- curtok += _now;
- return eatChar(str, npos);
- }
- else
- {
- flushToken();
- return eatChar(str, pos);
- }
- }
- break;
- case cTypes::num:
- curtok += _now;
- return eatChar(str, npos);
- break;
- case cTypes::op_bracket:
- case cTypes::cls_bracket:
- case cTypes::op_par:
- case cTypes::cls_par:
- case cTypes::op_cbrack:
- case cTypes::cls_cbrack:
- case cTypes::semicolon:
- case cTypes::underscore:
- case cTypes::dollarsign:
- case cTypes::andsign:
- case cTypes::asterisk:
- case cTypes::div:
- case cTypes::idiv:
- case cTypes::tiret:
- case cTypes::plus:
- case cTypes::equal:
- case cTypes::exp:
- case cTypes::dash:
- case cTypes::tilde:
- case cTypes::imp:
- case cTypes::interr:
- case cTypes::virg:
- case cTypes::dpoint:
- case cTypes::point:
- case cTypes::modulo:
- case cTypes::lowthan:
- case cTypes::grethan:
- if (curtok != "")
- {
- flushToken();
- return eatChar(str, pos);
- }
- else
- {
- curtok += _now;
- flushToken();
- return eatChar(str,npos);
- }
- break;
- case cTypes::space:
- if (curtok != "")
- {
- flushToken();
- return eatChar(str,npos);
- }
- else
- {
- try{
- if (isspace(str[npos]))
- return eatChar(str, npos + 1);
- else
- return eatChar(str, npos);
- }
- catch (const std::out_of_range& oor)
- {
- return 1;
- }
- }
- case cTypes::unknown:
- return eatChar(str,npos);
- break;
- default:
- return eatChar(str,npos);
- break;
- }
- return 1;
- }
- bool Lexer::isWord(string const& str)
- {
- for (int j(0); j < str.length(); j++)
- {
- if (isalpha(str[j]))
- continue;
- else
- return 0;
- }
- return 1;
- }
- cTypes Lexer::nGettype(char const& c)
- {
- if (isalpha(c))
- return cTypes::alpha;
- else if (isdigit(c))
- return cTypes::num;
- else if (isspace(c))
- return cTypes::space;
- switch (c)
- {
- case '[':
- return cTypes::op_bracket;
- break;
- case ']':
- return cTypes::cls_bracket;
- break;
- ///////////////////////
- case '(':
- return cTypes::op_par;
- break;
- case ')':
- return cTypes::cls_par;
- break;
- ///////////////////////
- case '{':
- return cTypes::op_cbrack;
- break;
- case '}':
- return cTypes::cls_cbrack;
- break;
- ///////////////////////
- case '>':
- return cTypes::grethan;
- break;
- case '<':
- return cTypes::lowthan;
- break;
- ///////////////////////
- case '-':
- return cTypes::tiret; // Also considered as minus
- break;
- case '_':
- return cTypes::underscore;
- break;
- case '~':
- return cTypes::tilde;
- break;
- ///////////////////////
- case '.':
- return cTypes::point;
- break;
- case ':':
- return cTypes::dpoint;
- break;
- case ',':
- return cTypes::virg;
- break;
- case ';':
- return cTypes::semicolon;
- break;
- case '!':
- return cTypes::imp;
- break;
- case '?':
- return cTypes::interr;
- break;
- case '#':
- return cTypes::dash;
- break;
- case '$':
- return cTypes::dollarsign;
- break;
- case '&':
- return cTypes::andsign;
- break;
- ///////////////////////
- case '/':
- return cTypes::div;
- break;
- case '+':
- return cTypes::plus;
- break;
- case '^':
- return cTypes::exp;
- break;
- case '=':
- return cTypes::equal;
- break;
- case '*':
- return cTypes::asterisk;
- break;
- case '%':
- return cTypes::modulo;
- break;
- case '\\':
- return cTypes::idiv;
- break;
- case '"':
- return cTypes::quote;
- break;
- case ' ':
- return cTypes::space;
- break;
- case '\'':
- return cTypes::apos;
- break;
- ///////////////////////idk
- default:
- return cTypes::unknown;
- break;
- }
- }
- nToken::tTypes Lexer::idTok(string tok)
- {
- return nToken::tTypes::undefined;
- }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement