Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- /* lexical.cpp -- version 0.2, May 1st, 2015
- Copyright (C) 2015 Raphaël Dujardin
- This software is provided 'as-is', without any express or implied
- warranty. In no event will the authors be held liable for any damages
- arising from the use of this software.
- Permission is granted to anyone to use this software for any purpose,
- including commercial applications, and to alter it and redistribute it
- freely, subject to the following restrictions:
- 1. The origin of this software must not be misrepresented; you must not
- claim that you wrote the original software. If you use this software
- in a product, an acknowledgment in the product documentation would be
- appreciated but is not required.
- 2. Altered source versions must be plainly marked as such, and must not be
- misrepresented as being the original software.
- 3. This notice may not be removed or altered from any source distribution.
- Raphaël Dujardin
- rdujardin.com
- */
- #include "lexical.h"
- namespace lex
- {
- Token::Token(TokenType _type,std::string _value)
- {
- type=_type;
- value=_value;
- }
- Token::Token(const Token& token)
- {
- (*this)=token;
- }
- Token::~Token()
- {
- //.
- }
- void Token::print(std::ostream& out,bool withValue,bool withEol) const
- {
- switch(type)
- {
- case NULLTOKEN:
- out << "NULLTOKEN";
- if(withEol) out << std::endl;
- break;
- case INTEGER:
- out << "INTEGER";
- if(withValue) out << " # " << value;
- if(withEol) out << std::endl;
- break;
- case FLOATING:
- out << "FLOATING";
- if(withValue) out << " # " << value;
- if(withEol) out << std::endl;
- break;
- case STRING:
- out << "STRING";
- if(withValue) out << " # " << value;
- if(withEol) out << std::endl;
- break;
- case NAME:
- out << "NAME";
- if(withValue) out << " # " << value;
- if(withEol) out << std::endl;
- break;
- case PLUS:
- out << "PLUS";
- if(withEol) out << std::endl;
- break;
- case MINUS:
- out << "MINUS";
- if(withEol) out << std::endl;
- break;
- case MULTIPLY:
- out << "MULTIPLY";
- if(withEol) out << std::endl;
- break;
- case DIVIDE:
- out << "DIVIDE";
- if(withEol) out << std::endl;
- break;
- case MODULO:
- out << "MODULO";
- if(withEol) out << std::endl;
- break;
- case EQUAL:
- out << "EQUAL";
- if(withEol) out << std::endl;
- break;
- case LPARENTHESIS:
- out << "LPARENTHESIS";
- if(withEol) out << std::endl;
- break;
- case RPARENTHESIS:
- out << "RPARENTHESIS";
- if(withEol) out << std::endl;
- break;
- case LBRACKET:
- out << "LBRACKET";
- if(withEol) out << std::endl;
- break;
- case RBRACKET:
- out << "RBRACKET";
- if(withEol) out << std::endl;
- break;
- case LSQUARE:
- out << "LSQUARE";
- if(withEol) out << std::endl;
- break;
- case RSQUARE:
- out << "RSQUARE";
- if(withEol) out << std::endl;
- break;
- case COMMA:
- out << "COMMA";
- if(withEol) out << std::endl;
- break;
- case SEMICOLON:
- out << "SEMICOLON";
- if(withEol) out << std::endl;
- break;
- case INFERIOR:
- out << "INFERIOR";
- if(withEol) out << std::endl;
- break;
- case SUPERIOR:
- out << "SUPERIOR";
- if(withEol) out << std::endl;
- break;
- case BREAK:
- out << "BREAK";
- if(withEol) out << std::endl;
- break;
- case CASE:
- out << "CASE";
- if(withEol) out << std::endl;
- break;
- case CATCH:
- out << "CATCH";
- if(withEol) out << std::endl;
- break;
- case CONST:
- out << "CONST";
- if(withEol) out << std::endl;
- break;
- case CONTINUE:
- out << "CONTINUE";
- if(withEol) out << std::endl;
- break;
- case DEBUGGER:
- out << "DEBUGGER";
- if(withEol) out << std::endl;
- break;
- case DO:
- out << "DO";
- if(withEol) out << std::endl;
- break;
- case ELSE:
- out << "ELSE";
- if(withEol) out << std::endl;
- break;
- case FINALLY:
- out << "FINALLY";
- if(withEol) out << std::endl;
- break;
- case FOR:
- out << "FOR";
- if(withEol) out << std::endl;
- break;
- case FUNCTION:
- out << "FUNCTION";
- if(withEol) out << std::endl;
- break;
- case IF:
- out << "IF";
- if(withEol) out << std::endl;
- break;
- case IN:
- out << "IN";
- if(withEol) out << std::endl;
- break;
- case INSTANCEOF:
- out << "INSTANCEOF";
- if(withEol) out << std::endl;
- break;
- case LET:
- out << "LET";
- if(withEol) out << std::endl;
- break;
- case NEW:
- out << "NEW";
- if(withEol) out << std::endl;
- break;
- case RETURN:
- out << "RETURN";
- if(withEol) out << std::endl;
- break;
- case SWITCH:
- out << "SWITCH";
- if(withEol) out << std::endl;
- break;
- case THIS:
- out << "THIS";
- if(withEol) out << std::endl;
- break;
- case THROW:
- out << "THROW";
- if(withEol) out << std::endl;
- break;
- case TRY:
- out << "TRY";
- if(withEol) out << std::endl;
- break;
- case TYPEOF:
- out << "TYPEOF";
- if(withEol) out << std::endl;
- break;
- case VAR:
- out << "VAR";
- if(withEol) out << std::endl;
- break;
- case VOID:
- out << "VOID";
- if(withEol) out << std::endl;
- break;
- case WHILE:
- out << "WHILE";
- if(withEol) out << std::endl;
- break;
- case DBEQUAL:
- out << "DBEQUAL";
- if(withEol) out << std::endl;
- break;
- case DBPLUS:
- out << "DBPLUS";
- if(withEol) out << std::endl;
- break;
- case DBMINUS:
- out << "DBMINUS";
- if(withEol) out << std::endl;
- break;
- default:
- out << "?ERROR?";
- if(withEol) out << std::endl;
- break;
- }
- }
- void Token::printType(std::ostream& out) const
- {
- print(out,false);
- }
- void Token::printCode(std::ostream& out) const
- {
- out << value;
- }
- Token& Token::operator=(const Token &token)
- {
- type=token.type;
- value=token.value;
- return *this;
- }
- Sequence::Sequence()
- {
- //.
- }
- Sequence::Sequence(const std::string& code)
- {
- lex(code);
- }
- Sequence::Sequence(const Token& token)
- {
- (*this)+=token;
- }
- Sequence::Sequence(const Sequence& sequence)
- {
- (*this)=sequence;
- }
- Sequence::~Sequence()
- {
- //.
- }
- void Sequence::print(std::ostream& out,bool compact) const
- {
- if(!compact) out << std::endl;
- out << "[";
- if(!compact) out << std::endl;
- for(std::deque<Token>::const_iterator it=tokens.begin();it!=tokens.end();it++)
- {
- (*it).print(out,true,!compact);
- out << ((compact)?((it+1==tokens.end())?" ":" | "):"");
- }
- if(!compact) out << std::endl;
- out << "]";
- if(!compact) out << std::endl;
- }
- void Sequence::printCode(std::ostream& out) const
- {
- for(std::deque<Token>::const_iterator it=tokens.begin();it!=tokens.end();it++)
- {
- it->printCode(out);
- }
- }
- Sequence& Sequence::operator=(const Sequence &sequence)
- {
- tokens=sequence.tokens;
- return *this;
- }
- Sequence& Sequence::operator+=(const Sequence &sequence)
- {
- tokens.insert(tokens.end(),sequence.tokens.begin(),sequence.tokens.end());
- return *this;
- }
- Sequence& Sequence::operator+=(const Token &token)
- {
- tokens.insert(tokens.end(),token);
- return *this;
- }
- std::string Sequence::toStr(const std::vector<char> &buf)
- {
- return std::string(buf.begin(),buf.end());
- }
- std::pair<TokenType,std::vector<char>::const_iterator> Sequence::numberBuf(const std::vector<char> &buf)
- {
- bool isInteger=true;
- for(std::vector<char>::const_iterator it=buf.begin();it!=buf.end();it++)
- {
- if(*it<0x30 or *it>0x39)
- {
- if(*it=='.' and isInteger) isInteger=false;
- else return std::pair<TokenType,std::vector<char>::const_iterator>((isInteger)?INTEGER:FLOATING,it);
- }
- }
- return std::pair<TokenType,std::vector<char>::const_iterator>((isInteger)?INTEGER:FLOATING,buf.end());
- }
- Sequence Sequence::analyzeBuf(const std::vector<char> &buf)
- {
- std::pair<TokenType,std::vector<char>::const_iterator> num=numberBuf(buf);
- if(num.second==buf.end()) return Sequence(Token(num.first,toStr(buf)));
- else
- {
- if(num.second!=buf.begin())
- {
- return Sequence(Token(num.first,std::string(buf.begin(),num.second)))+Token(NAME,std::string(num.second,buf.end()));
- }
- }
- std::string s=toStr(buf);
- if(s=="==") return Sequence(Token(DBEQUAL));
- if(s=="break") return Sequence(Token(BREAK));
- if(s=="case") return Sequence(Token(CASE));
- if(s=="catch") return Sequence(Token(CATCH));
- if(s=="const") return Sequence(Token(CONST));
- if(s=="continue") return Sequence(Token(CONTINUE));
- if(s=="debugger") return Sequence(Token(DEBUGGER));
- if(s=="do") return Sequence(Token(DO));
- if(s=="else") return Sequence(Token(ELSE));
- if(s=="finally") return Sequence(Token(FINALLY));
- if(s=="for") return Sequence(Token(FOR));
- if(s=="function") return Sequence(Token(FUNCTION));
- if(s=="if") return Sequence(Token(IF));
- if(s=="in") return Sequence(Token(IN));
- if(s=="instanceof") return Sequence(Token(INSTANCEOF));
- if(s=="let") return Sequence(Token(LET));
- if(s=="new") return Sequence(Token(NEW));
- if(s=="return") return Sequence(Token(RETURN));
- if(s=="switch") return Sequence(Token(SWITCH));
- if(s=="this") return Sequence(Token(THIS));
- if(s=="throw") return Sequence(Token(THROW));
- if(s=="try") return Sequence(Token(TRY));
- if(s=="typeof") return Sequence(Token(TYPEOF));
- if(s=="var") return Sequence(Token(VAR));
- if(s=="void") return Sequence(Token(VOID));
- if(s=="while") return Sequence(Token(WHILE));
- return Sequence(Token(NAME,s));
- }
- void Sequence::lex(const std::string& code)
- {
- std::vector<char> buffer;
- char temp=0;
- bool multiCommentary=false, lineCommentary=false, quoteString=false, dbQuoteString=false;
- for(std::string::const_iterator it=code.begin();it!=code.end();it++)
- {
- if(multiCommentary)
- {
- if(*it=='/' and temp=='*') { temp=0; multiCommentary=false; }
- if(*it=='*') temp=*it;
- else temp=0;
- }
- else if(lineCommentary)
- {
- if(*it=='\n') { lineCommentary=false; }
- }
- else if(quoteString)
- {
- if(*it=='\'' and temp!='\\')
- {
- temp=0; quoteString=false;
- (*this)+=Token(STRING,toStr(buffer));
- buffer.clear();
- }
- else
- {
- if(*it=='\\') { temp=*it; }
- else { temp=0; }
- buffer.push_back(*it);
- }
- }
- else if(dbQuoteString)
- {
- if(*it=='"' and temp!='\\')
- {
- temp=0; dbQuoteString=false;
- (*this)+=Token(STRING,toStr(buffer));
- buffer.clear();
- }
- else
- {
- if(*it=='\\') { temp=*it; }
- else { temp=0; buffer.push_back(*it); }
- }
- }
- else
- {
- if(*it=='<' or *it=='>' or *it==' ' or *it=='\n' or *it=='\r' or *it=='\t' or *it=='+' or *it=='-' or *it=='*' or *it=='/' or *it=='%' or *it=='=' or *it=='(' or *it==')' or *it=='{' or *it=='}' or *it=='[' or *it==']' or *it==',' or *it==';' or *it=='\'' or *it=='"')
- {
- if(!buffer.empty())
- {
- (*this)+=analyzeBuf(buffer);
- buffer.clear();
- }
- Token token;
- bool add=true;
- if(*it=='<') token.type=INFERIOR;
- if(*it=='>') token.type=SUPERIOR;
- if(*it=='+') { if(tokens.back().type==PLUS) { tokens[tokens.size()-1].type=DBPLUS; add=false; } else token.type=PLUS;}
- if(*it=='-') { if(tokens.back().type==MINUS) { tokens[tokens.size()-1].type=DBMINUS; add=false; } else token.type=MINUS;}
- if(*it=='*') { if(tokens.back().type==DIVIDE) { tokens.pop_back(); multiCommentary=true; add=false; } else token.type=MULTIPLY; }
- if(*it=='/') { if(tokens.back().type==DIVIDE) { tokens.pop_back(); lineCommentary=true; add=false; } else token.type=DIVIDE; }
- if(*it=='%') token.type=MODULO;
- if(*it=='\'') { quoteString=true; add=false; }
- if(*it=='"') { dbQuoteString=true; add=false; }
- if(*it==',') token.type=COMMA;
- if(*it==';') token.type=SEMICOLON;
- if(*it=='(') token.type=LPARENTHESIS;
- if(*it==')') token.type=RPARENTHESIS;
- if(*it=='{') token.type=LBRACKET;
- if(*it=='}') token.type=RBRACKET;
- if(*it=='[') token.type=LSQUARE;
- if(*it==']') token.type=RSQUARE;
- if(*it=='=') { if(tokens.back().type==EQUAL) { tokens[tokens.size()-1].type=DBEQUAL; add=false; } else token.type=EQUAL;}
- if(*it!=' ' && *it!='\n' && *it!='\r' && *it!='\t' && add) { (*this)+=token; }
- }
- else buffer.push_back(*it);
- }
- }
- if(!buffer.empty())
- {
- (*this)+=analyzeBuf(buffer);
- }
- }
- }
- std::ostream& operator<<(std::ostream& stream,const lex::TokenType &tokenType)
- {
- lex::Token(tokenType).printType(stream);
- return stream;
- }
- std::ostream& operator<<(std::ostream& stream,const lex::Token& token)
- {
- token.print(stream);
- return stream;
- }
- std::ostream& operator<<(std::ostream& stream,const lex::Sequence& sequence)
- {
- sequence.print(stream);
- return stream;
- }
- lex::Sequence operator+(const lex::Sequence &seq1,const lex::Sequence &seq2)
- {
- lex::Sequence seqr(seq1);
- seqr+=seq2;
- return seqr;
- }
- lex::Sequence operator+(const lex::Sequence &seq,const lex::Token &token)
- {
- lex::Sequence seqr(seq);
- seqr+=token;
- return seqr;
- }
- lex::Sequence operator+(const lex::Token &token1,const lex::Token &token2)
- {
- lex::Sequence seqr(token1);
- seqr+=token2;
- return seqr;
- }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement