Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- //
- // Created by ottomated on 27/11/18.
- //
- #include "Tokenizer.h"
- #include <ctype.h>
- #include <string>
- #include <unordered_map>
- using namespace std;
- Tokenizer::Tokenizer(string p) : program(std::move(p)), currentToken(Token(Empty, 0)), pos(0) {}
- Token Tokenizer::getNextToken() {
- int i = pos;
- pos++;
- if (i > program.size()) {
- currentToken = Token(Eof);
- return currentToken;
- }
- char c = program[i];
- while (isspace(c)) {
- i++;
- pos++;
- c = program[i];
- }
- if (!c) {
- currentToken = Token(Eof);
- return currentToken;
- }
- // Find single chars that can't be confused
- unordered_map<char, TokenType> singleChars = {
- {'(', LeftParen},
- {')', RightParen},
- {'[', LeftBracket},
- {']', RightBracket},
- {'{', LeftBrace},
- {'}', RightBrace},
- {';', Semicolon},
- {':', Colon},
- {',', Comma},
- {'.', Dot},
- {'!', Not},
- };
- auto it = singleChars.find(c);
- if (it != singleChars.end()) {
- currentToken = Token(it->second);
- return currentToken;
- }
- // Find 2-char tokens that could be confused
- string nextTwo = program.substr(i, 2);
- {
- unordered_map<string, TokenType> doubleChars = {
- {"++", Increment},
- {"--", Decrement},
- {"+=", PlusEquals},
- {"-=", MinusEquals},
- {"*=", MultiplyEquals},
- {"/=", DivideEquals},
- {"%=", ModuloEquals},
- {"<=", LessThanEquals},
- {">=", GreaterThanEquals},
- {"==", DoubleEquals},
- {"&&", And},
- {"||", Or},
- };
- auto it = doubleChars.find(nextTwo);
- if (it != doubleChars.
- end()
- ) {
- pos++; // Go one more forward
- currentToken = Token(it->second);
- return
- currentToken;
- }
- }
- // Find confusable single chars
- {
- unordered_map<char, TokenType> singleChars = {
- {'+', Plus},
- {'-', Minus},
- {'*', Multiply},
- {'/', Divide},
- {'%', Modulo},
- {'=', SingleEquals},
- {'<', LessThan},
- {'>', GreaterThan},
- {'&', BinaryAnd},
- {'|', BinaryOr},
- {'^', BinaryXor},
- };
- auto it = singleChars.find(c);
- if (it != singleChars.
- end()
- ) {
- currentToken = Token(it->second);
- return
- currentToken;
- }
- }
- string parse;
- // Integers
- if (isdigit(c)) {
- while (isdigit(c)) {
- parse += c;
- pos++;
- i++;
- c = program[i];
- }
- currentToken = Token(Integer, stoi(parse));
- return currentToken;
- }
- // Strings
- if (c == '"') {
- bool isEscaping = false;
- do {
- pos++;
- i++;
- c = program[i];
- parse += c;
- } while (program[pos] != '"');
- pos++;
- currentToken = Token(String, parse);
- return currentToken;
- }
- const string validNameChars = "qwertyuiopasdfghjklzxcvbnmQWERTYUIOPASDFGHJKLZXCVBNM_-";
- while (validNameChars.find(c) != validNameChars.npos) {
- parse += c;
- i++;
- pos++;
- c = program[i];
- }
- if (parse.size() == 0) {
- currentToken = Token(Empty);
- } else {
- currentToken = Token(Name, parse);
- return currentToken;
- }
- }
Add Comment
Please, Sign In to add comment