Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
#pragma once

#include <algorithm>
#include <cctype>
#include <istream>
#include <limits>
#include <sstream>
#include <stdexcept>
#include <string>
#include <unordered_set>
#include <utility>
#include <variant>
#include <vector>

#include "StreamTable.h"
/// Exception type thrown by the tokenizer when it rejects the input.
/// It carries the message passed at construction, retrievable via what().
struct LexicalError : public std::runtime_error {
    explicit LexicalError(const std::string& what)
        : std::runtime_error(what) {}
};
/// Token that carries a textual name (an identifier or an operator spelling).
struct SymbolToken {
    std::string name;

    // Both constructors are intentionally implicit: other code in this file
    // builds a Token directly from a std::string.
    SymbolToken(const std::string& str) : name(str) {}

    // Builds a one-character name.
    SymbolToken(char c) : name(1, c) {}

    // Two symbol tokens are equal when their names match exactly.
    bool operator==(const SymbolToken& rhs) const { return rhs.name == name; }
};
/// Stateless token emitted for the quote character (').
struct QuoteToken {
    // There is no state to compare, so every QuoteToken equals every other.
    bool operator==(const QuoteToken&) const { return true; }
};
/// Stateless token emitted for the dot character (.).
struct DotToken {
    // There is no state to compare, so every DotToken equals every other.
    bool operator==(const DotToken&) const { return true; }
};
/// Token for a parenthesis: OPEN is '(' and CLOSE is ')'.
enum class BracketToken {
    OPEN,
    CLOSE
};
/// Token for a boolean literal: TRUE is "#t" and FALSE is "#f".
enum class BooleanToken {
    TRUE,
    FALSE
};
/// Token for an integer literal. Kept as a plain aggregate so it can be
/// brace-initialised as ConstantToken{n}.
struct ConstantToken {
    int value;

    // Constant tokens are equal when they hold the same integer.
    bool operator==(const ConstantToken& rhs) const { return rhs.value == value; }
};
- typedef std::variant<ConstantToken, BracketToken, BooleanToken, QuoteToken, DotToken, SymbolToken>
- Token;
- class Tokenizer {
- std::vector<Token> tokens_;
- bool is_end_ = false;
- bool is_new_variable = false;
- bool is_method_declaration = false;
- std::istream* in_;
- std::unordered_set<std::string> variables;
- std::unordered_set<std::string> operators{
- "+",
- "if",
- "boolean?",
- "quote",
- "not",
- "and",
- ">",
- "<",
- "=",
- "define",
- "set!",
- "or",
- "number?",
- ">=",
- "<=",
- "*",
- "-",
- "/",
- "max",
- "min",
- "abs",
- "symbol?",
- "pair?",
- "null?",
- "list?",
- "cons",
- "car",
- "cdr",
- "set-car!",
- "set-cdr!",
- "list",
- "list-ref",
- "list-tail",
- "lambda",
- "eval"};
- public:
- Tokenizer(std::istream* in) {
- in_ = in;
- Next();
- }
- bool IsEnd() {
- return is_end_;
- }
- void Next(bool lexicalCheck = false) {
- char c, fc;
- bool is_successfull_token = false;
- while (!is_successfull_token) {
- if (!in_->get(c)) {
- is_end_ = true;
- break;
- }
- fc = c;
- is_successfull_token = true;
- std::vector<char> except_chars = {'+', '-', '*', '>', '<', '=', '/'};
- switch (fc) {
- case ')':
- tokens_.emplace_back(BracketToken::CLOSE);
- break;
- case '(':
- tokens_.emplace_back(BracketToken::OPEN);
- break;
- case '\'':
- tokens_.emplace_back(QuoteToken());
- break;
- case '.':
- tokens_.emplace_back(DotToken());
- break;
- case '#':
- if (in_->peek() != EOF && (in_->peek() == 't' || in_->peek() == 'f')) {
- in_->get(fc);
- tokens_.emplace_back(fc == 't' ? BooleanToken::TRUE : BooleanToken::FALSE);
- break;
- }
- default:
- if (std::isdigit(fc) || ((fc == '-' || fc == '+') && in_->peek() != EOF &&
- std::isdigit(in_->peek()))) {
- int value;
- if (fc == '-' || fc == '+') {
- value = 0;
- } else {
- value = fc - '0';
- }
- while (in_->peek() != EOF && std::isdigit(in_->peek())) {
- in_->get(c);
- value *= 10;
- value += c - '0';
- }
- if (fc == '-') {
- value *= -1;
- }
- if (lexicalCheck) {
- std::string next_chars;
- while (in_->peek() != EOF && std::isgraph(in_->peek())) {
- in_->get(c);
- if (c == ')')
- break;
- next_chars += c;
- }
- if (next_chars.length() > 0) {
- tokens_.emplace_back(SymbolToken(std::to_string(value) + next_chars));
- throw LexicalError("Invalid lexeme name:");
- }
- }
- tokens_.emplace_back(ConstantToken{value});
- } else if (std::isalpha(fc)) {
- std::vector<char> possible_signs = {'+', '-', '*', '?', '!'};
- std::string cur_token;
- cur_token += fc;
- while (in_->peek() != EOF &&
- (std::isdigit(in_->peek()) || std::isalpha(in_->peek()) ||
- std::find(possible_signs.begin(), possible_signs.end(),
- in_->peek()) != possible_signs.end())) {
- in_->get(c);
- cur_token += c;
- }
- tokens_.emplace_back(SymbolToken{cur_token});
- } else if (std::find(except_chars.begin(), except_chars.end(), fc) !=
- except_chars.end()) {
- if (in_->peek() != EOF && (in_->peek() == '=')) {
- std::string s = "ab";
- s[0] = fc;
- in_->get(s[1]);
- tokens_.emplace_back(s);
- } else {
- tokens_.emplace_back(SymbolToken(fc));
- }
- } else {
- is_successfull_token = false;
- }
- }
- }
- if (lexicalCheck) {
- if (auto pval = std::get_if<SymbolToken>(&tokens_.back())) {
- if (is_new_variable || is_method_declaration) {
- if (is_new_variable && variables.find(pval->name) != variables.end())
- throw LexicalError("Repeated declaration: ");
- else if (operators.find(pval->name) != operators.end())
- throw LexicalError("Invalid use of operator: ");
- else
- variables.insert(pval->name);
- is_new_variable = false;
- } else {
- if (operators.find(pval->name) == operators.end() && variables.find(pval->name) == variables.end())
- throw LexicalError("Unknown lexeme: ");
- if (pval->name == "define" || pval->name == "lambda")
- is_new_variable = true;
- }
- } else if (is_new_variable || is_method_declaration) {
- if (fc != '(' && fc != ')')
- throw LexicalError("Illegal variable name: ");
- else
- is_method_declaration ^= true;
- }
- }
- }
- Token GetToken() {
- if (!tokens_.empty()) {
- return tokens_.back();
- }
- }
- std::vector<Token> GetTokens() {
- return tokens_;
- }
- std::string ClassifyTokens() {
- try {
- while (!IsEnd()) {
- Next(true);
- }
- auto tokens = GetTokens();
- std::stringstream table;
- StreamTable st(table);
- st.AddCol(5);
- st.AddCol(15);
- st.AddCol(20);
- st.MakeBorderExt(true);
- st.SetDelimRow(true, '-');
- st.SetDelimCol(true, '|');
- st << "Number" << "Name" << "Type";
- for (int i = 0; i < tokens.size(); ++i) {
- st << i + 1 << GetTokenName(tokens[i]) << GetTokenType(tokens[i]);
- }
- return table.str();
- } catch (LexicalError &err) {
- return err.what() + GetTokenName(tokens_.back());
- }
- }
- std::string GetTokenType(const Token &token) {
- if (auto pval = std::get_if<ConstantToken>(&token)) {
- return "ConstantToken";
- } else if (auto pval = std::get_if<SymbolToken>(&token)) {
- return (operators.find(pval->name) == operators.end() ? "Variable" : "Operator");
- } else if (auto pval = std::get_if<BooleanToken>(&token)) {
- return "BooleanToken";
- } else if (std::get_if<QuoteToken>(&token)) {
- return "QuoteToken";
- } else if (std::get_if<DotToken>(&token)) {
- return "DotToken";
- } else if (auto pval = std::get_if<BracketToken >(&token))
- return "BracketToken";
- }
- std::string GetTokenName(const Token &token) {
- if (auto pval = std::get_if<ConstantToken>(&token)) {
- return std::to_string(pval->value);
- } else if (auto pval = std::get_if<SymbolToken>(&token)) {
- return pval->name;
- } else if (auto pval = std::get_if<BooleanToken>(&token)) {
- return (*pval == BooleanToken::TRUE ? "#t" : "#f");
- } else if (std::get_if<QuoteToken>(&token)) {
- return "'";
- } else if (std::get_if<DotToken>(&token)) {
- return ".";
- } else if (auto pval = std::get_if<BracketToken >(&token))
- return (*pval == BracketToken::OPEN ? "(" : ")");
- }
- };
- std::vector<Token> Read(const std::string& string);
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement