Advertisement
Ardente

Token Class

Aug 20th, 2020
286
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
C++ 14.39 KB | None | 0 0
  1. #pragma once
  2.  
  3. #include <map>
  4. #include <string>
  5. #include <variant>
  6. #include <cassert>
  7. #include <cassert>
  8.  
  9. namespace TokenTools
  10. {
  11.     enum class TokenGroup
  12.     {
  13.         KEYWORD,
  14.         IDENTIFIER,
  15.         NUMERIC_LITERAL,
  16.         USER_LITERAL,
  17.         CHARACTER_LITERAL,
  18.         STRING_LITERAL,
  19.         OPERATORS,
  20.         PUNCTUATORS
  21.     };
  22.  
  23.     enum class UniCharOperators
  24.     {
  25.         MEMBER_OBJECT_ACCESS = '.',
  26.         ARRAY_SUBSCRIPT_LEFT = '[',
  27.         ARRAY_SUBSCRIPT_RIGHT = ']',
  28.         LEFT_PARANTHESIS = '(',
  29.         RIGHT_PARANTHESIS = ')',
  30.         DOUBLE_QUOUTE = '\"',
  31.         SINGLE_QUOUTE = '\'',
  32.         BINARY_NOT = '~',
  33.         LOGICAL_NOT = '!',
  34.         UNARY_NEGATION = 0,
  35.         UNARY_PLUS = 1,
  36.         ADDRESS_ACCESS = 2,
  37.         DEREFERENCE = 3,
  38.         MULTIPLICATION = 4,
  39.         DIVISION = '/',
  40.         MODULUS = '%',
  41.         ADDITION = 5,
  42.         SUBTRACTION = '-',
  43.         LESS_THAN = '<',
  44.         GREATER_THAN = '>',
  45.         BITWISE_AND = '&',
  46.         BITWISE_EXCLUSIVE_OR = '^',
  47.         BITWISE_INCLUSIVE_OR = '|',
  48.         TERNARY_LEFT = '?',
  49.         TERNARY_RIGHT = ':',
  50.         ASSIGNMENT = '=',
  51.         COMMA = ',',
  52.  
  53.         // For ambiguous symbols (ie *, +, and -)
  54.         STAR_SYMBOL = '*',
  55.         PLUS_SIGN   = '+',
  56.         MINUS_SIGN  = '-',
  57.         AND_SIGN    = '&'
  58.     };
  59.  
  60.     enum class MultiCharOperators
  61.     {
  62.         SCOPE_RESOLUTION,
  63.         MEMBER_ACCESS_POINTER,
  64.         ARRAY_SUBSCRIPT,
  65.         FUNCTION_CALL,
  66.         POSTFIX_INCREMENT,
  67.         POSTFIX_DECREMENT,
  68.         TYPE_NAME,
  69.         CONST_TYPE_CONERSION,
  70.         DYNAMIC_TYPE_CONVERSION,
  71.         REINTERPRETED_TYPE_CONVERSION,
  72.         STATIC_TYPE_CONVERSION,
  73.         SIZE_OF_OBJECT,
  74.         PREFIX_INCREMENT,
  75.         PREFIX_DECREMENT,
  76.         CREATE_HEAP_OBJECT,
  77.         DESTROY_HEAP_OBJECT,
  78.         TYPE_CAST,
  79.         MEMBER_ACCESS_OBJECT_DEREFERENCE,
  80.         MEMBER_ACCESS_POINTER_DEREFERENCE,
  81.         BINARY_LEFT_SHIFT,
  82.         BINARY_RIGHT_SHIFT,
  83.         LESS_THAN_OR_EQUAL_TO,
  84.         GREATER_THAN_OR_EQUAL_TO,
  85.         EQUALITY,
  86.         INEQUALITY,
  87.         LOGICAL_AND,
  88.         LOGICAL_OR,
  89.         TERNARY_OPERATOR,
  90.         MULTIPLY_ASSIGNMENT,
  91.         DIVISION_ASSIGNMENT,
  92.         MODULUS_ASSIGNMENT,
  93.         ADDITION_ASSIGNMENT,
  94.         SUBTRACTION_ASSIGNMENT,
  95.         LEFT_SHIFT_ASSIGNMENT,
  96.         RIGHT_SHIFT_ASSIGNMENT,
  97.         BITWISE_AND_ASSIGNMENT,
  98.         BITWISE_INCLUSIVE_OR_ASSIGNMENT,
  99.         BITWISE_EXCLUSIVE_OR_ASSIGNMENT,
  100.         THROW_EXCEPTION,
  101.        
  102.         // UniCharacter Operators that have multi-character counterparts
  103.         BINARY_NOT,
  104.         LOGICAL_NOT,
  105.         BITWISE_AND,
  106.         BITWISE_EXCLUSIVE_OR,
  107.         BITWISE_INCLUSIVE_OR,
  108.         DOUBLE_QUOUTES,
  109.         SINGLE_QUOUTES,
  110.  
  111.         // Generalized operator types to avoid duplicate keys
  112.         PARANTHESIS,
  113.         INCREMENT,
  114.         DECREMENT
  115.     };
  116. }
  117.  
  118. using namespace TokenTools;
  119.  
  120. namespace
  121. {
  122.     std::map<std::string, MultiCharOperators> multiCharOperatorMappings{
  123.         {"::", MultiCharOperators::SCOPE_RESOLUTION},
  124.         {"->", MultiCharOperators::MEMBER_ACCESS_POINTER},
  125.         {"[]", MultiCharOperators::ARRAY_SUBSCRIPT},
  126.         {"()", MultiCharOperators::PARANTHESIS},
  127.         {"++", MultiCharOperators::INCREMENT},
  128.         {"--", MultiCharOperators::DECREMENT},
  129.         {"typeid", MultiCharOperators::TYPE_NAME},
  130.         {"const_cast", MultiCharOperators::CONST_TYPE_CONERSION},
  131.         {"dynamic_cast", MultiCharOperators::DYNAMIC_TYPE_CONVERSION},
  132.         {"reinterpret_cast", MultiCharOperators::REINTERPRETED_TYPE_CONVERSION},
  133.         {"static_cast", MultiCharOperators::STATIC_TYPE_CONVERSION},
  134.         {"sizeof", MultiCharOperators::SIZE_OF_OBJECT},
  135.         {"compl", MultiCharOperators::BINARY_NOT},
  136.         {"not", MultiCharOperators::LOGICAL_NOT},
  137.         {"new", MultiCharOperators::CREATE_HEAP_OBJECT},
  138.         {"delete", MultiCharOperators::DESTROY_HEAP_OBJECT},
  139.         {".*", MultiCharOperators::MEMBER_ACCESS_OBJECT_DEREFERENCE},
  140.         {"->*", MultiCharOperators::MEMBER_ACCESS_POINTER_DEREFERENCE},
  141.         {">>", MultiCharOperators::BINARY_LEFT_SHIFT},
  142.         {"<<", MultiCharOperators::BINARY_RIGHT_SHIFT},
  143.         {"<=", MultiCharOperators::LESS_THAN_OR_EQUAL_TO},
  144.         {">=", MultiCharOperators::GREATER_THAN_OR_EQUAL_TO},
  145.         {"==", MultiCharOperators::EQUALITY},
  146.         {"!=", MultiCharOperators::INEQUALITY},
  147.         {"not_eq", MultiCharOperators::INEQUALITY},
  148.         {"bitand", MultiCharOperators::BITWISE_AND},
  149.         {"xor", MultiCharOperators::BITWISE_EXCLUSIVE_OR},
  150.         {"bitor", MultiCharOperators::BITWISE_INCLUSIVE_OR},
  151.         {"&&", MultiCharOperators::LOGICAL_AND},
  152.         {"and", MultiCharOperators::LOGICAL_AND},
  153.         {"||", MultiCharOperators::LOGICAL_OR},
  154.         {"or", MultiCharOperators::LOGICAL_OR},
  155.         {"?:", MultiCharOperators::TERNARY_OPERATOR},
  156.         {"*=", MultiCharOperators::MULTIPLY_ASSIGNMENT},
  157.         {"/=", MultiCharOperators::DIVISION_ASSIGNMENT},
  158.         {"%=", MultiCharOperators::MODULUS_ASSIGNMENT},
  159.         {"+=", MultiCharOperators::ADDITION_ASSIGNMENT},
  160.         {"-=", MultiCharOperators::SUBTRACTION_ASSIGNMENT},
  161.         {"<<=", MultiCharOperators::LEFT_SHIFT_ASSIGNMENT},
  162.         {">>=", MultiCharOperators::RIGHT_SHIFT_ASSIGNMENT},
  163.         {"&=", MultiCharOperators::BITWISE_AND_ASSIGNMENT},
  164.         {"and_eq", MultiCharOperators::BITWISE_AND_ASSIGNMENT},
  165.         {"|=", MultiCharOperators::BITWISE_INCLUSIVE_OR_ASSIGNMENT},
  166.         {"or_eq", MultiCharOperators::BITWISE_INCLUSIVE_OR_ASSIGNMENT},
  167.         {"^=", MultiCharOperators::BITWISE_EXCLUSIVE_OR_ASSIGNMENT},
  168.         {"xor_eq", MultiCharOperators::BITWISE_EXCLUSIVE_OR_ASSIGNMENT},
  169.         {"\"\"", MultiCharOperators::DOUBLE_QUOUTES},
  170.         {"\'\'", MultiCharOperators::SINGLE_QUOUTES}
  171.     };
  172. }
  173.  
  174. class Token
  175. {
  176. public:
  177.     Token() = default;
  178.  
  179.     Token(TokenGroup group, std::variant<char, std::string> contents, Token* previousToken = nullptr);
  180.  
  181.     // Returns the contents of the token
  182.     std::string getContents();
  183.  
  184.     // Returns the token's group
  185.     TokenGroup& getGroup();
  186.  
  187.     // Comparison operators for comparing token types
  188.     bool operator==(const TokenGroup group) const
  189.     {
  190.         return mGroup == group;
  191.     }
  192.  
  193.     bool operator!=(const TokenGroup group) const
  194.     {
  195.         return mGroup != group;
  196.     }
  197.  
  198.     // Comparision operators for comparing operator type
  199.     bool operator==(const UniCharOperators otherOperator)
  200.     {
  201.         assert(!mOperatorType.index() && "Cannot compare a single character operator to a multi-character operator!\n");
  202.  
  203.         return std::get<UniCharOperators>(mOperatorType) == otherOperator;
  204.     }
  205.  
  206.     bool operator!=(const UniCharOperators otherOperator)
  207.     {
  208.         assert(!mOperatorType.index() && "Cannot compare a single character operator to a multi-character operator!\n");
  209.  
  210.         return std::get<UniCharOperators>(mOperatorType) != otherOperator;
  211.     }
  212.  
  213.     bool operator==(const MultiCharOperators otherOperator)
  214.     {
  215.         assert(!mOperatorType.index() && "Cannot compare a multi-character operator to a single character operator!\n");
  216.  
  217.         return std::get<MultiCharOperators>(mOperatorType) == otherOperator;
  218.     }
  219.  
  220.     bool operator!=(const MultiCharOperators  otherOperator)
  221.     {
  222.         assert(!mOperatorType.index() && "Cannot compare a multi-character operator to a single character operator!\n");
  223.  
  224.         return std::get<MultiCharOperators>(mOperatorType) != otherOperator;
  225.     }
  226.  
  227. private:
  228.     void resolveSingleCharTokenOperator(UniCharOperators operatorContents, Token* previousToken);
  229.  
  230.     void resolveSingleCharTokenPrecedence(UniCharOperators operatorContents);
  231.  
  232.     void resolveMultiCharTokenOperator(MultiCharOperators operatorContents, Token* previousToken);
  233.  
  234.     void resolveMultiCharTokenPrecedence(MultiCharOperators operatorContents);
  235.  
  236.     void calculateUniCharacter(UniCharOperators operatorContents, Token* previousToken);
  237.  
  238.     void calculateMultiCharacter(MultiCharOperators operatorContents, Token* previousToken);
  239.  
  240.     bool isMultiCharToken;
  241.  
  242.     int mPrecedenceGroup;
  243.  
  244.     TokenGroup mGroup;
  245.  
  246.     std::variant<char, std::string> mContents;
  247.     std::variant<UniCharOperators, MultiCharOperators> mOperatorType;
  248. };
  249.  
  250. #include "token.h"
  251.  
  252. using namespace TokenTools;
  253.  
  254. Token::Token(TokenGroup group, std::variant<char, std::string> contents, Token* previousToken) : mGroup(group), mContents(contents), mPrecedenceGroup(-1)
  255. {
  256.     isMultiCharToken = mContents.index();
  257.  
  258.     if (isMultiCharToken)
  259.     {
  260.         mOperatorType = multiCharOperatorMappings[std::get<std::string>(mContents)];
  261.  
  262.         switch (std::get<MultiCharOperators>(mOperatorType))
  263.         {
  264.         case MultiCharOperators::INCREMENT: case MultiCharOperators::DECREMENT: case MultiCharOperators::PARANTHESIS:
  265.             resolveMultiCharTokenOperator(std::get<MultiCharOperators>(mOperatorType), previousToken);
  266.             break;
  267.  
  268.         default:
  269.             break;
  270.         }
  271.  
  272.         resolveMultiCharTokenPrecedence(std::get<MultiCharOperators>(mOperatorType));
  273.     }
  274.     else
  275.     {
  276.         mOperatorType = UniCharOperators{ std::get<char>(mContents) };
  277.        
  278.         switch (std::get<UniCharOperators>(mOperatorType))
  279.         {
  280.         case UniCharOperators::STAR_SYMBOL: case UniCharOperators::PLUS_SIGN: case UniCharOperators::MINUS_SIGN:
  281.         case UniCharOperators::AND_SIGN:
  282.             resolveSingleCharTokenOperator(std::get<UniCharOperators>(mOperatorType), previousToken);
  283.             break;
  284.  
  285.         default:
  286.             break;
  287.         }
  288.  
  289.         resolveSingleCharTokenPrecedence(std::get<UniCharOperators>(mOperatorType));
  290.     }
  291. }
  292.  
  293. void Token::resolveSingleCharTokenOperator(UniCharOperators operatorContents, Token* previousToken)
  294. {
  295.     using UCO = UniCharOperators;
  296.  
  297.     switch (operatorContents)
  298.     {
  299.     case UCO::STAR_SYMBOL:
  300.         if (previousToken != nullptr)
  301.         {
  302.             if (*previousToken == TokenGroup::NUMERIC_LITERAL || *previousToken == TokenGroup::USER_LITERAL ||
  303.                 *previousToken == TokenGroup::IDENTIFIER)
  304.                 mOperatorType = UCO::MULTIPLICATION;
  305.             else
  306.                 mOperatorType = UCO::DEREFERENCE;
  307.         }
  308.         else
  309.             mOperatorType = UCO::DEREFERENCE;
  310.  
  311.         break;
  312.  
  313.     case UCO::PLUS_SIGN:
  314.         if (previousToken != nullptr)
  315.         {
  316.             if (*previousToken == TokenGroup::NUMERIC_LITERAL || *previousToken == TokenGroup::USER_LITERAL ||
  317.                 *previousToken == TokenGroup::IDENTIFIER)
  318.                 mOperatorType = UCO::ADDITION;
  319.             else
  320.                 mOperatorType = UCO::UNARY_PLUS;
  321.         }
  322.         else
  323.             mOperatorType = UCO::UNARY_PLUS;
  324.  
  325.         break;
  326.  
  327.     case UCO::MINUS_SIGN:
  328.         if (previousToken != nullptr)
  329.         {
  330.             if (*previousToken == TokenGroup::NUMERIC_LITERAL || *previousToken == TokenGroup::USER_LITERAL ||
  331.                 *previousToken == TokenGroup::IDENTIFIER)
  332.                 mOperatorType = UCO::SUBTRACTION;
  333.             else
  334.                 mOperatorType = UCO::UNARY_NEGATION;
  335.         }
  336.         else
  337.             mOperatorType = UCO::UNARY_NEGATION;
  338.  
  339.         break;
  340.  
  341.     case UCO::AND_SIGN:
  342.         if (previousToken != nullptr)
  343.         {
  344.             if (*previousToken == TokenGroup::NUMERIC_LITERAL || *previousToken == TokenGroup::USER_LITERAL ||
  345.                 *previousToken == TokenGroup::IDENTIFIER)
  346.                 mOperatorType = UCO::BITWISE_AND;
  347.             else
  348.                 mOperatorType = UCO::ADDRESS_ACCESS;
  349.         }
  350.         else
  351.             mOperatorType = UCO::ADDRESS_ACCESS;
  352.  
  353.         break;
  354.  
  355.     default:
  356.         break;
  357.     }
  358. }
  359.  
  360. void Token::resolveSingleCharTokenPrecedence(UniCharOperators operatorContents)
  361. {
  362.     using UCO = UniCharOperators;
  363.  
  364.     switch (operatorContents)
  365.     {
  366.     case UCO::MEMBER_OBJECT_ACCESS:
  367.         mPrecedenceGroup = 16;
  368.         break;
  369.  
  370.     case UCO::BINARY_NOT: case UCO::LOGICAL_NOT: case UCO::UNARY_NEGATION: case UCO::UNARY_PLUS: case UCO::ADDRESS_ACCESS:
  371.     case UCO::DEREFERENCE:
  372.         mPrecedenceGroup = 15;
  373.         break;
  374.  
  375.     case UCO::MULTIPLICATION: case UCO::DIVISION: case UCO::MODULUS:
  376.         mPrecedenceGroup = 12;
  377.         break;
  378.  
  379.     case UCO::ADDITION: case UCO::SUBTRACTION:
  380.         mPrecedenceGroup = 11;
  381.         break;
  382.  
  383.     case UCO::LESS_THAN: case UCO::GREATER_THAN:
  384.         mPrecedenceGroup = 9;
  385.         break;
  386.  
  387.     case UCO::BITWISE_AND:
  388.         mPrecedenceGroup = 8;
  389.         break;
  390.  
  391.     case UCO::BITWISE_EXCLUSIVE_OR:
  392.         mPrecedenceGroup = 7;
  393.         break;
  394.  
  395.     case UCO::BITWISE_INCLUSIVE_OR:
  396.         mPrecedenceGroup = 6;
  397.         break;
  398.  
  399.     case UCO::ASSIGNMENT:
  400.         mPrecedenceGroup = 2;
  401.         break;
  402.  
  403.     case UCO::COMMA:
  404.         mPrecedenceGroup = 0;
  405.         break;
  406.  
  407.     default:
  408.         break;
  409.     }
  410. }
  411.  
  412. void Token::resolveMultiCharTokenOperator(MultiCharOperators operatorContents, Token* previousToken)
  413. {
  414.     using MCO = MultiCharOperators;
  415.  
  416.     switch (operatorContents)
  417.     {
  418.     case MCO::INCREMENT:
  419.         if (previousToken != nullptr)
  420.         {
  421.             if (*previousToken == TokenGroup::NUMERIC_LITERAL || *previousToken == TokenGroup::USER_LITERAL ||
  422.                 *previousToken == TokenGroup::IDENTIFIER)
  423.                 mOperatorType = MCO::POSTFIX_INCREMENT;
  424.             else
  425.                 mOperatorType = MCO::PREFIX_INCREMENT;
  426.         }
  427.         else
  428.             mOperatorType = MCO::PREFIX_INCREMENT;
  429.  
  430.         break;
  431.  
  432.     case MCO::DECREMENT:
  433.         if (previousToken != nullptr)
  434.         {
  435.             if (*previousToken == TokenGroup::NUMERIC_LITERAL || *previousToken == TokenGroup::USER_LITERAL ||
  436.                 *previousToken == TokenGroup::IDENTIFIER)
  437.                 mOperatorType = MCO::POSTFIX_DECREMENT;
  438.             else
  439.                 mOperatorType = MCO::PREFIX_DECREMENT;
  440.         }
  441.         else
  442.             mOperatorType = MCO::PREFIX_DECREMENT;
  443.  
  444.         break;
  445.  
  446.     case MCO::PARANTHESIS:
  447.         if (previousToken != nullptr)
  448.         {
  449.             switch (previousToken->getGroup())
  450.             {
  451.             case TokenGroup::IDENTIFIER:
  452.                 mOperatorType = MCO::FUNCTION_CALL;
  453.                 break;
  454.  
  455.             case TokenGroup::KEYWORD:
  456.                 mOperatorType = MCO::TYPE_CAST;
  457.                 break;
  458.  
  459.             default:
  460.                 break;
  461.             }
  462.         }
  463.         break;
  464.  
  465.     default:
  466.         break;
  467.     }
  468. }
  469.  
  470. void Token::resolveMultiCharTokenPrecedence(MultiCharOperators operatorContents)
  471. {
  472.     using MCO = MultiCharOperators;
  473.  
  474.     switch (operatorContents)
  475.     {
  476.     case MCO::SCOPE_RESOLUTION:
  477.         mPrecedenceGroup = 17;
  478.         break;
  479.  
  480.     case MCO::MEMBER_ACCESS_POINTER: case MCO::ARRAY_SUBSCRIPT: case MCO::FUNCTION_CALL: case MCO::POSTFIX_INCREMENT:
  481.     case MCO::POSTFIX_DECREMENT: case MCO::TYPE_NAME: case MCO::CONST_TYPE_CONERSION: case MCO::DYNAMIC_TYPE_CONVERSION:
  482.     case MCO::REINTERPRETED_TYPE_CONVERSION: case MCO::STATIC_TYPE_CONVERSION:
  483.         mPrecedenceGroup = 16;
  484.         break;
  485.  
  486.     case MCO::SIZE_OF_OBJECT: case MCO::PREFIX_DECREMENT: case MCO::PREFIX_INCREMENT: case MCO::CREATE_HEAP_OBJECT:
  487.     case MCO::DESTROY_HEAP_OBJECT: case MCO::TYPE_CAST:
  488.         mPrecedenceGroup = 15;
  489.         break;
  490.  
  491.     case MCO::MEMBER_ACCESS_POINTER_DEREFERENCE: case MCO::MEMBER_ACCESS_OBJECT_DEREFERENCE:
  492.         mPrecedenceGroup = 14;
  493.         break;
  494.  
  495.     case MCO::BINARY_LEFT_SHIFT: case MCO::BINARY_RIGHT_SHIFT:
  496.         mPrecedenceGroup = 11;
  497.         break;
  498.  
  499.     case MCO::GREATER_THAN_OR_EQUAL_TO: case MCO::LESS_THAN_OR_EQUAL_TO:
  500.         mPrecedenceGroup = 10;
  501.         break;
  502.  
  503.     case MCO::EQUALITY: case MCO::INEQUALITY:
  504.         mPrecedenceGroup = 9;
  505.         break;
  506.  
  507.     case MCO::LOGICAL_AND:
  508.         mPrecedenceGroup = 5;
  509.         break;
  510.  
  511.     case MCO::LOGICAL_OR:
  512.         mPrecedenceGroup = 4;
  513.         break;
  514.  
  515.     case MCO::TERNARY_OPERATOR:
  516.         mPrecedenceGroup = 3;
  517.         break;
  518.  
  519.     case MCO::MULTIPLY_ASSIGNMENT: case MCO::DIVISION_ASSIGNMENT: case MCO::MODULUS_ASSIGNMENT:
  520.     case MCO::ADDITION_ASSIGNMENT: case MCO::SUBTRACTION_ASSIGNMENT: case MCO::LEFT_SHIFT_ASSIGNMENT:
  521.     case MCO::RIGHT_SHIFT_ASSIGNMENT: case MCO::BITWISE_AND_ASSIGNMENT: case MCO::BITWISE_INCLUSIVE_OR_ASSIGNMENT:
  522.     case MCO::BITWISE_EXCLUSIVE_OR_ASSIGNMENT:
  523.         mPrecedenceGroup = 2;
  524.         break;
  525.  
  526.     case MCO::THROW_EXCEPTION:
  527.         mPrecedenceGroup = 1;
  528.         break;
  529.  
  530.     default:
  531.         break;
  532.     }
  533. }
  534.  
  535. std::string Token::getContents()
  536. {
  537.     if (!mContents.index())
  538.         return std::string{ std::get<char>(mContents) };
  539.     else
  540.         return std::get<std::string>(mContents);
  541. }
  542.  
  543. TokenGroup& Token::getGroup()
  544. {
  545.     return mGroup;
  546. }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement