Guest User

Untitled

a guest
Nov 28th, 2018
113
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 3.63 KB | None | 0 0
  1. //
  2. // Created by ottomated on 27/11/18.
  3. //
  4.  
  5. #include "Tokenizer.h"
  6. #include <ctype.h>
  7. #include <string>
  8. #include <unordered_map>
  9.  
  10. using namespace std;
  11.  
  12. Tokenizer::Tokenizer(string p) : program(std::move(p)), currentToken(Token(Empty, 0)), pos(0) {}
  13.  
  14. Token Tokenizer::getNextToken() {
  15. int i = pos;
  16. pos++;
  17. if (i > program.size()) {
  18. currentToken = Token(Eof);
  19. return currentToken;
  20. }
  21. char c = program[i];
  22. while (isspace(c)) {
  23. i++;
  24. pos++;
  25. c = program[i];
  26. }
  27. if (!c) {
  28. currentToken = Token(Eof);
  29. return currentToken;
  30. }
  31. // Find single chars that can't be confused
  32. unordered_map<char, TokenType> singleChars = {
  33. {'(', LeftParen},
  34. {')', RightParen},
  35. {'[', LeftBracket},
  36. {']', RightBracket},
  37. {'{', LeftBrace},
  38. {'}', RightBrace},
  39. {';', Semicolon},
  40. {':', Colon},
  41. {',', Comma},
  42. {'.', Dot},
  43. {'!', Not},
  44. };
  45. auto it = singleChars.find(c);
  46.  
  47. if (it != singleChars.end()) {
  48. currentToken = Token(it->second);
  49. return currentToken;
  50. }
  51.  
  52.  
  53. // Find 2-char tokens that could be confused
  54. string nextTwo = program.substr(i, 2);
  55. {
  56. unordered_map<string, TokenType> doubleChars = {
  57. {"++", Increment},
  58. {"--", Decrement},
  59. {"+=", PlusEquals},
  60. {"-=", MinusEquals},
  61. {"*=", MultiplyEquals},
  62. {"/=", DivideEquals},
  63. {"%=", ModuloEquals},
  64. {"<=", LessThanEquals},
  65. {">=", GreaterThanEquals},
  66. {"==", DoubleEquals},
  67. {"&&", And},
  68. {"||", Or},
  69. };
  70. auto it = doubleChars.find(nextTwo);
  71.  
  72. if (it != doubleChars.
  73.  
  74. end()
  75.  
  76. ) {
  77. pos++; // Go one more forward
  78. currentToken = Token(it->second);
  79. return
  80. currentToken;
  81. }
  82. }
  83. // Find confusable single chars
  84. {
  85. unordered_map<char, TokenType> singleChars = {
  86. {'+', Plus},
  87. {'-', Minus},
  88. {'*', Multiply},
  89. {'/', Divide},
  90. {'%', Modulo},
  91. {'=', SingleEquals},
  92. {'<', LessThan},
  93. {'>', GreaterThan},
  94. {'&', BinaryAnd},
  95. {'|', BinaryOr},
  96. {'^', BinaryXor},
  97. };
  98. auto it = singleChars.find(c);
  99.  
  100. if (it != singleChars.
  101.  
  102. end()
  103.  
  104. ) {
  105. currentToken = Token(it->second);
  106. return
  107. currentToken;
  108. }
  109. }
  110. string parse;
  111. // Integers
  112. if (isdigit(c)) {
  113. while (isdigit(c)) {
  114. parse += c;
  115. pos++;
  116. i++;
  117. c = program[i];
  118. }
  119. currentToken = Token(Integer, stoi(parse));
  120. return currentToken;
  121. }
  122. // Strings
  123. if (c == '"') {
  124. bool isEscaping = false;
  125. do {
  126. pos++;
  127. i++;
  128. c = program[i];
  129. parse += c;
  130. } while (program[pos] != '"');
  131. pos++;
  132. currentToken = Token(String, parse);
  133. return currentToken;
  134. }
  135. const string validNameChars = "qwertyuiopasdfghjklzxcvbnmQWERTYUIOPASDFGHJKLZXCVBNM_-";
  136. while (validNameChars.find(c) != validNameChars.npos) {
  137. parse += c;
  138. i++;
  139. pos++;
  140. c = program[i];
  141. }
  142. if (parse.size() == 0) {
  143. currentToken = Token(Empty);
  144. } else {
  145. currentToken = Token(Name, parse);
  146. return currentToken;
  147. }
  148. }
Add Comment
Please, Sign In to add comment