Guest User

Untitled

a guest
Jul 22nd, 2018
60
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 6.59 KB | None | 0 0
  1. #include "bacon/Lexer/Lexer.h"
  2. #include "bacon/Basic/TokenKind.h"
  3. #include "cassert"
  4.  
  5. #define cur data[position]
  6. #define curi data[position++]
  7.  
  8. using namespace bacon;
  9.  
  10. Lexer::Lexer() {
  11. std::string letters = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPGRSTUVWXYZ_";
  12. for (std::string::iterator it = letters.begin(); it != letters.end(); it++)
  13. types[*it] = Letter;
  14.  
  15. std::string numbers = "0123456789";
  16. for(std::string::iterator it = numbers.begin(); it != numbers.end(); it++)
  17. types[*it] = Number;
  18.  
  19. std::string whitespaces = " \t\n\r";
  20. for(std::string::iterator it = whitespaces.begin();
  21. it != whitespaces.end(); it++)
  22. types[*it] = Whitespace;
  23.  
  24. std::string eof("\0",1);
  25. for(std::string::iterator it = eof.begin(); it != eof.end(); it++)
  26. types[*it] = Eof;
  27.  
  28. std::string symbols = "+-*%/^#=~<>(){}[];:,.\"";
  29. for(std::string::iterator it = symbols.begin(); it != symbols.end(); it++)
  30. types[*it] = Symbol;
  31.  
  32. #define KEYWORD(X) HashTable[#X] = kw_ ## X;
  33. #include "bacon/Basic/TokenKind.def"
  34. }
  35.  
  36. const SourceLocation Lexer::loc(size_t position) const {
  37. return SourceLocation(position, fileID);
  38. }
  39.  
  40. const Token* Lexer::lexTokens(const SourceLocation start,
  41. const llvm::StringRef &data) {
  42. fileID = start.getFileID();
  43. size_t position = start.getLocation();
  44.  
  45. Token *tokens = new Token[data.size()];
  46. size_t index = 0;
  47. Token t;
  48.  
  49. while (position < data.size())
  50. switch (curi) {
  51. case ' ':
  52. case '\n':
  53. case '\t':
  54. continue;
  55.  
  56. case '(':
  57. tokens[index++] = Token(loc(position-1), 1, l_param);
  58. continue;
  59. case ')':
  60. tokens[index++] = Token(loc(position-1), 1, r_param);
  61. continue;
  62. case '{':
  63. tokens[index++] = Token(loc(position-1), 1, l_brace);
  64. continue;
  65. case '}':
  66. tokens[index++] = Token(loc(position-1), 1, r_brace);
  67. continue;
  68. case '[':
  69. tokens[index++] = Token(loc(position-1), 1, l_bracket);
  70. continue;
  71. case ']':
  72. tokens[index++] = Token(loc(position-1), 1, r_bracket);
  73. continue;
  74. case '.':
  75. tokens[index++] = Token(loc(position-1), 1, dot);
  76. continue;
  77. case ';':
  78. tokens[index++] = Token(loc(position-1), 1, seperator);
  79. continue;
  80. case '=':
  81. if (cur == '=')
  82. tokens[index++] = Token(loc(position++-1), 2, equal);
  83. else
  84. tokens[index++] = Token(loc(position-1), 1, assign);
  85. continue;
  86. case '+':
  87. tokens[index++] = Token(loc(position-1), 1, plus);
  88. continue;
  89. case '-':
  90. tokens[index++] = Token(loc(position-1), 1, minus);
  91. continue;
  92. case '*':
  93. tokens[index++] = Token(loc(position-1), 1, mul);
  94. continue;
  95. case '%':
  96. tokens[index++] = Token(loc(position-1), 1, mod);
  97. continue;
  98. case '^':
  99. tokens[index++] = Token(loc(position-1), 1, pow);
  100. continue;
  101. case '#':
  102. tokens[index++] = Token(loc(position-1), 1, sharp);
  103. continue;
  104. case '~':
  105. tokens[index++] = Token(loc(position-1), 1, tilde);
  106. continue;
  107. case '<':
  108. if (cur == '=')
  109. tokens[index++] = Token(loc(position++-1), 2, ltequal);
  110. else
  111. tokens[index++] = Token(loc(position-1), 1, lt);
  112. continue;
  113. case '>':
  114. if (cur == '=')
  115. tokens[index++] = Token(loc(position++-1), 2, gtequal);
  116. else
  117. tokens[index++] = Token(loc(position-1), 1, gt);
  118. continue;
  119. case ':':
  120. tokens[index++] = Token(loc(position-1), 1, colon);
  121. continue;
  122. case ',':
  123. tokens[index++] = Token(loc(position-1), 1, comma);
  124. continue;
  125. case '!':
  126. if (cur == '=')
  127. tokens[index++] = Token(loc(position++-1), 2, notequal);
  128. else
  129. assert(0 && "unimplemented character");
  130. continue;
  131. case '?':
  132. tokens[index++] = Token(loc(position-1), 1, question);
  133. continue;
  134.  
  135. case '/':
  136. if (cur == '/') {
  137. while (curi != '\n');
  138. continue;
  139. }
  140. tokens[index++] = Token(loc(position-1), 1, div);
  141. continue;
  142.  
  143. case '0': case '1': case '2': case '3': case '4':
  144. case '5': case '6': case '7': case '8': case '9':
  145. {
  146. int i = position;
  147. while (types[cur] == Number)
  148. position++;
  149. tokens[index++] = Token(loc(i-1), position-(i-1), number_literal);
  150. continue;
  151. }
  152.  
  153. case '\"':
  154. {
  155. unsigned int pos = position;
  156.  
  157. // scan to the next " and skip in-line \"
  158. while(curi != '\"') {
  159. assert((int)cur && "Missing end \"");
  160.  
  161. if(data[position-1] == '\\')
  162. position++;
  163. }
  164.  
  165. tokens[index++] = Token(loc(pos-1),
  166. position-(pos-1),
  167. string_literal);
  168. continue;
  169. }
  170.  
  171. case '\'':
  172. {
  173. unsigned int pos = position;
  174.  
  175. // scan to the next '
  176. while(curi != '\'')
  177. assert((int)cur && "Missing end \'");
  178.  
  179. tokens[index++] = Token(loc(pos-1),
  180. position-(pos-1),
  181. char_literal);
  182. continue;
  183. }
  184.  
  185. case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
  186. case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
  187. case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
  188. case 's': case 't': case 'u': case 'v': case 'w': case 'x':
  189. case 'y': case 'z':
  190. case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
  191. case 'G': case 'H': case 'I': case 'J': case 'K': case 'L':
  192. case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
  193. case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
  194. case 'Y': case 'Z':
  195. case '_': {
  196. int i = position;
  197. while (types[cur] & (Letter | Number))
  198. position++;
  199.  
  200. HashTableEntryTy &tok = HashTable.GetOrCreateValue(data.slice(i-1, position));
  201. Tok t = tok.getValue();
  202. if (t)
  203. tokens[index++] = Token(loc(i-1), position-(i-1), t);
  204. else {
  205. tok.setValue(identifier);
  206. tokens[index++] = Token(loc(i-1), position-(i-1), identifier);
  207. }
  208. continue;
  209. }
  210.  
  211. case 0:
  212. tokens[index++] = Token(loc(position), 0, eof);
  213. goto Lend;
  214.  
  215. default:
  216. printf("got a %c(%i) at %zu\n", data[position-1], (int)data[position-1], position);
  217. assert(0 && "unimplemented character");
  218.  
  219. }
  220. Lend:
  221. return tokens;
  222. }
Add Comment
Please, Sign In to add comment