Guest User

Untitled

a guest
Nov 19th, 2017
71
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 5.77 KB | None | 0 0
  1. package jcla.compiler.token;
  2.  
  3. import java.util.LinkedList;
  4. import java.util.List;
  5.  
  6. import static jcla.compiler.token.tag.Tags.*;
  7.  
  8. /**
  9. * @author link
  10. */
  11. public final class LexicalAnalyzer {
  12.  
  13. // private static final Map<String, Tag> KEYWORDS = new HashMap<>();
  14.  
  15. // static {
  16. // // modifiers
  17. // KEYWORDS.put("public", MODIFIER);
  18. // KEYWORDS.put("private", MODIFIER);
  19. // KEYWORDS.put("protected", MODIFIER);
  20. // KEYWORDS.put("static", MODIFIER);
  21. // KEYWORDS.put("final", MODIFIER);
  22. // KEYWORDS.put("synchronized", MODIFIER);
  23. // KEYWORDS.put("volatile", MODIFIER);
  24. // KEYWORDS.put("transient", MODIFIER);
  25. // KEYWORDS.put("native", MODIFIER);
  26. // KEYWORDS.put("abstract", MODIFIER);
  27. // KEYWORDS.put("strictfp", MODIFIER);
  28. //
  29. //
  30. // // type modifiers
  31. // KEYWORDS.put("class", TYPE_MODIFIER);
  32. // KEYWORDS.put("enum", TYPE_MODIFIER);
  33. // KEYWORDS.put("interface", TYPE_MODIFIER);
  34. //
  35. // // Inheritance
  36. // KEYWORDS.put("extends", KEYWORD);
  37. // KEYWORDS.put("implements", KEYWORD);
  38. // KEYWORDS.put("import", KEYWORD);
  39. // KEYWORDS.put("super", KEYWORD);
  40. //
  41. // // values
  42. // KEYWORDS.put("null", VALUE);
  43. // KEYWORDS.put("true", VALUE);
  44. // KEYWORDS.put("false", VALUE);
  45. //
  46. // // primitive types
  47. // KEYWORDS.put("char", KEYWORD);
  48. // KEYWORDS.put("boolean", KEYWORD);
  49. // KEYWORDS.put("byte", KEYWORD);
  50. // KEYWORDS.put("short", KEYWORD);
  51. // KEYWORDS.put("int", KEYWORD);
  52. // KEYWORDS.put("long", KEYWORD);
  53. // KEYWORDS.put("float", KEYWORD);
  54. // KEYWORDS.put("double", KEYWORD);
  55. //
  56. // // special reference
  57. // KEYWORDS.put("this", KEYWORD);
  58. // }
  59.  
  60. // WhiteSpace
  61. private static final char SP = ' ';
  62. private static final char HT = '\t';
  63. private static final char FF = '\u000C';
  64.  
  65. // LineTerminator
  66. private static final char LF = '\n';
  67. private static final char CR = '\r';
  68.  
  69. /**
  70. * Analyzes the given String character by character to create Tokens. If the input was a comment, a List with only
  71. * null is returned.
  72. *
  73. * @param input the input to analyze
  74. * @return a list of tokens from the input, or null if the input was a comment
  75. */
  76. public List<Token> analyze(String input) {
  77. List<Token> result = new LinkedList<>();
  78. // create a buffer for the identifier the size of the input.
  79. // we don't know if the input is a single token or multiple,
  80. // so we use input.length()
  81. char[] in = input.toCharArray();
  82.  
  83. StringBuilder buffer = new StringBuilder(input.length());
  84.  
  85. // single line comment
  86. boolean singleline = false;
  87. // multi line comment
  88. boolean multiline = false;
  89.  
  90. char previous = 0;
  91.  
  92. for (int i = 0; i < in.length; i++) {
  93. char current = in[i];
  94.  
  95. switch( current ) {
  96. // comments
  97. case '/':
  98. if (previous == '/')
  99. singleline = true;
  100. else if (previous == '*') {
  101. multiline = false;
  102. }
  103.  
  104. break;
  105. case '*':
  106. if (previous == '/') {
  107. multiline = true;
  108. }
  109.  
  110. break;
  111. // white space
  112. case CR:
  113. case LF:
  114. singleline = false;
  115. case SP:
  116. case HT:
  117. case FF:
  118. result.add(identify(buffer.toString()));
  119. break;
  120. default:
  121. if (!singleline && !multiline)
  122. buffer.append(current);
  123. }
  124.  
  125. previous = current;
  126. }
  127.  
  128. return result;
  129. }
  130.  
  131. /**
  132. * Takes a single input as a String and identifies the token contained in it. If there are multiple tokens in the
  133. * String, the first token will be returned.
  134. *
  135. * @param input the input to analyze
  136. * @return a token representing the first token in the input
  137. */
  138. public Token identify(String input) {
  139. // Tokens
  140. //
  141. // -- identifier
  142. // IdentifierChars, !Literal
  143. //
  144. // -- keyword
  145. // finite set of character permutations (keywords)
  146. //
  147. // -- literal
  148. // IntegerLiteral, FloatingPointLiteral,
  149. // BooleanLiteral, CharacterLiteral,
  150. // StringLiteral, NullLiteral
  151. //
  152. // -- separator
  153. // ( , ) , { , } , [ , ] , ; , . , ... , @ , ::
  154. //
  155. // -- operator
  156. // = > < ! ~ ? : ->
  157. // == >= <= != && || ++ --
  158. // + - * / & | ^ % << >> >>>
  159. // += -= *= /= &= |= ^= %= <<= >>= >>>=
  160.  
  161. switch( input ) {
  162. case "abstract":
  163. case "assert":
  164. case "boolean":
  165. case "break":
  166. case "byte":
  167. case "case":
  168. case "catch":
  169. case "char":
  170. case "class":
  171. case "const":
  172. case "continue":
  173. case "default":
  174. case "do":
  175. case "double":
  176. case "else":
  177. case "enum":
  178. case "extends":
  179. case "final":
  180. case "finally":
  181. case "float":
  182. case "for":
  183. case "if":
  184. case "goto":
  185. case "implements":
  186. case "import":
  187. case "instanceof":
  188. case "int":
  189. case "interface":
  190. case "long":
  191. case "native":
  192. case "new":
  193. case "package":
  194. case "private":
  195. case "protected":
  196. case "public":
  197. case "return":
  198. case "short":
  199. case "static":
  200. case "strictfp":
  201. case "super":
  202. case "switch":
  203. case "synchronized":
  204. case "this":
  205. case "throw":
  206. case "throws":
  207. case "transient":
  208. case "try":
  209. case "void":
  210. case "volatile":
  211. case "while":
  212. case "_":
  213. return new Token(input, KEYWORD);
  214. case "(":
  215. case ")":
  216. case "{":
  217. case "}":
  218. case "[":
  219. case ";":
  220. case ",":
  221. case ".":
  222. case "...":
  223. case "@":
  224. case "::":
  225. return new Token(input, SEPARATOR);
  226. case "=":
  227. case ">":
  228. case "<":
  229. case "!":
  230. case "~":
  231. case "?":
  232. case ":":
  233. case "->":
  234. case "==":
  235. case ">=":
  236. case "<=":
  237. case "!=":
  238. case "&&":
  239. case "||":
  240. case "++":
  241. case "--":
  242. case "+":
  243. case "-":
  244. case "*":
  245. case "/":
  246. case "&":
  247. case "|":
  248. case "^":
  249. case "%":
  250. case "<<":
  251. case ">>":
  252. case ">>>":
  253. case "+=":
  254. case "-=":
  255. case "*=":
  256. case "/=":
  257. case "&=":
  258. case "|=":
  259. case "^=":
  260. case "%=":
  261. case "<<=":
  262. case ">>=":
  263. case ">>>=":
  264. return new Token(input, OPERATOR);
  265. default:
  266. return filter(input);
  267. }
  268. }
  269.  
  270. private static Token filter(String input) {
  271. boolean digits = false;
  272. boolean letters = false;
  273. }
  274.  
  275. }
Add Comment
Please, Sign In to add comment