Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- package jcla.compiler.token;
- import java.util.LinkedList;
- import java.util.List;
- import static jcla.compiler.token.tag.Tags.*;
- /**
- * @author link
- */
- public final class LexicalAnalyzer {
- // private static final Map<String, Tag> KEYWORDS = new HashMap<>();
- // static {
- // // modifiers
- // KEYWORDS.put("public", MODIFIER);
- // KEYWORDS.put("private", MODIFIER);
- // KEYWORDS.put("protected", MODIFIER);
- // KEYWORDS.put("static", MODIFIER);
- // KEYWORDS.put("final", MODIFIER);
- // KEYWORDS.put("synchronized", MODIFIER);
- // KEYWORDS.put("volatile", MODIFIER);
- // KEYWORDS.put("transient", MODIFIER);
- // KEYWORDS.put("native", MODIFIER);
- // KEYWORDS.put("abstract", MODIFIER);
- // KEYWORDS.put("strictfp", MODIFIER);
- //
- //
- // // type modifiers
- // KEYWORDS.put("class", TYPE_MODIFIER);
- // KEYWORDS.put("enum", TYPE_MODIFIER);
- // KEYWORDS.put("interface", TYPE_MODIFIER);
- //
- // // Inheritance
- // KEYWORDS.put("extends", KEYWORD);
- // KEYWORDS.put("implements", KEYWORD);
- // KEYWORDS.put("import", KEYWORD);
- // KEYWORDS.put("super", KEYWORD);
- //
- // // values
- // KEYWORDS.put("null", VALUE);
- // KEYWORDS.put("true", VALUE);
- // KEYWORDS.put("false", VALUE);
- //
- // // primitive types
- // KEYWORDS.put("char", KEYWORD);
- // KEYWORDS.put("boolean", KEYWORD);
- // KEYWORDS.put("byte", KEYWORD);
- // KEYWORDS.put("short", KEYWORD);
- // KEYWORDS.put("int", KEYWORD);
- // KEYWORDS.put("long", KEYWORD);
- // KEYWORDS.put("float", KEYWORD);
- // KEYWORDS.put("double", KEYWORD);
- //
- // // special reference
- // KEYWORDS.put("this", KEYWORD);
- // }
- // WhiteSpace
- private static final char SP = ' ';
- private static final char HT = '\t';
- private static final char FF = '\u000C';
- // LineTerminator
- private static final char LF = '\n';
- private static final char CR = '\r';
- /**
- * Analyzes the given String character by character to create Tokens. If the input was a comment, a List with only
- * null is returned.
- *
- * @param input the input to analyze
- * @return a list of tokens from the input, or null if the input was a comment
- */
- public List<Token> analyze(String input) {
- List<Token> result = new LinkedList<>();
- // create a buffer for the identifier the size of the input.
- // we don't know if the input is a single token or multiple,
- // so we use input.length()
- char[] in = input.toCharArray();
- StringBuilder buffer = new StringBuilder(input.length());
- // single line comment
- boolean singleline = false;
- // multi line comment
- boolean multiline = false;
- char previous = 0;
- for (int i = 0; i < in.length; i++) {
- char current = in[i];
- switch( current ) {
- // comments
- case '/':
- if (previous == '/')
- singleline = true;
- else if (previous == '*') {
- multiline = false;
- }
- break;
- case '*':
- if (previous == '/') {
- multiline = true;
- }
- break;
- // white space
- case CR:
- case LF:
- singleline = false;
- case SP:
- case HT:
- case FF:
- result.add(identify(buffer.toString()));
- break;
- default:
- if (!singleline && !multiline)
- buffer.append(current);
- }
- previous = current;
- }
- return result;
- }
- /**
- * Takes a single input as a String and identifies the token contained in it. If there are multiple tokens in the
- * String, the first token will be returned.
- *
- * @param input the input to analyze
- * @return a token representing the first token in the input
- */
- public Token identify(String input) {
- // Tokens
- //
- // -- identifier
- // IdentifierChars, !Literal
- //
- // -- keyword
- // finite set of character permutations (keywords)
- //
- // -- literal
- // IntegerLiteral, FloatingPointLiteral,
- // BooleanLiteral, CharacterLiteral,
- // StringLiteral, NullLiteral
- //
- // -- separator
- // ( , ) , { , } , [ , ] , ; , . , ... , @ , ::
- //
- // -- operator
- // = > < ! ~ ? : ->
- // == >= <= != && || ++ --
- // + - * / & | ^ % << >> >>>
- // += -= *= /= &= |= ^= %= <<= >>= >>>=
- switch( input ) {
- case "abstract":
- case "assert":
- case "boolean":
- case "break":
- case "byte":
- case "case":
- case "catch":
- case "char":
- case "class":
- case "const":
- case "continue":
- case "default":
- case "do":
- case "double":
- case "else":
- case "enum":
- case "extends":
- case "final":
- case "finally":
- case "float":
- case "for":
- case "if":
- case "goto":
- case "implements":
- case "import":
- case "instanceof":
- case "int":
- case "interface":
- case "long":
- case "native":
- case "new":
- case "package":
- case "private":
- case "protected":
- case "public":
- case "return":
- case "short":
- case "static":
- case "strictfp":
- case "super":
- case "switch":
- case "synchronized":
- case "this":
- case "throw":
- case "throws":
- case "transient":
- case "try":
- case "void":
- case "volatile":
- case "while":
- case "_":
- return new Token(input, KEYWORD);
- case "(":
- case ")":
- case "{":
- case "}":
- case "[":
- case ";":
- case ",":
- case ".":
- case "...":
- case "@":
- case "::":
- return new Token(input, SEPARATOR);
- case "=":
- case ">":
- case "<":
- case "!":
- case "~":
- case "?":
- case ":":
- case "->":
- case "==":
- case ">=":
- case "<=":
- case "!=":
- case "&&":
- case "||":
- case "++":
- case "--":
- case "+":
- case "-":
- case "*":
- case "/":
- case "&":
- case "|":
- case "^":
- case "%":
- case "<<":
- case ">>":
- case ">>>":
- case "+=":
- case "-=":
- case "*=":
- case "/=":
- case "&=":
- case "|=":
- case "^=":
- case "%=":
- case "<<=":
- case ">>=":
- case ">>>=":
- return new Token(input, OPERATOR);
- default:
- return filter(input);
- }
- }
- private static Token filter(String input) {
- boolean digits = false;
- boolean letters = false;
- }
- }
Add Comment
Please, Sign In to add comment