// Untitled

import java.util.ArrayList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Regex-based lexer that splits BCON text into a flat list of {@link Token}s.
 *
 * <p>Every {@link TokenType} contributes one named capturing group to a single
 * combined regular expression; the lexer repeatedly searches the input with that
 * expression and reports which group produced each match.
 */
public class BconLexer {
    /**
     * Token categories, each paired with the regular expression that recognises it.
     *
     * <p>Declaration order is significant: the patterns are joined into one
     * alternation in this order, and at any input position the first alternative
     * that matches wins. Each constant name is also used verbatim as a regex
     * group name, so it must be a valid group name (letters and digits only).
     */
    enum TokenType {
        // Whitespace first because it is discarded and never competes with real tokens.
        // Newlines are deliberately excluded here: they are SEPARATOR tokens.
        WHITESPACE("[ \t\f\r]+"),

        // Values second because number and boolean need to override identifier.
        // The negative lookahead makes them win only for a *whole* word, so that
        // "123abc" or "trueish" stays a single IDENTIFIER instead of being split.
        STRING("\"[^\"]*\""),   // no escape sequences: a string ends at the next quote
        NUMBER("[0-9]+(?![a-zA-Z0-9])"),
        BOOLEAN("(?:true|false)(?![a-zA-Z0-9])"),

        // Key/value organization can go last.
        IDENTIFIER("[a-zA-Z0-9]+"),
        SPLITTER(":"),
        SEPARATOR("[,\n]"),
        OPENER("[{\\[]"),
        CLOSER("[}\\]]"),
        ;

        /** Regular-expression source for this token type. */
        final String pattern;

        TokenType(String pattern) {
            this.pattern = pattern;
        }
    }

    /** A single lexed token: its category plus the matched text. */
    static class Token {
        TokenType type;
        String data;

        /**
         * @param type category of the token
         * @param data text of the token as produced by {@link BconLexer#lex(String)}
         */
        Token(TokenType type, String data) {
            this.type = type;
            this.data = data;
        }

        @Override
        public String toString() {
            return "Token{" +
                    "type=" + type +
                    ", data='" + data + '\'' +
                    '}';
        }
    }

    /** Combined pattern with one named group per token type, compiled once. */
    private static final Pattern TOKEN_PATTERN = buildTokenPattern();

    /** Joins every token type's regex into {@code (?<NAME>regex)|(?<NAME>regex)|...}. */
    private static Pattern buildTokenPattern() {
        StringBuilder alternation = new StringBuilder();
        for (TokenType type : TokenType.values()) {
            if (alternation.length() > 0) {
                alternation.append('|');
            }
            alternation.append("(?<").append(type.name()).append('>')
                    .append(type.pattern).append(')');
        }
        return Pattern.compile(alternation.toString());
    }

    /**
     * Splits {@code input} into tokens.
     *
     * <p>Whitespace matches are dropped. Characters that no token pattern matches
     * are skipped silently rather than reported. Any newline inside a token's text
     * (a newline SEPARATOR, or a line break inside a STRING) is stored as the two
     * characters backslash and {@code n}.
     *
     * @param input text to lex; must not be {@code null}
     * @return the tokens in input order; empty if nothing matched
     */
    static List<Token> lex(String input) {
        List<Token> tokens = new ArrayList<>();

        Matcher matcher = TOKEN_PATTERN.matcher(input);
        while (matcher.find()) {
            for (TokenType type : TokenType.values()) {
                if (type == TokenType.WHITESPACE) {
                    continue;
                }
                String text = matcher.group(type.name());
                if (text != null) {
                    tokens.add(new Token(type, text.replace("\n", "\\n")));
                    // Only one alternative can participate in a given match.
                    break;
                }
            }
        }

        return tokens;
    }
}