Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
/**
 * Regex-based lexer that splits an input string into a flat list of {@link Token}s.
 *
 * <p>All {@link TokenType} patterns are joined into a single alternation of named
 * capturing groups (one group per token type, named after the enum constant). The
 * order of the enum constants is therefore the matching priority.
 */
public class BconLexer {

    /**
     * The kinds of token the lexer recognises. Declaration order matters: earlier
     * constants win when several patterns could match at the same position.
     */
    enum TokenType {
        // Whitespace first because it doesn't matter (it is matched but never emitted).
        WHITESPACE("[ \t\f\r]+"),
        // Values second because number and boolean need to override identifier.
        STRING("\"[^\"]*\""),
        // The negative lookahead stops NUMBER/BOOLEAN from swallowing the prefix of an
        // identifier such as "123abc" or "trueness"; those fall through to IDENTIFIER.
        NUMBER("[0-9]+(?![a-zA-Z0-9])"),
        BOOLEAN("(?:true|false)(?![a-zA-Z0-9])"),
        // Key/Value organization can go last.
        IDENTIFIER("[a-zA-Z0-9]+"),
        SPLITTER(":"),
        SEPARATOR("[,\n]"),
        OPENER("[{\\[]"),
        CLOSER("[}\\]]"),
        ;

        /** Regular expression source for this token type. */
        final String pattern;

        TokenType(String pattern) {
            this.pattern = pattern;
        }
    }

    /** A single lexed token: its type plus the matched text. */
    static class Token {
        TokenType type;
        String data;

        Token(TokenType type, String data) {
            this.type = type;
            this.data = data;
        }

        @Override
        public String toString() {
            return "Token{" +
                    "type=" + type +
                    ", data='" + data + '\'' +
                    '}';
        }
    }

    /** Combined alternation of every token pattern, compiled once and reused by {@link #lex}. */
    private static final Pattern TOKEN_PATTERN = buildTokenPattern();

    /** Joins every token pattern into one alternation of named groups, in enum order. */
    private static Pattern buildTokenPattern() {
        StringBuilder alternation = new StringBuilder();
        for (TokenType type : TokenType.values()) {
            if (alternation.length() > 0) {
                alternation.append('|');
            }
            alternation.append("(?<").append(type.name()).append('>')
                    .append(type.pattern).append(')');
        }
        return Pattern.compile(alternation.toString());
    }

    /**
     * Splits {@code input} into tokens.
     *
     * <p>Whitespace matches are consumed but not emitted. Any newline character in a
     * token's text is stored as the two-character sequence {@code \n}. Characters that
     * match none of the token patterns are skipped silently, because the scan uses
     * {@link Matcher#find()}.
     *
     * @param input the text to tokenize
     * @return the tokens in the order they appear in {@code input}; empty if none matched
     */
    static List<Token> lex(String input) {
        List<Token> tokens = new ArrayList<>();
        Matcher matcher = TOKEN_PATTERN.matcher(input);
        while (matcher.find()) {
            for (TokenType type : TokenType.values()) {
                String text = matcher.group(type.name());
                if (text == null) {
                    continue;
                }
                if (type != TokenType.WHITESPACE) {
                    tokens.add(new Token(type, text.replace("\n", "\\n")));
                }
                // Exactly one alternative participates in a match, so stop looking.
                break;
            }
        }
        return tokens;
    }
}
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement