Advertisement
Guest User

Untitled

a guest
Feb 21st, 2019
68
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 1.78 KB | None | 0 0
  import java.util.ArrayList;
  import java.util.List;
  import java.util.Objects;
  import java.util.regex.Matcher;
  import java.util.regex.Pattern;

  /**
   * Regex-driven lexer that splits an input string into a flat list of {@link Token}s.
   *
   * <p>All {@link TokenType} patterns are joined into a single alternation of named groups,
   * one group per token type, in declaration order. Because regex alternation takes the first
   * alternative that matches at a position, declaration order in {@link TokenType} is the
   * lexer's priority order.
   */
  public class BconLexer {

      /** Token categories, each paired with the regular expression that recognises it. */
      enum TokenType {
          // Whitespace first because it doesn't matter: it is matched but never emitted.
          WHITESPACE("[ \t\f\r]+"),

          // Values second because number and boolean need to override identifier.
          STRING("\"[^\"]*\""),
          // The negative lookahead stops a value from being carved out of the front of a
          // longer identifier ("123abc" and "trueValue" are single IDENTIFIER tokens).
          NUMBER("[0-9]+(?![a-zA-Z0-9])"),
          BOOLEAN("(?:true|false)(?![a-zA-Z0-9])"),

          // Key/value organization can go last.
          IDENTIFIER("[a-zA-Z0-9]+"),
          SPLITTER(":"),
          SEPARATOR("[,\n]"),
          OPENER("[{\\[]"),
          CLOSER("[}\\]]");

          /** Regular expression source for this token type. */
          final String pattern;

          TokenType(String pattern) {
              this.pattern = pattern;
          }
      }

      /** A single lexed token: its category plus the matched text. */
      static class Token {
          TokenType type;
          String data;

          Token(TokenType type, String data) {
              this.type = type;
              this.data = data;
          }

          @Override
          public String toString() {
              return "Token{" +
                      "type=" + type +
                      ", data='" + data + '\'' +
                      '}';
          }
      }

      /** Combined pattern for every token type, compiled once instead of on every call. */
      private static final Pattern TOKEN_PATTERN = buildTokenPattern();

      /** Joins every token type's regex into one alternation of named groups. */
      private static Pattern buildTokenPattern() {
          StringBuilder alternation = new StringBuilder();
          for (TokenType type : TokenType.values()) {
              if (alternation.length() > 0) {
                  alternation.append('|');
              }
              // The group name is the enum constant name, so lex() can look it up by type.
              alternation.append("(?<").append(type.name()).append('>')
                      .append(type.pattern).append(')');
          }
          return Pattern.compile(alternation.toString());
      }

      /**
       * Lexes {@code input} into tokens.
       *
       * <p>Whitespace matches are dropped. Any newline inside a token's text is stored as the
       * two characters backslash and {@code n}. Characters that match no token type are
       * skipped without error, because the scan uses {@link Matcher#find()}.
       *
       * @param input the text to lex; must not be {@code null}
       * @return the tokens in the order they appear in {@code input}; empty if there are none
       * @throws NullPointerException if {@code input} is {@code null}
       */
      static List<Token> lex(String input) {
          Objects.requireNonNull(input, "input");
          List<Token> tokens = new ArrayList<>();

          Matcher matcher = TOKEN_PATTERN.matcher(input);
          while (matcher.find()) {
              for (TokenType type : TokenType.values()) {
                  if (type == TokenType.WHITESPACE) {
                      continue;
                  }
                  String text = matcher.group(type.name());
                  if (text != null) {
                      tokens.add(new Token(type, text.replace("\n", "\\n")));
                      // Only one alternative can participate in a given match.
                      break;
                  }
              }
          }

          return tokens;
      }
  }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement