Advertisement
Guest User

Untitled

a guest
Feb 1st, 2019
190
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Java 3.67 KB | None | 0 0
  1. import java.util.ArrayList;
  2. import java.util.regex.Pattern;
  3. import java.util.regex.Matcher;
  4.  
  5. public class Lexer {
  6.   public static enum TokenType {
  7.     // Definitions of accepted tokens
  8.     IF("if"), WHILE("while"), PRINT("print"), TYPE("int|string|boolean"), BOOLOP("==|!="), BOOLVAL("false|true"), INTOP("[+]"), CHAR("[a-z]"), DIGIT("[0-9]"), WHITESPACE("[ \t\f\r\n]+"), LPAREN("[(]"), RPAREN("[)]");
  9.     public final String pattern;
  10.  
  11.     private TokenType(String pattern) {
  12.       this.pattern = pattern;
  13.     }
  14.   }
  15.  
  16.   public static class Token {
  17.     public TokenType type;
  18.     public String data;
  19.  
  20.     public Token(TokenType type, String data) {
  21.       this.type = type;
  22.       this.data = data;
  23.     }
  24.   }
  25.  
  26.   public static ArrayList<Token> lex(String input) {
  27.     // The tokens to return
  28.     ArrayList<Token> tokens = new ArrayList<Token>();
  29.  
  30.     // allows us to work with a mutable string
  31.     StringBuffer tokenPatternsBuffer = new StringBuffer();
  32.    
  33.     for (TokenType tokenType : TokenType.values()) {
  34.       tokenPatternsBuffer.append(String.format("|(?<%s>%s)", tokenType.name(), tokenType.pattern));
  35.     }
  36.     Pattern tokenPatterns = Pattern.compile(new String(tokenPatternsBuffer.substring(1)));
  37.  
  38.     // Begin matching tokens
  39.     Matcher matcher = tokenPatterns.matcher(input);
  40.       while (matcher.find()) {
  41.         if (matcher.group(TokenType.DIGIT.name()) != null) {
  42.           tokens.add(new Token(TokenType.DIGIT, matcher.group(TokenType.DIGIT.name())));
  43.           continue;
  44.       } else if (matcher.group(TokenType.IF.name()) != null) {
  45.           tokens.add(new Token(TokenType.IF, matcher.group(TokenType.IF.name())));
  46.           continue;
  47.       } else if (matcher.group(TokenType.WHILE.name()) != null) {
  48.           tokens.add(new Token(TokenType.WHILE, matcher.group(TokenType.WHILE.name())));
  49.           continue;
  50.       } else if (matcher.group(TokenType.TYPE.name()) != null) {
  51.           tokens.add(new Token(TokenType.TYPE, matcher.group(TokenType.TYPE.name())));
  52.           continue;
  53.       } else if (matcher.group(TokenType.PRINT.name()) != null) {
  54.           tokens.add(new Token(TokenType.PRINT, matcher.group(TokenType.PRINT.name())));
  55.           continue;
  56.       } else if (matcher.group(TokenType.BOOLOP.name()) != null) {
  57.           tokens.add(new Token(TokenType.BOOLOP, matcher.group(TokenType.BOOLOP.name())));
  58.           continue;
  59.       } else if (matcher.group(TokenType.BOOLVAL.name()) != null) {
  60.           tokens.add(new Token(TokenType.BOOLVAL, matcher.group(TokenType.BOOLVAL.name())));
  61.           continue;
  62.       } else if (matcher.group(TokenType.INTOP.name()) != null) {
  63.           tokens.add(new Token(TokenType.INTOP, matcher.group(TokenType.INTOP.name())));
  64.           continue;
  65.       } else if (matcher.group(TokenType.CHAR.name()) != null) {
  66.         tokens.add(new Token(TokenType.CHAR, matcher.group(TokenType.CHAR.name())));
  67.         continue;
  68.       } else if (matcher.group(TokenType.LPAREN.name()) != null) {
  69.           tokens.add(new Token(TokenType.LPAREN, matcher.group(TokenType.LPAREN.name())));
  70.           continue;
  71.       } else if (matcher.group(TokenType.RPAREN.name()) != null) {
  72.           tokens.add(new Token(TokenType.RPAREN, matcher.group(TokenType.RPAREN.name())));
  73.           continue;  
  74.       } else if (matcher.group(TokenType.WHITESPACE.name()) != null) {
  75.           continue;
  76.       }
  77.     }
  78.     return tokens;
  79.   }
  80.  
  81.   public static void main(String[] args) {
  82.     String input = "if (int 5) abc @";
  83.     // Create tokens and print them
  84.     ArrayList<Token> tokens = lex(input);
  85.     for (Token token : tokens)
  86.       System.out.println("DEBUG Lexer - " + token.type + " [ " + token.data + " ] " + "found at " + "linenumber");
  87.   }
  88. }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement