Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
import java.util.ArrayList;
import java.util.Objects;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * A minimal regex-driven lexer.
 *
 * <p>Every {@link TokenType} contributes one named capturing group to a single
 * combined regular expression. The input is scanned left to right with
 * {@link Matcher#find()}, and each match is turned into a {@link Token} of the
 * type whose group captured it.
 */
public class Lexer {

    /** Token categories; each constant carries the regex fragment that recognizes it. */
    public enum TokenType {
        // Declaration order is significant: the fragments are joined into one
        // alternation in this order and the regex engine tries alternatives left
        // to right, so the keywords must stay ahead of CHAR or "if" would be
        // matched as two CHAR tokens.
        IF("if"),
        WHILE("while"),
        PRINT("print"),
        TYPE("int|string|boolean"),
        BOOLOP("==|!="),
        BOOLVAL("false|true"),
        INTOP("[+]"),
        CHAR("[a-z]"),
        DIGIT("[0-9]"),
        WHITESPACE("[ \t\f\r\n]+"),
        LPAREN("[(]"),
        RPAREN("[)]");

        /** Regex fragment matching this token type. */
        public final String pattern;

        TokenType(String pattern) {
            this.pattern = pattern;
        }
    }

    /** A single lexed token: its category plus the exact text that was matched. */
    public static class Token {
        public TokenType type;
        public String data;

        public Token(TokenType type, String data) {
            this.type = type;
            this.data = data;
        }
    }

    /**
     * Combined pattern of the form {@code (?<IF>if)|(?<WHILE>while)|...}, built once
     * at class initialization instead of on every {@link #lex(String)} call.
     */
    private static final Pattern TOKEN_PATTERN = buildTokenPattern();

    /** Joins every token type's fragment into one alternation of named groups. */
    private static Pattern buildTokenPattern() {
        StringBuilder alternation = new StringBuilder();
        for (TokenType tokenType : TokenType.values()) {
            if (alternation.length() > 0) {
                alternation.append('|');
            }
            alternation.append("(?<")
                    .append(tokenType.name())
                    .append('>')
                    .append(tokenType.pattern)
                    .append(')');
        }
        return Pattern.compile(alternation.toString());
    }

    /**
     * Splits {@code input} into tokens.
     *
     * <p>Whitespace is consumed but produces no token. Characters that match no
     * token pattern (for example {@code '@'}) are skipped silently, because
     * {@link Matcher#find()} simply advances to the next position where a match
     * exists.
     *
     * @param input the text to tokenize; must not be {@code null}
     * @return the recognized tokens in input order; empty if nothing matched
     * @throws NullPointerException if {@code input} is {@code null}
     */
    public static ArrayList<Token> lex(String input) {
        Objects.requireNonNull(input, "input");

        ArrayList<Token> tokens = new ArrayList<>();
        Matcher matcher = TOKEN_PATTERN.matcher(input);
        while (matcher.find()) {
            // Exactly one alternative (and therefore one named group) participates
            // in any given match, so the first non-null group identifies the type.
            for (TokenType tokenType : TokenType.values()) {
                String text = matcher.group(tokenType.name());
                if (text == null) {
                    continue;
                }
                if (tokenType != TokenType.WHITESPACE) {
                    tokens.add(new Token(tokenType, text));
                }
                break;
            }
        }
        return tokens;
    }

    /** Lexes a fixed sample string and prints one debug line per token. */
    public static void main(String[] args) {
        String input = "if (int 5) abc @";
        // Create tokens and print them
        ArrayList<Token> tokens = lex(input);
        for (Token token : tokens) {
            System.out.println("DEBUG Lexer - " + token.type + " [ " + token.data + " ] " + "found at " + "linenumber");
        }
    }
}
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement