Advertisement
Guest User

Lexer.java

a guest
Jan 31st, 2018
23
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Java 7.47 KB | None | 0 0
  1. import java.util.ArrayList;
  2. import java.util.List;
  3.  
  4. import static net.pyrite.lexer.TokenType.*;
  5.  
  6. import net.pyrite.exception.SyntaxError;
  7.  
  8. public class Lexer {
  9.  
  10.     private String stream;
  11.     private int line = 1, column = 1;
  12.    
  13.     private List<Token> tokens;
  14.    
  15.     public Lexer(String input) {
  16.         this.stream = input;
  17.         this.tokens = new ArrayList<Token>();
  18.     }
  19.  
  20.     public final void tokenise() {
  21.         Token token = null;
  22.         while (true) {
  23.             if (token != null) tokens.add(token);
  24.             if ((token = whitespace()) != null || (token = comment()) != null) {
  25.                 token = null;
  26.                 continue;
  27.             }
  28.             if ((token = identifier()) != null) continue;
  29.             if ((token = string()) != null) continue;
  30.             if ((token = character()) != null) continue;
  31.             if ((token = cblock()) != null) continue;
  32.             if ((token = number()) != null) continue;
  33.             if ((token = symbol()) != null) continue;
  34.             if (peek() != '\0') unexpected(pop());
  35.             break;
  36.         }
  37.     }
  38.    
  39.     public final List<Token> getOutput() {
  40.         return tokens;
  41.     }
  42.    
  43.     private final Token whitespace() {
  44.         if (!peek(' ') && !peek('\t') && !peek('\n')) return null;
  45.         pop();
  46.         return new Token(null, null, 0, 0);
  47.     }
  48.  
  49.     private final Token comment() {
  50.         if (peek('/')) {
  51.             if (npeek(1) == '/') {
  52.                 pop();
  53.                 while (true) {
  54.                     pop();
  55.                     if (peek('\n') || stream.length() == 0) {
  56.                         pop();
  57.                         return new Token(null, null, 0, 0);
  58.                     }
  59.                 }
  60.             } else if (npeek(1) == '*') {
  61.                 pop();
  62.                 while (true) {
  63.                     pop();
  64.                     if (peek() == '\0') {System.out.println(stream);
  65.                         expected('*', '\0');
  66.                     }
  67.                     else if (peek('*') && npeek(1) == '/') {
  68.                         pop();
  69.                         pop();
  70.                         return new Token(null, null, 0, 0);
  71.                     }
  72.                 }
  73.             } else return null;
  74.         }
  75.         return null;
  76.     }
  77.    
  78.     private final Token identifier() {
  79.         if (!alphabetical() && !peek('_')) return null;
  80.         int line = this.line, column = this.column;
  81.         StringBuilder identifier = new StringBuilder(Character.toString(check(pop())));
  82.         boolean qualifying = false, qualified = false;
  83.         while (true) {
  84.             if (alphabetical() || digit() || peek('_')) qualifying = false;
  85.             else if (peek('.')) {
  86.                 qualified = true;
  87.                 if (!qualifying) qualifying = true;
  88.                 else unexpected(check(pop()));
  89.             } else break;
  90.             identifier.append(check(pop()));
  91.         }
  92.         return new Token((qualified ? QUALIFIED : IDENTIFIER), identifier.toString(), line, column);
  93.     }
  94.    
  95.     private final Token string() {
  96.         if (!peek('"')) return null;
  97.         int line = this.line, column = this.column;
  98.         StringBuilder string = new StringBuilder(Character.toString(check(pop())));
  99.         while (true) {
  100.             if (peek('\\')) {
  101.                 pop();
  102.                 if (!escape()) error("Syntax error, unsupported escape sequence '" + pop() + "'. Supported sequences are '\"nrtb0");
  103.                 else string.append("\\" + check(pop()));
  104.             } else if (peek('"')) {
  105.                 string.append(check(pop()));
  106.                 return new Token(STRING_LITERAL, string.toString(), line, column);
  107.             } else if (peek('\0')) check(pop());
  108.             else string.append(check(pop()));
  109.         }
  110.     }
  111.    
  112.     private final Token character() {
  113.         if (!peek('\'')) return null;
  114.         int line = this.line, column = this.column;
  115.         StringBuilder character = new StringBuilder(Character.toString(check(pop())));
  116.         if (peek('\\')) {
  117.             pop();
  118.             if (!escape()) error("Syntax error, unsupported escape sequence '\\" + pop() + "' at " + line + ":" + column + ". Supported sequences are '\"nrtb0");
  119.             else character.append("\\" + check(pop()));
  120.         } else if (peek('\'')) error("Syntax error, character literal at " + line + ":" + column + " cannot be empty.");
  121.         else character.append(check(pop()));
  122.         if (!peek('\'')) expected('\'', pop());
  123.         else character.append(check(pop()));
  124.         return new Token(CHAR_LITERAL, character.toString(), line, column);
  125.     }
  126.    
  127.     private final Token cblock() {
  128.         if (!peek('~')) return null;
  129.         int line = this.line, column = this.column;
  130.         StringBuilder block = new StringBuilder(Character.toString(check(pop())));
  131.         while (!peek('~') && peek() != '\0') {
  132.             block.append(pop());
  133.         }
  134.         block.append(check(pop()));
  135.         return new Token(C_BLOCK, block.toString(), line, column);
  136.     }
  137.        
  138.     private final Token number() {
  139.         if (!digit()) return null;
  140.         int line = this.line, column = this.column;
  141.         StringBuilder number = new StringBuilder(Character.toString(check(pop())));
  142.         if (peek('x') || peek('X')) {
  143.             if (!number.toString().equals("0")) error("Syntax error, hexadecimal numbers require a '0' before the 'x' at " + line + ":" + column + ".");
  144.             number.append(pop());
  145.             while (true) {
  146.                 if (hex()) number.append(pop());
  147.                 else return new Token(INTEGER, number.toString(), line, column);
  148.             }
  149.         } else {
  150.             boolean decimal = false;
  151.             while (true) {
  152.                 if (digit()) number.append(check(pop()));
  153.                 else if (peek('.')) {
  154.                     if (decimal) unexpected('.');
  155.                     decimal = true;
  156.                     number.append(check(pop()));
  157.                 } else {
  158.                     return new Token((decimal ? DECIMAL : INTEGER), number.toString(), line, column);
  159.                 }
  160.             }
  161.         }
  162.     }
  163.    
  164.     private final Token symbol() {
  165.         StringBuilder symbol = new StringBuilder();
  166.         TokenType type = null;
  167.         int line = this.line, column = this.column;
  168.         while (true) {
  169.             boolean match = false;
  170.             for (TokenType t : TokenType.values()) {
  171.                 if (t.match == null || t.match.equals("")) continue;
  172.                 if (t.match.equals(symbol.toString() + peek())) {
  173.                     match = true;
  174.                     type = t;
  175.                     symbol.append(pop());
  176.                     break;
  177.                 }
  178.             }
  179.             if (!match) break;
  180.         }
  181.         if (symbol.length() > 0) return new Token(type, symbol.toString(), line, column);
  182.         return null;
  183.     }
  184.    
  185.     private final boolean alphabetical() {
  186.         return Character.isAlphabetic(peek());
  187.     }
  188.    
  189.     private final boolean digit() {
  190.         return Character.isDigit(peek());
  191.     }
  192.    
  193.     private final boolean escape() {
  194.         if (peek('\'') || peek('"') || peek('n') || peek('r') || peek('t') || peek('b') || peek('0')) return true;
  195.         return false;
  196.     }
  197.    
  198.     private final boolean hex() {
  199.         if (digit() ||
  200.             peek('a') || peek('A') || peek('b') || peek('B') || peek('c') || peek('C') ||
  201.             peek('d') || peek('D') || peek('e') || peek('E') || peek('f') || peek('F')) return true;
  202.         return false;
  203.     }
  204.    
  205.     private final boolean peek(char c) {
  206.         if (c == '\0' || stream.length() == 0) return false;
  207.         char first = stream.charAt(0);
  208.         if (first == '\0') return false;
  209.         return c == first;
  210.     }
  211.    
  212.     private final char peek() {
  213.         if (stream.length() == 0) return '\0';
  214.         return stream.charAt(0);
  215.     }
  216.    
  217.     private final char npeek(int n) {
  218.         if (stream.length() < n + 1) return '\0';
  219.         return stream.charAt(n);
  220.     }
  221.    
  222.     private final char pop() {
  223.         if (stream.length() > 0) {
  224.             char c = stream.charAt(0);
  225.             if (c == '\n') {
  226.                 line++;
  227.                 column = 1;
  228.             } else column++;
  229.             stream = stream.substring(1);
  230.             return c;
  231.         } return '\0';
  232.     }
  233.    
  234.     // TODO: Is SyntaxError a good choice for critical errors?
  235.     private final char check(char c) {
  236.         if (c == '\0') error("Syntax error, expected a character but found EOF.\n" +
  237.                             "Please contact a developer as this is an unexpected error.");
  238.         return c;
  239.     }
  240.    
  241.     private final void expected(char c, char found) {
  242.         error("Syntax error, expected character '" + c + "' but found '" +
  243.                             (found == '\0' ? "EOF" : found) + "'. Replace this character.");
  244.     }
  245.    
  246.     // TODO: Figure out why (column - 1) is required.
  247.     private final void unexpected(char c) {
  248.         error("Syntax error, unexpected character '" + c + "' at " + line + ":" + (column - 1) +
  249.                             ". Delete this character.");
  250.     }
  251.    
  252.     private final void error(String message) {
  253.         throw new SyntaxError(message);
  254.     }
  255.    
  256. }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement