Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- import static net.pyrite.lexer.TokenType.*;
- import net.pyrite.exception.SyntaxError;
- public class StateLexer {
- private String stream;
- private int line = 1, column = 1;
- public StateLexer(String stream) {
- this.stream = stream;
- }
- public Token match() {
- int lline = line, lcolumn = column;
- TokenType state = null;
- StringBuilder token = new StringBuilder();
- if (peek() == '\0') return null;
- if (peek('_') || alphabetical()) state = IDENTIFIER;
- else if (digit()) state = INTEGER;
- else if (peek(' ') || peek('\n') || peek('\t')) state = WHITESPACE;
- else if (peek('~')) state = C_BLOCK;
- else if (peek('"')) state = STRING_LITERAL;
- else if (peek('\'')) state = CHAR_LITERAL;
- else if (peek('/') && (npeek(1) == '/' || npeek(1) == '*')) {
- state = COMMENT;
- token.append(pop());
- } else {
- token.append(pop());
- boolean found = false;
- for (TokenType t : TokenType.values()) {
- if (!t.match.equals("") && t.match.equals(token.toString())) found = true;
- }
- if (!found) unexpected(token.toString());
- state = SYMBOL;
- }
- if (state != SYMBOL) token.append(pop());
- while (true) {
- switch (state) {
- case IDENTIFIER:
- if (peek('_') || alphabetical() || digit()) {
- token.append(pop());
- } else if (peek('.')) {
- if (token.toString().endsWith(".")) unexpected(token.toString());
- token.append(pop());
- } else {
- if (token.toString().endsWith(".")) {
- column--;
- unexpected(".");
- } else {
- if (token.toString().indexOf('.') > -1) state = QUALIFIED;
- for (TokenType t : TokenType.values()) {
- if (t.match.equals("")) continue;
- if (t.match.equals(token.toString())) state = t;
- }
- return new Token(state, token.toString(), lline, lcolumn);
- }
- }
- break;
- case INTEGER:
- if (peek('x') || peek('X')) {
- if (token.length() == 1 && token.toString().charAt(0) == '0') {
- token.append(pop());
- } else unexpected(Character.toString(pop()));
- } else {
- if (token.toString().startsWith("0x")) {
- if (hex()) token.append(pop());
- else if (alphabetical()) unexpected(Character.toString(pop()));
- else if (token.length() == 2) {
- expected("hex char", Character.toString(pop()));
- } else {
- return new Token(state, token.toString(), lline, lcolumn);
- }
- } else if (peek('.')) {
- state = DECIMAL;
- token.append(pop());
- } else if (digit()) token.append(pop());
- else if (alphabetical()) unexpected(Character.toString(pop()));
- else return new Token(state, token.toString(), lline, lcolumn);
- }
- break;
- case DECIMAL:
- if (digit()) {
- token.append(pop());
- } else if (alphabetical()) {
- unexpected(Character.toString(pop()));
- } else {
- if (token.toString().endsWith(".")) {
- column--;
- unexpected(".");
- } else return new Token(state, token.toString(), lline, lcolumn);
- }
- break;
- case WHITESPACE: return new Token(WHITESPACE, token.toString(), lline, lcolumn);
- case C_BLOCK:
- if (peek('~')) {
- token.append(pop());
- return new Token(C_BLOCK, token.toString(), lline, lcolumn);
- } else if (peek() == '\0') {
- expected("~", "EOF");
- } else token.append(pop());
- break;
- case STRING_LITERAL:
- if (peek('\\')) {
- token.append(pop());
- if (escape()) {
- token.append(pop());
- } else error("Syntax error, invalid escape '\\" + pop() + "' at " + line + ":" + column + ". Valid escapes are \\' \\\" \\n \\r \\t \\b");
- } else if (peek('"')) {
- token.append(pop());
- return new Token(state, token.toString(), lline, lcolumn);
- } else if (peek() == '\0') expected("\"", "EOF");
- else if (peek('\n')) expected("\"", "\\n");
- else token.append(pop());
- break;
- case CHAR_LITERAL:
- if (peek('\\')) {
- token.append(pop());
- if (escape()) {
- token.append(pop());
- } else error("Syntax error, invalid escape '\\" + pop() + "' at " + line + ":" + column + ". Valid escapes are \\' \\\" \\n \\r \\t \\b");
- } else if (peek('\'')) error("Syntax error, character literal at " + line + ":" + column + " must contain a character.");
- else token.append(pop());
- if (!peek('\'')) expected("'", Character.toString(pop()));
- else {
- token.append(pop());
- return new Token(state, token.toString(), lline, lcolumn);
- }
- error("Critical error, code should never reach here. CHAR_LITERAL.");
- break;
- case COMMENT:
- if (token.toString().startsWith("//")) {
- if (peek('\n') || peek() == '\0') return new Token(COMMENT, token.toString(), lline, lcolumn);
- else token.append(pop());
- } else if (peek('*') && npeek(1) == '/') {
- token.append(pop());
- token.append(pop());
- return new Token(state, token.toString(), lline, lcolumn);
- } else {
- if (peek() == '\0') expected("*/", "EOF");
- token.append(pop());
- }
- break;
- case SYMBOL:
- boolean match = false;
- for (TokenType t : TokenType.values()) {
- if (t.match.equals("")) continue;
- if (t.match.contains(token.toString() + peek())) {
- token.append(pop());
- match = true;
- break;
- }
- }
- if (match) break;
- for (TokenType t : TokenType.values()) {
- if (t.match.equals("")) continue;
- if (t.match.equals(token.toString())) {
- return new Token(t, token.toString(), lline, lcolumn);
- }
- }
- error("Critical error, code should never reach here. SYMBOL: '" + token.toString() + "'");
- break;
- default:
- error("Critical error, unexpected state '" + state.name().toLowerCase() + "' found on matched token.");
- }
- }
- }
- private final boolean alphabetical() {
- return Character.isAlphabetic(peek());
- }
- private final boolean digit() {
- return Character.isDigit(peek());
- }
- private final boolean escape() {
- if (peek('\'') || peek('"') || peek('n') || peek('r') || peek('t') || peek('b') || peek('0')) return true;
- return false;
- }
- private final boolean hex() {
- if (digit() ||
- peek('a') || peek('A') || peek('b') || peek('B') || peek('c') || peek('C') ||
- peek('d') || peek('D') || peek('e') || peek('E') || peek('f') || peek('F')) return true;
- return false;
- }
- private final boolean peek(char c) {
- if (c == '\0' || stream.length() == 0) return false;
- char first = stream.charAt(0);
- if (first == '\0') return false;
- return c == first;
- }
- private final char peek() {
- if (stream.length() == 0) return '\0';
- return stream.charAt(0);
- }
- private final char npeek(int n) {
- if (stream.length() < n + 1) return '\0';
- return stream.charAt(n);
- }
- private final char pop() {
- if (stream.length() > 0) {
- char c = stream.charAt(0);
- if (c == '\n') {
- line++;
- column = 1;
- } else column++;
- stream = stream.substring(1);
- return c;
- } return '\0';
- }
- private final void expected(String expected, String found) {
- error("Syntax error, expected token '" + expected + "' at " + line + ":" + column + " but found '" + found + "'.");
- }
- private final void unexpected(String token) {
- error("Syntax error, unexpected token '" + token + "' at " + line + ":" + column + ". Delete this token.");
- }
- private final void error(String message) {
- throw new SyntaxError(message);
- }
- }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement