Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- import java.util.ArrayList;
- import java.util.List;
- import static net.pyrite.lexer.TokenType.*;
- import net.pyrite.exception.SyntaxError;
- public class Lexer {
- private String stream;
- private int line = 1, column = 1;
- private List<Token> tokens;
- public Lexer(String input) {
- this.stream = input;
- this.tokens = new ArrayList<Token>();
- }
- public final void tokenise() {
- Token token = null;
- while (true) {
- if (token != null) tokens.add(token);
- if ((token = whitespace()) != null || (token = comment()) != null) {
- token = null;
- continue;
- }
- if ((token = identifier()) != null) continue;
- if ((token = string()) != null) continue;
- if ((token = character()) != null) continue;
- if ((token = cblock()) != null) continue;
- if ((token = number()) != null) continue;
- if ((token = symbol()) != null) continue;
- if (peek() != '\0') unexpected(pop());
- break;
- }
- }
- public final List<Token> getOutput() {
- return tokens;
- }
- private final Token whitespace() {
- if (!peek(' ') && !peek('\t') && !peek('\n')) return null;
- pop();
- return new Token(null, null, 0, 0);
- }
- private final Token comment() {
- if (peek('/')) {
- if (npeek(1) == '/') {
- pop();
- while (true) {
- pop();
- if (peek('\n') || stream.length() == 0) {
- pop();
- return new Token(null, null, 0, 0);
- }
- }
- } else if (npeek(1) == '*') {
- pop();
- while (true) {
- pop();
- if (peek() == '\0') {System.out.println(stream);
- expected('*', '\0');
- }
- else if (peek('*') && npeek(1) == '/') {
- pop();
- pop();
- return new Token(null, null, 0, 0);
- }
- }
- } else return null;
- }
- return null;
- }
- private final Token identifier() {
- if (!alphabetical() && !peek('_')) return null;
- int line = this.line, column = this.column;
- StringBuilder identifier = new StringBuilder(Character.toString(check(pop())));
- boolean qualifying = false, qualified = false;
- while (true) {
- if (alphabetical() || digit() || peek('_')) qualifying = false;
- else if (peek('.')) {
- qualified = true;
- if (!qualifying) qualifying = true;
- else unexpected(check(pop()));
- } else break;
- identifier.append(check(pop()));
- }
- return new Token((qualified ? QUALIFIED : IDENTIFIER), identifier.toString(), line, column);
- }
- private final Token string() {
- if (!peek('"')) return null;
- int line = this.line, column = this.column;
- StringBuilder string = new StringBuilder(Character.toString(check(pop())));
- while (true) {
- if (peek('\\')) {
- pop();
- if (!escape()) error("Syntax error, unsupported escape sequence '" + pop() + "'. Supported sequences are '\"nrtb0");
- else string.append("\\" + check(pop()));
- } else if (peek('"')) {
- string.append(check(pop()));
- return new Token(STRING_LITERAL, string.toString(), line, column);
- } else if (peek('\0')) check(pop());
- else string.append(check(pop()));
- }
- }
- private final Token character() {
- if (!peek('\'')) return null;
- int line = this.line, column = this.column;
- StringBuilder character = new StringBuilder(Character.toString(check(pop())));
- if (peek('\\')) {
- pop();
- if (!escape()) error("Syntax error, unsupported escape sequence '\\" + pop() + "' at " + line + ":" + column + ". Supported sequences are '\"nrtb0");
- else character.append("\\" + check(pop()));
- } else if (peek('\'')) error("Syntax error, character literal at " + line + ":" + column + " cannot be empty.");
- else character.append(check(pop()));
- if (!peek('\'')) expected('\'', pop());
- else character.append(check(pop()));
- return new Token(CHAR_LITERAL, character.toString(), line, column);
- }
- private final Token cblock() {
- if (!peek('~')) return null;
- int line = this.line, column = this.column;
- StringBuilder block = new StringBuilder(Character.toString(check(pop())));
- while (!peek('~') && peek() != '\0') {
- block.append(pop());
- }
- block.append(check(pop()));
- return new Token(C_BLOCK, block.toString(), line, column);
- }
- private final Token number() {
- if (!digit()) return null;
- int line = this.line, column = this.column;
- StringBuilder number = new StringBuilder(Character.toString(check(pop())));
- if (peek('x') || peek('X')) {
- if (!number.toString().equals("0")) error("Syntax error, hexadecimal numbers require a '0' before the 'x' at " + line + ":" + column + ".");
- number.append(pop());
- while (true) {
- if (hex()) number.append(pop());
- else return new Token(INTEGER, number.toString(), line, column);
- }
- } else {
- boolean decimal = false;
- while (true) {
- if (digit()) number.append(check(pop()));
- else if (peek('.')) {
- if (decimal) unexpected('.');
- decimal = true;
- number.append(check(pop()));
- } else {
- return new Token((decimal ? DECIMAL : INTEGER), number.toString(), line, column);
- }
- }
- }
- }
- private final Token symbol() {
- StringBuilder symbol = new StringBuilder();
- TokenType type = null;
- int line = this.line, column = this.column;
- while (true) {
- boolean match = false;
- for (TokenType t : TokenType.values()) {
- if (t.match == null || t.match.equals("")) continue;
- if (t.match.equals(symbol.toString() + peek())) {
- match = true;
- type = t;
- symbol.append(pop());
- break;
- }
- }
- if (!match) break;
- }
- if (symbol.length() > 0) return new Token(type, symbol.toString(), line, column);
- return null;
- }
- private final boolean alphabetical() {
- return Character.isAlphabetic(peek());
- }
- private final boolean digit() {
- return Character.isDigit(peek());
- }
- private final boolean escape() {
- if (peek('\'') || peek('"') || peek('n') || peek('r') || peek('t') || peek('b') || peek('0')) return true;
- return false;
- }
- private final boolean hex() {
- if (digit() ||
- peek('a') || peek('A') || peek('b') || peek('B') || peek('c') || peek('C') ||
- peek('d') || peek('D') || peek('e') || peek('E') || peek('f') || peek('F')) return true;
- return false;
- }
- private final boolean peek(char c) {
- if (c == '\0' || stream.length() == 0) return false;
- char first = stream.charAt(0);
- if (first == '\0') return false;
- return c == first;
- }
- private final char peek() {
- if (stream.length() == 0) return '\0';
- return stream.charAt(0);
- }
- private final char npeek(int n) {
- if (stream.length() < n + 1) return '\0';
- return stream.charAt(n);
- }
- private final char pop() {
- if (stream.length() > 0) {
- char c = stream.charAt(0);
- if (c == '\n') {
- line++;
- column = 1;
- } else column++;
- stream = stream.substring(1);
- return c;
- } return '\0';
- }
- // TODO: Is SyntaxError a good choice for critical errors?
- private final char check(char c) {
- if (c == '\0') error("Syntax error, expected a character but found EOF.\n" +
- "Please contact a developer as this is an unexpected error.");
- return c;
- }
- private final void expected(char c, char found) {
- error("Syntax error, expected character '" + c + "' but found '" +
- (found == '\0' ? "EOF" : found) + "'. Replace this character.");
- }
- // TODO: Figure out why (column - 1) is required.
- private final void unexpected(char c) {
- error("Syntax error, unexpected character '" + c + "' at " + line + ":" + (column - 1) +
- ". Delete this character.");
- }
- private final void error(String message) {
- throw new SyntaxError(message);
- }
- }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement