using System;
using System.Collections.Generic;
using System.IO;
using System.Text;

namespace Symlang
{
    /// <summary>Token type.</summary>
    public enum TokenType
    {
        Ident,      // [a-zA-Z_]+
        Number,     // [0-9]+ with at most one '.'
        String,     // "..."

        // Structural operators.
        LParen,     // (
        RParen,     // )
        LBrace,     // {
        RBrace,     // }
        LBracket,   // [
        RBracket,   // ]
        Colon,      // :
        SemiColon,  // ;
        Comma,      // ,
        Then,       // ->
        Loop,       // =>

        // Arithmetic/logic operators.
        Exponent,   // ^
        Multiply,   // *
        Divide,     // /
        Modulus,    // %
        Add,        // +
        Subtract,   // -
        And,        // &
        Or,         // |
        Not,        // ~
        Xor,        // .
        LessThan,   // <
        LtEq,       // <=
        EqualTo,    // =
        NotEqual,   // /=
        GtEq,       // >=
        GrtrThan,   // >

        // Miscellaneous operators.
        Assign      // <-
    }

    /// <summary>A single lexical token together with its source position.</summary>
    public class Token
    {
        /// <summary>The type of token.</summary>
        public readonly TokenType Type;

        /// <summary>The token value (empty for operator tokens).</summary>
        public readonly string Value;

        /// <summary>The name of the file the token was in.</summary>
        public readonly string Filename;

        /// <summary>The line the token was on (0-based).</summary>
        public readonly int Line;

        /// <summary>The column the token started at (0-based).</summary>
        public readonly int Column;

        /// <summary>Constructs a <see cref="Token"/>.</summary>
        /// <param name="type">The token type.</param>
        /// <param name="val">The token value (identifier/number/string only).</param>
        /// <param name="filename">The file in which the token was read.</param>
        /// <param name="line">The line on which the token was read.</param>
        /// <param name="column">The column at which the token started.</param>
        public Token(TokenType type, string val, string filename, int line, int column)
        {
            this.Type = type;
            this.Value = val;
            this.Filename = filename;
            this.Line = line;
            this.Column = column;
        }

        /// <summary>
        /// Returns a <see cref="System.String"/> that represents the
        /// <see cref="Token"/>: the upper-cased type name, followed by the
        /// value in parentheses for identifiers, numbers and strings
        /// (strings are additionally wrapped in double quotes).
        /// </summary>
        /// <returns>The textual form of the token.</returns>
        public override string ToString()
        {
            StringBuilder sb = new StringBuilder();

            // Invariant casing: the type name is an identifier, not UI text,
            // so it must not change with the current culture.
            sb.Append(this.Type.ToString().ToUpperInvariant());

            if ((this.Type == TokenType.Ident) || (this.Type == TokenType.Number))
            {
                sb.Append('(').Append(this.Value).Append(')');
            }
            else if (this.Type == TokenType.String)
            {
                sb.Append("(\"").Append(this.Value).Append("\")");
            }

            return sb.ToString();
        }
    }

    /// <summary>Splits lines of source code into <see cref="Token"/>s.</summary>
    public class Lexer
    {
        /// <summary>The name of the file being lexed (no directory part).</summary>
        private string Filename;

        /// <summary>The lines of code.</summary>
        private List<string> Code;

        /// <summary>The tokens produced by the last tokenisation.</summary>
        private List<Token> Tokens;

        /// <summary>The current line (0-based).</summary>
        private int Line;

        /// <summary>The current column (0-based).</summary>
        private int Column;

        /// <summary>Whether <see cref="Tokens"/> reflects the current code.</summary>
        private bool Tokenised;

        /// <summary>Constructs an empty <see cref="Lexer"/>.</summary>
        public Lexer()
        {
            this.Code = new List<string>();
            this.Tokenised = false;
        }

        /// <summary>
        /// Constructs a <see cref="Lexer"/> from a filename and a list of
        /// lines of code.
        /// </summary>
        /// <param name="filename">The name of the file the code was read from.</param>
        /// <param name="code">The list of lines of code.</param>
        public Lexer(string filename, List<string> code)
        {
            // GetFileName already includes the extension; appending
            // GetExtension as well produced names like "foo.sym..sym".
            this.Filename = Path.GetFileName(filename);
            this.Code = code;
            this.Tokenised = false;
        }

        /// <summary>
        /// Constructs a <see cref="Lexer"/> from a filename and an array of
        /// lines of code.
        /// </summary>
        /// <param name="filename">The name of the file the code was read from.</param>
        /// <param name="code">The array of lines of code.</param>
        public Lexer(string filename, string[] code)
            : this(filename, new List<string>(code))
        {
            // Do nothing.
        }

        /// <summary>
        /// Constructs a <see cref="Lexer"/> from a filename and a string
        /// containing code. The string is split into lines on '\n'.
        /// </summary>
        /// <param name="filename">The name of the file the code was read from.</param>
        /// <param name="code">The string containing the code.</param>
        public Lexer(string filename, string code)
            : this(filename, code.Split('\n'))
        {
            // Do nothing.
        }

        /// <summary>
        /// Constructs a <see cref="Lexer"/> from a filename and an open
        /// StreamReader. The reader is read to the end but not closed.
        /// </summary>
        /// <param name="filename">The name of the stream/file.</param>
        /// <param name="stream">The StreamReader to read code from.</param>
        public Lexer(string filename, StreamReader stream)
            : this(filename, stream.ReadToEnd())
        {
            // Do nothing.
        }

        /// <summary>
        /// Constructs a <see cref="Lexer"/> from a filename and an open
        /// stream. The stream is read to the end but not closed.
        /// </summary>
        /// <param name="filename">The name of the stream/file.</param>
        /// <param name="stream">The stream to read code from.</param>
        public Lexer(string filename, Stream stream)
            : this(filename, new StreamReader(stream))
        {
            // Do nothing.
        }

        /// <summary>Constructs a <see cref="Lexer"/> from a filename.</summary>
        /// <param name="filename">The name of a file to read code from.</param>
        public Lexer(string filename)
            : this(filename, File.ReadAllText(filename))
        {
            // File.ReadAllText opens, reads and closes the file, so no
            // reader is left undisposed.
        }

        /// <summary>
        /// Appends a line to the code and invalidates any cached tokens.
        /// </summary>
        /// <param name="line">The line of code to append.</param>
        public void AppendLine(string line)
        {
            this.Code.Add(line);
            this.Tokenised = false;
        }

        /// <summary>
        /// Adds a token positioned at the current line and column.
        /// </summary>
        /// <param name="type">The token type.</param>
        /// <param name="val">The value.</param>
        private void AddToken(TokenType type, string val = "")
        {
            this.Tokens.Add(new Token(type, val, this.Filename, this.Line, this.Column));
        }

        /// <summary>
        /// Reads an identifier starting at the current column and adds it
        /// as a token. On return the column is one past the identifier.
        /// </summary>
        /// <param name="line">The current line.</param>
        private void AddIdent(string line)
        {
            StringBuilder sb = new StringBuilder();
            int i = this.Column;

            while ((i < line.Length) && (Char.IsLetter(line[i]) || (line[i] == '_')))
            {
                sb.Append(line[i]);
                ++i;
            }

            this.AddToken(TokenType.Ident, sb.ToString());
            this.Column = i;
        }

        /// <summary>
        /// Reads a number (digits with at most one '.') starting at the
        /// current column and adds it as a token. On success the column is
        /// one past the number. If a second '.' is found, a syntax error is
        /// reported at that '.', no token is added, and the column is left
        /// on the offending character.
        /// </summary>
        /// <param name="line">The current line.</param>
        private void AddNumber(string line)
        {
            StringBuilder sb = new StringBuilder();
            bool hasDecimal = false;
            int i;

            for (i = this.Column;
                 (i < line.Length) && (Char.IsDigit(line[i]) || (line[i] == '.'));
                 ++i)
            {
                if (line[i] == '.')
                {
                    if (hasDecimal)
                    {
                        // Point the error at the second '.' itself rather
                        // than a fixed offset from the start of the number.
                        this.Column = i;
                        // NOTE(review): SyntaxError is declared elsewhere and
                        // the instance is discarded; presumably its
                        // constructor does the reporting - confirm.
                        new SyntaxError(this.Filename, this.Line, this.Column, line, "Unexpected '.'");
                        return;
                    }

                    hasDecimal = true;
                }

                sb.Append(line[i]);
            }

            this.AddToken(TokenType.Number, sb.ToString());
            this.Column = i;
        }

        /// <summary>
        /// Reads a string literal whose opening quote is at the current
        /// column and adds it as a token positioned at that quote. On return
        /// the column is on the closing quote, or at the end of the line if
        /// the string was unterminated (in which case a syntax error is
        /// reported first).
        /// </summary>
        /// <param name="line">The current line.</param>
        private void AddString(string line)
        {
            StringBuilder sb = new StringBuilder();
            int i;

            // Start after the opening quote.
            for (i = this.Column + 1; (i < line.Length) && (line[i] != '"'); ++i)
            {
                sb.Append(line[i]);
            }

            // Running off the end of the line means the closing quote is
            // missing; indexing line[i] here would throw.
            if (i >= line.Length)
            {
                new SyntaxError(this.Filename, this.Line, this.Column, line, "Expected '\"' before end-of-line.");
            }

            this.AddToken(TokenType.String, sb.ToString());
            this.Column = i;
        }

        /// <summary>Tokenises the code.</summary>
        /// <param name="retokenise">Whether to retokenise the code if it's
        /// been done before. The default behaviour is to return the result
        /// of the previous tokenisation.</param>
        /// <returns>The list of tokens read from the code.</returns>
        public List<Token> Tokenise(bool retokenise = false)
        {
            // Reuse the cached result only when it exists, is up to date,
            // and the caller did not ask for a fresh pass.
            if (!retokenise && this.Tokenised && (this.Tokens != null))
            {
                return this.Tokens;
            }

            this.Tokens = new List<Token>();
            this.Line = 0; // Restart line numbering on every pass.

            foreach (string line in this.Code)
            {
                // A "//" comment runs to the end of the current line only.
                bool inComment = false;

                for (this.Column = 0; !inComment && (this.Column < line.Length); ++this.Column)
                {
                    char symbol = line[this.Column];
                    char nextSymbol = (this.Column + 1 < line.Length)
                        ? line[this.Column + 1]
                        : '\0';

                    switch (symbol)
                    {
                        // Whitespace; '\r' is what remains of a CRLF line
                        // ending after splitting on '\n'.
                        case '\n':
                        case '\r':
                        case '\t':
                        case ' ':
                            break;

                        case '(': this.AddToken(TokenType.LParen); break;
                        case ')': this.AddToken(TokenType.RParen); break;
                        case '{': this.AddToken(TokenType.LBrace); break;
                        case '}': this.AddToken(TokenType.RBrace); break;
                        case '[': this.AddToken(TokenType.LBracket); break;
                        case ']': this.AddToken(TokenType.RBracket); break;
                        case ':': this.AddToken(TokenType.Colon); break;
                        case ';': this.AddToken(TokenType.SemiColon); break;
                        case ',': this.AddToken(TokenType.Comma); break;
                        case '^': this.AddToken(TokenType.Exponent); break;
                        case '*': this.AddToken(TokenType.Multiply); break;

                        case '/':
                            if (nextSymbol == '/')
                            {
                                inComment = true;
                                ++this.Column;
                            }
                            else if (nextSymbol == '=')
                            {
                                this.AddToken(TokenType.NotEqual);
                                ++this.Column;
                            }
                            else
                            {
                                this.AddToken(TokenType.Divide);
                            }
                            break;

                        case '%': this.AddToken(TokenType.Modulus); break;
                        case '+': this.AddToken(TokenType.Add); break;

                        case '-':
                            if (nextSymbol == '>')
                            {
                                this.AddToken(TokenType.Then);
                                ++this.Column;
                            }
                            else
                            {
                                this.AddToken(TokenType.Subtract);
                            }
                            break;

                        case '&': this.AddToken(TokenType.And); break;
                        case '|': this.AddToken(TokenType.Or); break;
                        case '~': this.AddToken(TokenType.Not); break;
                        case '.': this.AddToken(TokenType.Xor); break;

                        case '<':
                            if (nextSymbol == '-')
                            {
                                this.AddToken(TokenType.Assign);
                                ++this.Column;
                            }
                            else if (nextSymbol == '=')
                            {
                                this.AddToken(TokenType.LtEq);
                                ++this.Column;
                            }
                            else
                            {
                                this.AddToken(TokenType.LessThan);
                            }
                            break;

                        case '=':
                            if (nextSymbol == '>')
                            {
                                this.AddToken(TokenType.Loop);
                                ++this.Column;
                            }
                            else
                            {
                                this.AddToken(TokenType.EqualTo);
                            }
                            break;

                        case '>':
                            if (nextSymbol == '=')
                            {
                                this.AddToken(TokenType.GtEq);
                                // Consume the '=' so it is not lexed again
                                // as a separate EqualTo token.
                                ++this.Column;
                            }
                            else
                            {
                                this.AddToken(TokenType.GrtrThan);
                            }
                            break;

                        default:
                            if (char.IsLetter(symbol) || (symbol == '_'))
                            {
                                // Same character set AddIdent accepts.
                                this.AddIdent(line);
                                // The helper leaves Column one past the
                                // token; undo the loop's upcoming increment.
                                --this.Column;
                            }
                            else if (char.IsDigit(symbol))
                            {
                                this.AddNumber(line);
                                --this.Column;
                            }
                            else if (symbol == '"')
                            {
                                // AddString leaves Column on the closing
                                // quote; the loop increment steps past it.
                                this.AddString(line);
                            }
                            else
                            {
                                new SyntaxError(this.Filename, this.Line, this.Column, line, "Unexpected '{0}'", symbol);
                            }
                            break;
                    }
                }

                ++this.Line;
            }

            this.Tokenised = true;
            return this.Tokens;
        }
    }
}