Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- using System;
- using System.Collections.Generic;
- using System.IO;
- using System.Text;
- namespace Symlang { /// <summary>Token type.</summary>
public enum TokenType {
    // Names and literals.

    /// <summary>Identifier made of letters and underscores.</summary>
    Ident,
    /// <summary>Numeric literal made of digits.</summary>
    Number,
    /// <summary>String literal enclosed in double quotes.</summary>
    String,

    // Structural operators.

    /// <summary>Opening parenthesis, <c>(</c>.</summary>
    LParen,
    /// <summary>Closing parenthesis, <c>)</c>.</summary>
    RParen,
    /// <summary>Opening brace, <c>{</c>.</summary>
    LBrace,
    /// <summary>Closing brace, <c>}</c>.</summary>
    RBrace,
    /// <summary>Opening square bracket, <c>[</c>.</summary>
    LBracket,
    /// <summary>Closing square bracket, <c>]</c>.</summary>
    RBracket,
    /// <summary>Colon, <c>:</c>.</summary>
    Colon,
    /// <summary>Semicolon, <c>;</c>.</summary>
    SemiColon,
    /// <summary>Comma, <c>,</c>.</summary>
    Comma,
    /// <summary>"Then" arrow, <c>-&gt;</c>.</summary>
    Then,
    /// <summary>"Loop" arrow, <c>=&gt;</c>.</summary>
    Loop,

    // Arithmetic/logic operators.

    /// <summary>Exponentiation, <c>^</c>.</summary>
    Exponent,
    /// <summary>Multiplication, <c>*</c>.</summary>
    Multiply,
    /// <summary>Division, <c>/</c>.</summary>
    Divide,
    /// <summary>Modulus, <c>%</c>.</summary>
    Modulus,
    /// <summary>Addition, <c>+</c>.</summary>
    Add,
    /// <summary>Subtraction, <c>-</c>.</summary>
    Subtract,
    /// <summary>Logical/bitwise and, <c>&amp;</c>.</summary>
    And,
    /// <summary>Logical/bitwise or, <c>|</c>.</summary>
    Or,
    /// <summary>Negation, <c>~</c>.</summary>
    Not,
    /// <summary>Exclusive or, <c>.</c>.</summary>
    Xor,
    /// <summary>Less than, <c>&lt;</c>.</summary>
    LessThan,
    /// <summary>Less than or equal to, <c>&lt;=</c>.</summary>
    LtEq,
    /// <summary>Equal to, <c>=</c>.</summary>
    EqualTo,
    /// <summary>Not equal to, <c>/=</c>.</summary>
    NotEqual,
    /// <summary>Greater than or equal to, <c>&gt;=</c>.</summary>
    GtEq,
    /// <summary>Greater than, <c>&gt;</c>.</summary>
    GrtrThan,

    // Miscellaneous operators.

    /// <summary>Assignment, <c>&lt;-</c>.</summary>
    Assign
}
/// <summary>A single lexical token together with the position it was read
/// from. Instances are immutable once constructed.</summary>
public class Token {
    /// <summary>The type of token.</summary>
    public readonly TokenType Type;
    /// <summary>The token text. Only printed by <see cref="ToString"/> for
    /// identifier, number and string tokens.</summary>
    public readonly string Value;
    /// <summary>The name of the file the token was in.</summary>
    public readonly string Filename;
    /// <summary>The line the token was on.</summary>
    public readonly int Line;
    /// <summary>The column the token started at.</summary>
    public readonly int Column;

    /// <summary>Constructs <see cref="Symlang.Token"/>.</summary>
    /// <param name='type'>The token type.</param>
    /// <param name='val'>The token value (identifier/number/string only).
    /// </param>
    /// <param name='filename'>The file in which the token was read.</param>
    /// <param name='line'>The line on which the token was read.</param>
    /// <param name='column'>The column at which the token started.</param>
    public Token(TokenType type, string val, string filename, int line,
                 int column)
    {
        this.Type = type;
        this.Value = val;
        this.Filename = filename;
        this.Line = line;
        this.Column = column;
    }

    /// <summary>Returns a <see cref="System.String"/> that represents the
    /// <see cref="Symlang.Token"/>.</summary>
    /// <returns>The upper-cased type name, followed by the value in
    /// parentheses for identifiers and numbers, or by the quoted value in
    /// parentheses for strings; e.g. <c>IDENT(foo)</c>,
    /// <c>STRING("bar")</c>, <c>LPAREN</c>.</returns>
    public override string ToString()
    {
        // Invariant casing: the culture-sensitive ToUpper() would turn the
        // 'i' of "Ident" into a dotted capital I under e.g. a Turkish
        // culture, making the output depend on the machine's locale.
        string name = this.Type.ToString().ToUpperInvariant();
        switch (this.Type) {
        case TokenType.Ident:
        case TokenType.Number:
            return string.Format("{0}({1})", name, this.Value);
        case TokenType.String:
            return string.Format("{0}(\"{1}\")", name, this.Value);
        default:
            return name;
        }
    }
}
/// <summary>Splits source code, held as a list of lines, into a list of
/// <see cref="Symlang.Token"/>s.</summary>
public class Lexer {
    /// <summary>The file name recorded in tokens and syntax errors.</summary>
    private string Filename;
    /// <summary>The source code, one entry per line.</summary>
    private List<string> Code;
    /// <summary>The tokens produced by the last tokenisation, or null if
    /// <see cref="Tokenise"/> has not run yet.</summary>
    private List<Token> Tokens;
    /// <summary>The zero-based index of the line being scanned.</summary>
    private int Line;
    /// <summary>The zero-based index of the character being scanned.</summary>
    private int Column;
    /// <summary>Whether <see cref="Tokens"/> reflects the current contents of
    /// <see cref="Code"/>.</summary>
    private bool Tokenised;

    /// <summary>Constructs <see cref="Symlang.Lexer"/> with no code and no
    /// filename; lines can be supplied with <see cref="AppendLine"/>.
    /// </summary>
    public Lexer()
    {
        this.Code = new List<string>();
        this.Tokenised = false;
    }

    /// <summary>Constructs <see cref="Symlang.Lexer"/> from a filename and
    /// a list of lines of code.</summary>
    /// <param name='filename'>The name of the file the code was read from.
    /// Only the file name part (without directories) is kept.</param>
    /// <param name='code'>The list of lines of code.</param>
    public Lexer(string filename, List<string> code)
    {
        // Path.GetFileName already includes the extension; appending
        // Path.GetExtension as well produced names such as "a.sym..sym".
        this.Filename = Path.GetFileName(filename);
        this.Code = code;
        this.Tokenised = false;
    }

    /// <summary>Constructs <see cref="Symlang.Lexer"/> from a filename and
    /// an array of lines of code.</summary>
    /// <param name='filename'>The name of the file the code was read from.
    /// </param>
    /// <param name='code'>The array of lines of code.</param>
    public Lexer(string filename, string[] code)
        : this(filename, new List<string>(code))
    {
        // Do nothing.
    }

    /// <summary>Constructs <see cref="Symlang.Lexer"/> from a filename and
    /// a string containing code.</summary>
    /// <param name='filename'>The name of the file the code was read from.
    /// </param>
    /// <param name='code'>The string containing the code; it is split into
    /// lines on '\n'.</param>
    public Lexer(string filename, string code)
        : this(filename, code.Split('\n'))
    {
        // Do nothing.
    }

    /// <summary>Constructs <see cref="Symlang.Lexer"/> from a filename and
    /// an open StreamReader.</summary>
    /// <param name='filename'>The name of the stream/file.</param>
    /// <param name='stream'>The StreamReader to read code from. It is read
    /// to the end and is not disposed here.</param>
    public Lexer(string filename, StreamReader stream)
        : this(filename, stream.ReadToEnd())
    {
        // Do nothing.
    }

    /// <summary>Constructs <see cref="Symlang.Lexer"/> from a filename and
    /// an open stream.</summary>
    /// <param name='filename'>The name of the stream/file.</param>
    /// <param name='stream'>The stream to read code from. It is read to the
    /// end and is not disposed here.</param>
    public Lexer(string filename, Stream stream)
        : this(filename, new StreamReader(stream))
    {
        // Do nothing.
    }

    /// <summary>Constructs <see cref="Symlang.Lexer"/> from a filename.
    /// </summary>
    /// <param name='filename'>The name of a file to read code from.</param>
    public Lexer(string filename)
        // File.ReadAllLines opens, reads and closes the file itself; the
        // StreamReader created here previously was never disposed.
        : this(filename, File.ReadAllLines(filename))
    {
        // Do nothing.
    }

    /// <summary>Appends a line to the code and invalidates any previous
    /// tokenisation.</summary>
    /// <param name='line'>The line of code to append.</param>
    public void AppendLine(string line)
    {
        this.Code.Add(line);
        this.Tokenised = false;
    }

    /// <summary>Adds a token located at the current line and column.
    /// </summary>
    /// <param name='type'>The token type.</param>
    /// <param name='val'>The value.</param>
    private void AddToken(TokenType type, string val = "")
    {
        this.Tokens.Add(new Token(type, val, this.Filename, this.Line,
                                  this.Column));
    }

    /// <summary>Reads an identifier (letters and underscores) starting at
    /// the current column, adds it as a token and leaves the column on the
    /// first character after the identifier.</summary>
    /// <param name='line'>The current line.</param>
    private void AddIdent(string line)
    {
        StringBuilder sb = new StringBuilder();
        int i;
        for (i = this.Column; (i < line.Length) &&
             ((Char.IsLetter(line[i])) || (line[i] == '_')); ++i) {
            sb.Append(line[i]);
        }
        // The token is added before Column moves so that it records the
        // column the identifier started at.
        this.AddToken(TokenType.Ident, sb.ToString());
        this.Column = i;
    }

    /// <summary>Reads a number (digits with at most one '.') starting at
    /// the current column, adds it as a token and leaves the column on the
    /// first character after the number. A second '.' is reported as a
    /// syntax error and no token is added.</summary>
    /// <param name='line'>The current line.</param>
    private void AddNumber(string line)
    {
        StringBuilder sb = new StringBuilder();
        bool hasDecimal = false;
        int i;
        for (i = this.Column; (i < line.Length) &&
             ((Char.IsDigit(line[i])) || (line[i] == '.')); ++i) {
            if (line[i] == '.') {
                if (hasDecimal) {
                    // Point the error at the offending '.' itself; the
                    // previous fixed offset of two columns was only right
                    // for inputs shaped like "1..".
                    this.Column = i;
                    // NOTE(review): SyntaxError is declared outside this
                    // file and is constructed but not thrown - presumably
                    // its constructor reports the error; confirm.
                    new SyntaxError(this.Filename, this.Line, this.Column,
                                    line, "Unexpected '.'");
                    return;
                }
                hasDecimal = true;
            }
            sb.Append(line[i]);
        }
        this.AddToken(TokenType.Number, sb.ToString());
        this.Column = i;
    }

    /// <summary>Reads string contents starting at the current column (the
    /// character after the opening quote), adds them as a token and leaves
    /// the column on the closing quote, or at the end of the line when the
    /// string is unterminated.</summary>
    /// <param name='line'>The current line.</param>
    private void AddString(string line)
    {
        StringBuilder sb = new StringBuilder();
        int i;
        for (i = this.Column; (i < line.Length) && (line[i] != '"'); ++i)
            sb.Append(line[i]);
        // The loop only stops early on a closing quote, so reaching the end
        // of the line means the string is unterminated. Testing the index
        // avoids reading line[i] out of range.
        if (i >= line.Length)
            new SyntaxError(this.Filename, this.Line, this.Column,
                            line, "Expected '\"' before end-of-line.");
        this.AddToken(TokenType.String, sb.ToString());
        this.Column = i;
    }

    /// <summary>Tokenises the code.</summary>
    /// <param name='retokenise'>Whether to retokenise the code if it's been
    /// done before. The default behaviour is to return the result of the
    /// previous tokenisation.</param>
    /// <returns>The list of tokens for the current code.</returns>
    public List<Token> Tokenise(bool retokenise = false)
    {
        // Reuse the cached result only when it exists, is up to date and
        // the caller did not ask for a fresh pass.
        if (!retokenise && this.Tokenised && (this.Tokens != null))
            return this.Tokens;
        this.Tokens = new List<Token>();
        // Restart line numbering so repeated passes report the same lines.
        this.Line = 0;
        foreach (string line in this.Code) {
            for (this.Column = 0; this.Column < line.Length; ++this.Column) {
                char symbol = line[this.Column];
                // One character of lookahead for two-character operators;
                // '\0' stands for "end of line".
                char nextSymbol = (this.Column + 1 < line.Length)
                    ? line[this.Column + 1] : '\0';
                switch (symbol) {
                // '\r' is skipped too so that CRLF input split on '\n'
                // does not end every line with an unexpected character.
                case '\n': case '\r': case '\t': case ' ':
                    break;
                case '(':
                    this.AddToken(TokenType.LParen);
                    break;
                case ')':
                    this.AddToken(TokenType.RParen);
                    break;
                case '{':
                    this.AddToken(TokenType.LBrace);
                    break;
                case '}':
                    this.AddToken(TokenType.RBrace);
                    break;
                case '[':
                    this.AddToken(TokenType.LBracket);
                    break;
                case ']':
                    this.AddToken(TokenType.RBracket);
                    break;
                case ':':
                    this.AddToken(TokenType.Colon);
                    break;
                case ';':
                    this.AddToken(TokenType.SemiColon);
                    break;
                case ',':
                    this.AddToken(TokenType.Comma);
                    break;
                case '^':
                    this.AddToken(TokenType.Exponent);
                    break;
                case '*':
                    this.AddToken(TokenType.Multiply);
                    break;
                case '/':
                    if (nextSymbol == '/') {
                        // Line comment: skip the rest of this line.
                        this.Column = line.Length;
                    } else if (nextSymbol == '=') {
                        this.AddToken(TokenType.NotEqual);
                        ++this.Column;
                    } else {
                        this.AddToken(TokenType.Divide);
                    }
                    break;
                case '%':
                    this.AddToken(TokenType.Modulus);
                    break;
                case '+':
                    this.AddToken(TokenType.Add);
                    break;
                case '-':
                    if (nextSymbol == '>') {
                        this.AddToken(TokenType.Then);
                        ++this.Column;
                    } else {
                        this.AddToken(TokenType.Subtract);
                    }
                    break;
                case '&':
                    this.AddToken(TokenType.And);
                    break;
                case '|':
                    this.AddToken(TokenType.Or);
                    break;
                case '~':
                    this.AddToken(TokenType.Not);
                    break;
                case '.':
                    this.AddToken(TokenType.Xor);
                    break;
                case '<':
                    if (nextSymbol == '-') {
                        this.AddToken(TokenType.Assign);
                        ++this.Column;
                    } else if (nextSymbol == '=') {
                        this.AddToken(TokenType.LtEq);
                        ++this.Column;
                    } else {
                        this.AddToken(TokenType.LessThan);
                    }
                    break;
                case '=':
                    if (nextSymbol == '>') {
                        this.AddToken(TokenType.Loop);
                        ++this.Column;
                    } else {
                        this.AddToken(TokenType.EqualTo);
                    }
                    break;
                case '>':
                    if (nextSymbol == '=') {
                        this.AddToken(TokenType.GtEq);
                        // Consume the '=' as well, otherwise it is lexed
                        // again as a separate EqualTo token.
                        ++this.Column;
                    } else {
                        this.AddToken(TokenType.GrtrThan);
                    }
                    break;
                default:
                    // Identifiers may start with any character AddIdent
                    // accepts: a letter of either case or an underscore.
                    if (char.IsLetter(symbol) || (symbol == '_')) {
                        this.AddIdent(line);
                        // The helper leaves Column one past the token;
                        // step back so the loop increment lands there.
                        --this.Column;
                    } else if (char.IsDigit(symbol)) {
                        this.AddNumber(line);
                        --this.Column;
                    } else if (symbol == '"') {
                        // Skip the opening quote; AddString stops on the
                        // closing one, which the loop increment then skips.
                        ++this.Column;
                        this.AddString(line);
                    } else {
                        new SyntaxError(this.Filename, this.Line,
                                        this.Column, line,
                                        "Unexpected '{0}'", symbol);
                    }
                    break;
                }
            }
            ++this.Line;
        }
        this.Tokenised = true;
        return this.Tokens;
    }
}
- }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement