using System;
using System.Collections.Generic;
using System.IO;
using System.Text;
namespace Symlang { /// <summary>Token type.</summary>
public enum TokenType {
// Names and literals. These are the only kinds whose token carries a value;
// every other kind is fully described by its type.
Ident, // identifier made of letters and underscores, e.g. foo_bar
Number, // numeric literal: digits with an optional decimal point, e.g. 42 or 3.14
String, // string literal delimited by double quotes: "..."
// Structural operators.
LParen, // (
RParen, // )
LBrace, // {
RBrace, // }
LBracket, // [
RBracket, // ]
Colon, // :
SemiColon, // ;
Comma, // ,
Then, // ->
Loop, // =>
// Arithmetic/logic operators.
Exponent, // ^
Multiply, // *
Divide, // /
Modulus, // %
Add, // +
Subtract, // -
And, // &
Or, // |
Not, // ~
Xor, // . (a '.' inside a numeric literal is a decimal point instead)
LessThan, // <
LtEq, // <=
EqualTo, // =
NotEqual, // /=
GtEq, // >=
GrtrThan, // >
// Miscellaneous operators.
Assign // <-
}
/// <summary>A single lexical token together with the position in the
/// source at which it was read.</summary>
public class Token {
    /// <summary>The type of token.</summary>
    public readonly TokenType Type;
    /// <summary>The token value (meaningful for identifier, number and
    /// string tokens only).</summary>
    public readonly string Value;
    /// <summary>The name of the file the token was in.</summary>
    public readonly string Filename;
    /// <summary>The line the token was on.</summary>
    public readonly int Line;
    /// <summary>The column the token started at.</summary>
    public readonly int Column;

    /// <summary>Constructs <see cref="Symlang.Token"/>.</summary>
    /// <param name='type'>The token type.</param>
    /// <param name='val'>The token value (identifier/number/string only).
    /// </param>
    /// <param name='filename'>The file in which the token was read.</param>
    /// <param name='line'>The line on which the token was read.</param>
    /// <param name='column'>The column at which the token started.</param>
    public Token(TokenType type, string val, string filename, int line,
                 int column)
    {
        this.Type = type;
        this.Value = val;
        this.Filename = filename;
        this.Line = line;
        this.Column = column;
    }

    /// <summary>Returns a <see cref="System.String"/> that represents the
    /// <see cref="Symlang.Token"/>.</summary>
    /// <returns>The upper-cased type name, followed by the value in
    /// parentheses for identifiers and numbers, or by the quoted value in
    /// parentheses for strings. Other token types yield the type name
    /// alone.</returns>
    public override string ToString()
    {
        // Invariant casing keeps the output stable regardless of the
        // current culture (e.g. under tr-TR, ToUpper() maps 'i' to a
        // dotted capital, turning "String" into "STRİNG").
        string name = this.Type.ToString().ToUpperInvariant();
        switch (this.Type) {
        case TokenType.Ident:
        case TokenType.Number:
            return string.Format("{0}({1})", name, this.Value);
        case TokenType.String:
            return string.Format("{0}(\"{1}\")", name, this.Value);
        default:
            return name;
        }
    }
}
/// <summary>Lexer: converts lines of source text into a list of
/// <see cref="Symlang.Token"/> objects.</summary>
/// <remarks>Errors are reported by constructing a SyntaxError, a type
/// declared outside this class. It is constructed rather than thrown, so
/// the reporting presumably happens in its constructor (to be confirmed
/// against its definition); lexing carries on afterwards.</remarks>
public class Lexer {
    /// <summary>The name of the file being lexed, without its directory.
    /// Null when the parameterless constructor was used.</summary>
    private string Filename;
    /// <summary>The source code, one entry per line.</summary>
    private List<string> Code;
    /// <summary>The tokens produced by the most recent tokenisation, or
    /// null if none has run yet.</summary>
    private List<Token> Tokens;
    /// <summary>Zero-based index of the line currently being lexed.
    /// </summary>
    private int Line;
    /// <summary>Zero-based index of the character currently being lexed.
    /// </summary>
    private int Column;
    /// <summary>Whether the cached tokens are up to date with the code.
    /// </summary>
    private bool Tokenised;

    /// <summary>Constructs an empty <see cref="Symlang.Lexer"/>; code is
    /// supplied afterwards through <see cref="AppendLine"/>.</summary>
    public Lexer()
    {
        this.Code = new List<string>();
        this.Tokenised = false;
    }

    /// <summary>Constructs <see cref="Symlang.Lexer"/> from a filename and
    /// a list of lines of code.</summary>
    /// <param name='filename'>The name of the file the code was read from.
    /// </param>
    /// <param name='code'>The list of lines of code.</param>
    public Lexer(string filename, List<string> code)
    {
        // Path.GetFileName already includes the extension, so appending
        // Path.GetExtension again would produce names like "a.sym..sym".
        this.Filename = Path.GetFileName(filename);
        this.Code = code;
        this.Tokenised = false;
    }

    /// <summary>Constructs <see cref="Symlang.Lexer"/> from a filename and
    /// an array of lines of code.</summary>
    /// <param name='filename'>The name of the file the code was read from.
    /// </param>
    /// <param name='code'>The array of lines of code.</param>
    public Lexer(string filename, string[] code)
        : this(filename, new List<string>(code))
    {
        // Do nothing.
    }

    /// <summary>Constructs <see cref="Symlang.Lexer"/> from a filename and
    /// a string containing code. The string is split into lines on '\n'.
    /// </summary>
    /// <param name='filename'>The name of the file the code was read from.
    /// </param>
    /// <param name='code'>The string containing the code.</param>
    public Lexer(string filename, string code)
        : this(filename, code.Split('\n'))
    {
        // Do nothing.
    }

    /// <summary>Constructs <see cref="Symlang.Lexer"/> from a filename and
    /// an open StreamReader. The reader is read to its end but is not
    /// closed; it remains owned by the caller.</summary>
    /// <param name='filename'>The name of the stream/file.</param>
    /// <param name='stream'>The StreamReader to read code from.</param>
    public Lexer(string filename, StreamReader stream)
        : this(filename, stream.ReadToEnd())
    {
        // Do nothing.
    }

    /// <summary>Constructs <see cref="Symlang.Lexer"/> from a filename and
    /// an open stream. The stream is read to its end but is not closed; it
    /// remains owned by the caller.</summary>
    /// <param name='filename'>The name of the stream/file.</param>
    /// <param name='stream'>The stream to read code from.</param>
    public Lexer(string filename, Stream stream)
        : this(filename, new StreamReader(stream))
    {
        // Do nothing.
    }

    /// <summary>Constructs <see cref="Symlang.Lexer"/> from a filename.
    /// </summary>
    /// <param name='filename'>The name of a file to read code from.</param>
    public Lexer(string filename)
        // File.ReadAllText opens, reads and closes the file itself, so no
        // reader is left undisposed.
        : this(filename, File.ReadAllText(filename))
    {
        // Do nothing.
    }

    /// <summary>Appends a line to the code and invalidates any cached
    /// tokens.</summary>
    /// <param name='line'>The line of code to append.</param>
    public void AppendLine(string line)
    {
        this.Code.Add(line);
        this.Tokenised = false;
    }

    /// <summary>Adds a token positioned at the current line and column.
    /// </summary>
    /// <param name='type'>The token type.</param>
    /// <param name='val'>The value.</param>
    private void AddToken(TokenType type, string val = "")
    {
        this.Tokens.Add(new Token(type, val, this.Filename, this.Line,
                                  this.Column));
    }

    /// <summary>Reads an identifier (letters and underscores) starting at
    /// the current column and adds it as a token. On return the column is
    /// the index just past the identifier.</summary>
    /// <param name='line'>The current line.</param>
    private void AddIdent(string line)
    {
        StringBuilder name = new StringBuilder();
        int i = this.Column;
        while ((i < line.Length) &&
               (Char.IsLetter(line[i]) || (line[i] == '_'))) {
            name.Append(line[i]);
            ++i;
        }
        // The token is added before the column moves so that it records
        // the position where the identifier starts.
        this.AddToken(TokenType.Ident, name.ToString());
        this.Column = i;
    }

    /// <summary>Reads a number (digits with at most one decimal point)
    /// starting at the current column and adds it as a token. On return
    /// the column is the index just past the literal.</summary>
    /// <remarks>A second decimal point is reported as a syntax error at
    /// the column of that point; the whole malformed literal is then
    /// skipped and no token is added for it.</remarks>
    /// <param name='line'>The current line.</param>
    private void AddNumber(string line)
    {
        StringBuilder digits = new StringBuilder();
        bool hasDecimal = false;
        int i;
        for (i = this.Column; (i < line.Length) &&
             (Char.IsDigit(line[i]) || (line[i] == '.')); ++i) {
            if (line[i] == '.') {
                if (hasDecimal) {
                    // Report the error at the offending '.' itself.
                    this.Column = i;
                    new SyntaxError(this.Filename, this.Line, this.Column,
                                    line, "Unexpected '.'");
                    // Skip the remainder of the malformed literal so that
                    // lexing does not resume in the middle of it.
                    while ((i < line.Length) &&
                           (Char.IsDigit(line[i]) || (line[i] == '.'))) {
                        ++i;
                    }
                    this.Column = i;
                    return;
                }
                hasDecimal = true;
            }
            digits.Append(line[i]);
        }
        this.AddToken(TokenType.Number, digits.ToString());
        this.Column = i;
    }

    /// <summary>Reads the body of a string literal and adds it as a token.
    /// On entry the column must be the index just after the opening quote;
    /// on return it is the index of the closing quote, or the line length
    /// if the string is unterminated.</summary>
    /// <remarks>An unterminated string is reported as a syntax error, and
    /// the text up to the end of the line is still added as a token.
    /// </remarks>
    /// <param name='line'>The current line.</param>
    private void AddString(string line)
    {
        StringBuilder text = new StringBuilder();
        int i;
        for (i = this.Column; (i < line.Length) && (line[i] != '"'); ++i) {
            text.Append(line[i]);
        }
        // The loop stops either on the closing quote or at the end of the
        // line. Only the index is tested, because line[i] does not exist
        // in the latter case.
        if (i >= line.Length) {
            new SyntaxError(this.Filename, this.Line, this.Column,
                            line, "Expected '\"' before end-of-line.");
        }
        this.AddToken(TokenType.String, text.ToString());
        this.Column = i;
    }

    /// <summary>Tokenises the code.</summary>
    /// <param name='retokenise'>Whether to retokenise the code if it's been
    /// done before. The default behaviour is to return the result of the
    /// previous tokenisation.</param>
    /// <returns>The list of tokens for the current code.</returns>
    public List<Token> Tokenise(bool retokenise = false)
    {
        // Reuse the cached result only if it exists, is still current and
        // the caller did not ask for a fresh pass.
        if (!retokenise && this.Tokenised && (this.Tokens != null))
            return this.Tokens;

        this.Tokens = new List<Token>();
        // Restart line numbering so that repeated passes report the same
        // positions.
        this.Line = 0;
        foreach (string line in this.Code) {
            for (this.Column = 0; this.Column < line.Length; ++this.Column) {
                char symbol = line[this.Column];
                // One character of look-ahead for two-character operators;
                // '\0' stands for "end of line".
                char nextSymbol = (this.Column + 1 < line.Length)
                    ? line[this.Column + 1] : '\0';
                switch (symbol) {
                case '\n': case '\r': case '\t': case ' ':
                    // Whitespace. '\r' is included so that CRLF input
                    // split on '\n' does not raise an error on every line.
                    break;
                case '(':
                    this.AddToken(TokenType.LParen);
                    break;
                case ')':
                    this.AddToken(TokenType.RParen);
                    break;
                case '{':
                    this.AddToken(TokenType.LBrace);
                    break;
                case '}':
                    this.AddToken(TokenType.RBrace);
                    break;
                case '[':
                    this.AddToken(TokenType.LBracket);
                    break;
                case ']':
                    this.AddToken(TokenType.RBracket);
                    break;
                case ':':
                    this.AddToken(TokenType.Colon);
                    break;
                case ';':
                    this.AddToken(TokenType.SemiColon);
                    break;
                case ',':
                    this.AddToken(TokenType.Comma);
                    break;
                case '^':
                    this.AddToken(TokenType.Exponent);
                    break;
                case '*':
                    this.AddToken(TokenType.Multiply);
                    break;
                case '/':
                    if (nextSymbol == '/') {
                        // Line comment: ignore the rest of this line.
                        this.Column = line.Length;
                    } else if (nextSymbol == '=') {
                        this.AddToken(TokenType.NotEqual);
                        ++this.Column;
                    } else {
                        this.AddToken(TokenType.Divide);
                    }
                    break;
                case '%':
                    this.AddToken(TokenType.Modulus);
                    break;
                case '+':
                    this.AddToken(TokenType.Add);
                    break;
                case '-':
                    if (nextSymbol == '>') {
                        this.AddToken(TokenType.Then);
                        ++this.Column;
                    } else {
                        this.AddToken(TokenType.Subtract);
                    }
                    break;
                case '&':
                    this.AddToken(TokenType.And);
                    break;
                case '|':
                    this.AddToken(TokenType.Or);
                    break;
                case '~':
                    this.AddToken(TokenType.Not);
                    break;
                case '.':
                    this.AddToken(TokenType.Xor);
                    break;
                case '<':
                    if (nextSymbol == '-') {
                        this.AddToken(TokenType.Assign);
                        ++this.Column;
                    } else if (nextSymbol == '=') {
                        this.AddToken(TokenType.LtEq);
                        ++this.Column;
                    } else {
                        this.AddToken(TokenType.LessThan);
                    }
                    break;
                case '=':
                    if (nextSymbol == '>') {
                        this.AddToken(TokenType.Loop);
                        ++this.Column;
                    } else {
                        this.AddToken(TokenType.EqualTo);
                    }
                    break;
                case '>':
                    if (nextSymbol == '=') {
                        this.AddToken(TokenType.GtEq);
                        // Consume the '=' as well; otherwise it would be
                        // lexed again as a separate EqualTo token.
                        ++this.Column;
                    } else {
                        this.AddToken(TokenType.GrtrThan);
                    }
                    break;
                default:
                    // Same start set as AddIdent consumes, so upper-case
                    // letters and '_' can begin an identifier too.
                    if (char.IsLetter(symbol) || (symbol == '_')) {
                        this.AddIdent(line);
                        // AddIdent leaves the column just past the token;
                        // step back so the loop increment lands there.
                        --this.Column;
                    } else if (char.IsDigit(symbol)) {
                        this.AddNumber(line);
                        --this.Column;
                    } else if (symbol == '"') {
                        // Skip the opening quote. AddString stops on the
                        // closing quote, which the loop increment skips.
                        ++this.Column;
                        this.AddString(line);
                    } else {
                        new SyntaxError(this.Filename, this.Line,
                                        this.Column, line,
                                        "Unexpected '{0}'", symbol);
                    }
                    break;
                }
            }
            ++this.Line;
        }
        this.Tokenised = true;
        return this.Tokens;
    }
}
}