- /*
- Copyright (c) 2012, Alexander Wood (unsafeIO)
- All rights reserved.
- Redistribution and use in source and binary forms, with or without
- modification, are permitted provided that the following conditions are met:
- 1. Redistributions of source code must retain the above copyright notice, this
- list of conditions and the following disclaimer.
- 2. Redistributions in binary form must reproduce the above copyright notice,
- this list of conditions and the following disclaimer in the documentation
- and/or other materials provided with the distribution.
- THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
- ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
- WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
- DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
- ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
- (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
- LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
- ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
- SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
- using System;
- using System.Collections.Generic;
- using System.Linq;
- using System.Globalization;
- using System.IO;
- namespace Assembler
- {
/// <summary>
/// The kinds of token the lexer can hand to the parser.
/// </summary>
public enum TokenType
{
    /// <summary>A letter followed by letters or digits (mnemonic, register or label reference).</summary>
    Word,
    /// <summary>A decimal, hexadecimal (0x...) or binary (0b...) literal.</summary>
    Number,
    /// <summary>The ',' separator.</summary>
    Comma,
    /// <summary>A label definition, written as ':' followed by the name.</summary>
    Label,
    /// <summary>The '+' sign.</summary>
    Plus,
    /// <summary>The '[' character.</summary>
    OpenBracket,
    /// <summary>The ']' character.</summary>
    CloseBracket,
    /// <summary>End of the input stream.</summary>
    Eof,
    /// <summary>A line feed.</summary>
    NewLine
}
/// <summary>
/// A single lexical token: its kind plus, where applicable, its text.
/// </summary>
internal struct Token
{
    /// <summary>What kind of token this is.</summary>
    public TokenType Type;

    /// <summary>
    /// The token text. The lexer fills this in for words, numbers and labels
    /// (for labels, the name without the leading ':'); otherwise it is left null.
    /// </summary>
    public string Datum;
}
- //Will choke on non-ASCII characters. TODO: Unicode
/// <summary>
/// Splits assembler source read from a <see cref="StreamReader"/> into <see cref="Token"/>s,
/// offering one token of lookahead. Numeric literals are ASCII-only.
/// </summary>
class Lexer
{
    private readonly StreamReader _stream;

    // Position of the most recently consumed character, used in error reports.
    private uint _lineNumber = 1;
    private uint _columnNumber = 0;

    // One-token lookahead buffer: filled by PeekToken, drained by ReadToken.
    private Token? _peekToken = null;

    /// <summary>Creates a lexer that reads characters from <paramref name="stream"/>.</summary>
    /// <param name="stream">The source text. The lexer does not dispose it.</param>
    public Lexer(StreamReader stream)
    {
        _stream = stream;
    }

    /// <summary>True when no more characters can be read from the stream.</summary>
    private bool IsEof()
    {
        return _stream.Peek() == -1;
    }

    /// <summary>Consumes one character (or -1 at end of input) and updates the line/column counters.</summary>
    private int Read()
    {
        int next = _stream.Read();
        if (next == '\n')
        {
            // Restart at 0 so the first character of every line is reported as
            // column 1, exactly as on the first line.
            _lineNumber++;
            _columnNumber = 0;
        }
        else
        {
            _columnNumber++;
        }
        return next;
    }

    /// <summary>Returns the next character without consuming it, or -1 at end of input.</summary>
    private int Peek()
    {
        return _stream.Peek();
    }

    private static bool IsDecimalDigit(int c)
    {
        return '0' <= c && c <= '9';
    }

    private static bool IsHexDigit(int c)
    {
        return IsDecimalDigit(c) || ('a' <= c && c <= 'f') || ('A' <= c && c <= 'F');
    }

    private static bool IsBinaryDigit(int c)
    {
        return c == '0' || c == '1';
    }

    private static bool IsNameCharacter(int c)
    {
        return Char.IsLetterOrDigit((char)c);
    }

    /// <summary>Consumes characters for as long as <paramref name="accept"/> approves the next one.</summary>
    /// <returns>The consumed characters, possibly an empty string.</returns>
    private string ReadWhile(Func<int, bool> accept)
    {
        var text = new System.Text.StringBuilder();
        while (!IsEof() && accept(Peek()))
            text.Append((char)Read());
        return text.ToString();
    }

    /// <summary>
    /// Reports an error at the current source position on the console and aborts.
    /// </summary>
    /// <param name="reason">Human readable description of the problem.</param>
    /// <exception cref="Exception">Always thrown; its message is the line that was printed.</exception>
    public void Bail(string reason)
    {
        string message = string.Format("Line {0} Col {1} -- {2}", _lineNumber, _columnNumber, reason);
        Console.WriteLine(message);
        throw new Exception(message);
    }

    /// <summary>Returns the next token without consuming it.</summary>
    public Token PeekToken()
    {
        if (_peekToken == null)
            _peekToken = ReadToken();
        return _peekToken.Value;
    }

    /// <summary>
    /// Consumes and returns the next token. Blanks and ';' comments are skipped;
    /// line feeds are reported as <see cref="TokenType.NewLine"/>.
    /// </summary>
    /// <exception cref="Exception">Raised through <see cref="Bail"/> on malformed input.</exception>
    public Token ReadToken()
    {
        if (_peekToken != null)
        {
            Token buffered = _peekToken.Value;
            _peekToken = null;
            return buffered;
        }

        //Skip whitespace and comments
        int @byte;
        do
        {
            @byte = Read();
            if (@byte == ';')
            {
                // A comment runs up to, but does not swallow, the line feed.
                while (@byte != '\n' && @byte != -1)
                    @byte = Read();
            }
            // Tested after the comment skip so that a comment ending the file
            // yields Eof instead of an "unknown character" error.
            if (@byte == -1)
                return new Token { Type = TokenType.Eof };
        } while (Char.IsWhiteSpace((char)@byte) && @byte != '\n');

        switch (@byte)
        {
            case '[':
                return new Token { Type = TokenType.OpenBracket };
            case ']':
                return new Token { Type = TokenType.CloseBracket };
            case '+':
                return new Token { Type = TokenType.Plus };
            case ',':
                return new Token { Type = TokenType.Comma };
            case '\n':
                return new Token { Type = TokenType.NewLine };
        }

        if (@byte == ':')
        {
            if (IsEof() || !Char.IsLetter((char)Peek()))
                Bail("Lexer: Label with invalid name (or name is not present)");
            return new Token { Type = TokenType.Label, Datum = ReadWhile(IsNameCharacter) };
        }

        // ASCII digits only: anything else Char.IsNumber would accept cannot be
        // converted by the parser and is better reported here, with a position.
        if (IsDecimalDigit(@byte))
        {
            string first = ((char)@byte).ToString();

            //Hexadecimal
            if (@byte == '0' && Peek() == 'x')
            {
                string hex = first + (char)Read() + ReadWhile(IsHexDigit);
                if (hex == "0x")
                    Bail("Lexer: Bare hexadecimal prefix (0x)");
                return new Token { Type = TokenType.Number, Datum = hex };
            }

            //Binary
            if (@byte == '0' && Peek() == 'b')
            {
                string binary = first + (char)Read() + ReadWhile(IsBinaryDigit);
                if (binary == "0b")
                    Bail("Lexer: Bare binary prefix (0b)");
                return new Token { Type = TokenType.Number, Datum = binary };
            }

            //Decimal
            return new Token { Type = TokenType.Number, Datum = first + ReadWhile(IsDecimalDigit) };
        }

        if (Char.IsLetter((char)@byte))
        {
            return new Token
            {
                Type = TokenType.Word,
                Datum = ((char)@byte).ToString() + ReadWhile(IsNameCharacter)
            };
        }

        Bail(string.Format("Lexer: Unknown character {0} in stream", (char)@byte));
        return new Token(); // Not reached: Bail always throws.
    }
}
/// <summary>
/// Placeholder enumeration with no members yet.
/// </summary>
internal enum InstructionOpcode
{
}
/// <summary>
/// Operand codes for the general purpose registers and the special operands,
/// plus two negative markers for operands that are not registers.
/// </summary>
internal enum Register
{
    // General purpose registers
    A = 0,
    B = 1,
    C = 2,
    X = 3,
    Y = 4,
    Z = 5,
    I = 6,
    J = 7,

    // Stack operations
    POP = 0x18,
    PEEK = 0x19,
    PUSH = 0x1a,

    // Special registers
    SP = 0x1b,
    PC = 0x1c,
    O = 0x1d,

    // Markers for non-register operands
    Number = -1,
    Label = -2
}
/// <summary>
/// Single-pass assembler. Pulls tokens from a <see cref="Lexer"/> and emits 16-bit words.
/// A label used before its definition is emitted as a placeholder word and patched
/// once the definition is seen. All names (mnemonics, registers, labels) ignore case.
/// </summary>
class Parser
{
    // Mnemonics of the two-operand instructions and their 4-bit opcodes.
    private readonly Dictionary<string, ushort> _opCodes =
        new Dictionary<string, ushort>(StringComparer.OrdinalIgnoreCase)
        {
            {"SET", 1},
            {"ADD", 2},
            {"SUB", 3},
            {"MUL", 4},
            {"DIV", 5},
            {"MOD", 6},
            {"SHL", 7},
            {"SHR", 8},
            {"AND", 9},
            {"BOR", 0xa},
            {"XOR", 0xb},
            {"IFE", 0xc},
            {"IFN", 0xd},
            {"IFG", 0xe},
            {"IFB", 0xf}
        };

    // General purpose registers and their numbers (0-7).
    private readonly Dictionary<string, int> _registers =
        new Dictionary<string, int>(StringComparer.OrdinalIgnoreCase)
        {
            {"A", 0},
            {"B", 1},
            {"C", 2},
            {"X", 3},
            {"Y", 4},
            {"Z", 5},
            {"I", 6},
            {"J", 7}
        };

    //Addressing Modes
    //(Using a static class for scoping purposes. It is evil and you should never do it.)
    private static class AddressingMode
    {
        public const int Register = 0; //0x00-0x07, register
        public const int IndirectRegister = 0x08; //0x08-0x0f, [register]
        public const int IndexedRegister = 0x10; //0x10-0x17, [register + next word]

        //0x18 - 0x1d
        public static readonly Dictionary<string, int> SpecialRegisters =
            new Dictionary<string, int>(StringComparer.OrdinalIgnoreCase)
            {
                {"POP", 0x18},
                {"PEEK", 0x19},
                {"PUSH", 0x1a},
                {"SP", 0x1b},
                {"PC", 0x1c},
                {"O", 0x1d}
            };

        public const int MemoryLocation = 0x1e; //0x1e, [next word]
        public const int LiteralNumber = 0x1f; //0x1f, next word (literal)
        public const int SmallLiteral = 0x20; //0x20-0x3f, literal value 0x00-0x1f (literal)
    }

    // Bookkeeping
    private readonly Lexer _lexer;

    // Labels defined so far -> index of the instruction word they point at.
    private readonly Dictionary<string, int> _labels =
        new Dictionary<string, int>(StringComparer.OrdinalIgnoreCase);

    // Labels referenced but not yet defined -> output positions holding a placeholder.
    // An entry is removed when its label is defined, so whatever is left at the end
    // of the input is an undefined label.
    private readonly Dictionary<string, List<int>> _backRefsToDo =
        new Dictionary<string, List<int>>(StringComparer.OrdinalIgnoreCase);

    // Public Methods

    /// <summary>Creates a parser that consumes tokens from <paramref name="lexer"/>.</summary>
    public Parser(Lexer lexer)
    {
        _lexer = lexer;
    }

    /// <summary>Assembles the whole input.</summary>
    /// <returns>The assembled program, one element per 16-bit word.</returns>
    /// <exception cref="Exception">
    /// Raised through <see cref="Lexer.Bail"/> on any syntax error, out-of-range number,
    /// duplicate label or undefined label.
    /// </exception>
    public ushort[] Parse()
    {
        return DoTheThing();
    }

    // Everything below this point relates to parsing

    //Decimal or Hexadecimal or Binary
    private ushort parseNumber()
    {
        return parseNumber(_lexer.ReadToken());
    }

    /// <summary>
    /// Converts a number token (decimal, 0x-prefixed hexadecimal or 0b-prefixed binary)
    /// to a 16-bit value, bailing out with the source position when the token is not
    /// a number or does not fit in 16 bits.
    /// </summary>
    private ushort parseNumber(Token token)
    {
        if (token.Type != TokenType.Number)
            _lexer.Bail("Expected a number here.");

        string text = token.Datum;
        bool prefixed = text.Length > 2 && text[0] == '0';
        ushort value;
        bool parsed;

        if (prefixed && text[1] == 'b')
        {
            //Binary Number! Strip two character prefix.
            parsed = tryParseBinary(text.Substring(2), out value);
        }
        else if (prefixed && text[1] == 'x')
        {
            //Hexadecimal Number! Strip two character prefix.
            parsed = ushort.TryParse(text.Substring(2), NumberStyles.HexNumber,
                                     CultureInfo.InvariantCulture, out value);
        }
        else
        {
            //Decimal Number! Invariant culture keeps the result machine independent.
            parsed = ushort.TryParse(text, NumberStyles.Integer,
                                     CultureInfo.InvariantCulture, out value);
        }

        if (!parsed)
            _lexer.Bail(string.Format("Number {0} is malformed or does not fit in 16 bits.", text));
        return value;
    }

    // Parses a string of '0'/'1' characters; false when empty, malformed or wider than 16 bits.
    private static bool tryParseBinary(string digits, out ushort value)
    {
        value = 0;
        if (digits.Length == 0)
            return false;

        int accumulator = 0;
        foreach (char bit in digits)
        {
            if (bit != '0' && bit != '1')
                return false;
            accumulator = (accumulator << 1) | (bit - '0');
            if (accumulator > ushort.MaxValue)
                return false;
        }
        value = (ushort)accumulator;
        return true;
    }

    private bool isLabel(Token token)
    {
        //Make sure a label doesn't use a reserved word!
        return token.Type == TokenType.Word
            && !_registers.ContainsKey(token.Datum)
            && !AddressingMode.SpecialRegisters.ContainsKey(token.Datum);
    }

    private bool isRegister(Token token)
    {
        return token.Type == TokenType.Word && _registers.ContainsKey(token.Datum);
    }

    private bool isSpecialRegister(Token token)
    {
        return token.Type == TokenType.Word && AddressingMode.SpecialRegisters.ContainsKey(token.Datum);
    }

    //Appends the address of a label to the program.
    //If label is not available yet, we'll add it as a to-do in _backRefsToDo
    private void insertLabel(List<ushort> outputProgramCode, string label)
    {
        int address;
        if (_labels.TryGetValue(label, out address))
        {
            //We already know the label, simple and easy.
            outputProgramCode.Add((ushort)address);
            return;
        }

        //We don't know the label, insert it as a to-do
        List<int> pending;
        if (!_backRefsToDo.TryGetValue(label, out pending))
        {
            pending = new List<int>();
            _backRefsToDo[label] = pending;
        }
        pending.Add(outputProgramCode.Count); //Remember current position
        outputProgramCode.Add(0xFFFF); //Add a sentinel to the output, which we'll overwrite later.
    }

    //Records a label definition and patches every earlier reference to it.
    private void defineLabel(List<ushort> outputProgramCode, string labelName, ushort location)
    {
        if (_labels.ContainsKey(labelName))
            _lexer.Bail(string.Format("Parser: Label {0} is defined more than once", labelName));
        _labels.Add(labelName, location);

        List<int> pending;
        if (_backRefsToDo.TryGetValue(labelName, out pending))
        {
            foreach (int position in pending)
                outputProgramCode[position] = location;
            _backRefsToDo.Remove(labelName);
        }
    }

    //Reads a token and bails out with errorMessage unless its type is one of the given types.
    private Token ExpectToken(string errorMessage, params TokenType[] type)
    {
        var token = _lexer.ReadToken();
        if (!type.Contains(token.Type))
            _lexer.Bail(errorMessage);
        return token;
    }

    //Reads a token and bails out with errorMessage unless at least one predicate accepts it.
    private Token ExpectToken(string errorMessage, params Func<Token, bool>[] p)
    {
        var token = _lexer.ReadToken();
        if (!p.Any(accepts => accepts(token)))
            _lexer.Bail(errorMessage);
        return token;
    }

    //Number or Register or Label or [ Register ] or [ Number ] or [ Register + Number ] ...
    //Returns Value: The appropriate Loc for that adressing scheme
    //Appends words to the end of the program
    private int parseAdressingScheme(List<ushort> outputProgramCode)
    {
        var token = _lexer.ReadToken();
        switch (token.Type)
        {
            case TokenType.Number:
                {
                    // A simple number literal.
                    // If the number is less than 0x20, we can use the short version.
                    ushort number = parseNumber(token);
                    if (number < AddressingMode.SmallLiteral)
                        return number + AddressingMode.SmallLiteral;
                    outputProgramCode.Add(number);
                    return AddressingMode.LiteralNumber;
                }

            case TokenType.Word:
                {
                    // Either a special register (POP, PEEK, ...), a register (A,B,C,X,...) or a label.
                    int code;
                    if (AddressingMode.SpecialRegisters.TryGetValue(token.Datum, out code))
                        return code;
                    if (_registers.TryGetValue(token.Datum, out code))
                        return code;
                    insertLabel(outputProgramCode, token.Datum);
                    return AddressingMode.LiteralNumber;
                }

            case TokenType.OpenBracket:
                return parseBracketedOperand(outputProgramCode);

            default:
                _lexer.Bail("Expected addressing scheme (Number or Register or [Register] or [Number] or [Register + Number]) here, not whatever was here.");
                return -1; // Not reached: Bail always throws.
        }
    }

    //Parses what follows a "[". The possibilities are:
    // [ Number ]      [ Number + Register ]
    // [ Register ]    [ Register + Number ]    [ Register + Label ]
    // [ Label ]       [ Label + Register ]
    private int parseBracketedOperand(List<ushort> outputProgramCode)
    {
        var first = _lexer.PeekToken();

        if (first.Type == TokenType.Number)
        {
            // [ Number ] OR [ Number + Register ]
            ushort number = parseNumber();
            var separator = _lexer.ReadToken();
            if (separator.Type == TokenType.CloseBracket)
            {
                outputProgramCode.Add(number);
                return AddressingMode.MemoryLocation;
            }
            if (separator.Type == TokenType.Plus)
            {
                var registerToken = _lexer.ReadToken();
                var closeBracketToken = _lexer.ReadToken();
                if (!isRegister(registerToken) || closeBracketToken.Type != TokenType.CloseBracket)
                    _lexer.Bail("Expected a [ Number + Register ] here, no idea what I got.");
                outputProgramCode.Add(number);
                return AddressingMode.IndexedRegister + _registers[registerToken.Datum];
            }
            _lexer.Bail("Expected ] or + (as part of [Number] or [Number + Register])");
            return -1;
        }

        if (isRegister(first))
        {
            // [ Register ] OR [ Register + Number ] OR [ Register + Label ]
            int registerNumber = _registers[_lexer.ReadToken().Datum];
            var separator = _lexer.ReadToken();
            if (separator.Type == TokenType.CloseBracket)
                return AddressingMode.IndirectRegister + registerNumber;
            if (separator.Type == TokenType.Plus)
            {
                var offset = _lexer.PeekToken();
                if (offset.Type == TokenType.Number)
                {
                    outputProgramCode.Add(parseNumber());
                    ExpectToken("Expected a ] here", TokenType.CloseBracket);
                    return AddressingMode.IndexedRegister + registerNumber;
                }
                if (isLabel(offset))
                {
                    insertLabel(outputProgramCode, _lexer.ReadToken().Datum);
                    ExpectToken("Expected a ] here", TokenType.CloseBracket);
                    return AddressingMode.IndexedRegister + registerNumber;
                }
                _lexer.Bail("Expected number or label after + (as part of [Register + Number])");
                return -1;
            }
            _lexer.Bail("Expected ] or + (as part of [Register] or [Register + Number])");
            return -1;
        }

        if (isLabel(first))
        {
            // [ Label ] OR [ Label + Register ]
            string label = _lexer.ReadToken().Datum;
            var separator = _lexer.ReadToken();
            if (separator.Type == TokenType.CloseBracket)
            {
                insertLabel(outputProgramCode, label);
                return AddressingMode.MemoryLocation;
            }
            if (separator.Type == TokenType.Plus)
            {
                var registerToken = _lexer.ReadToken();
                var closeBracketToken = _lexer.ReadToken();
                if (!isRegister(registerToken) || closeBracketToken.Type != TokenType.CloseBracket)
                    _lexer.Bail("Expected a [ Label + Register ] here, no idea what I got.");
                insertLabel(outputProgramCode, label);
                return AddressingMode.IndexedRegister + _registers[registerToken.Datum];
            }
            _lexer.Bail("Expected ] or + (as part of [Label] or [Label + Register])");
            return -1;
        }

        _lexer.Bail("Expected Register or Number (as part of [Register] or [Number] or [Register + Number])");
        return -1;
    }

    //a basic instruction has the format: bbbbbbaaaaaaoooo
    private ushort ConstructInstruction(int o, int a, int b)
    {
        return (ushort)((b << 10) | ((a & 0x3F) << 4) | (o & 0xF));
    }

    //Grammar for a line: OPTIONAL_LABEL OPCODE ADDRESSINGSCHEME "," ADDRESSINGSCHEME "\n"
    //With an exception: OPTIONAL_LABEL "JSR" ADDRESSINGSCHEME "\n"
    private ushort[] DoTheThing()
    {
        var outputProgramCode = new List<ushort>();
        while (true)
        {
            //Skip all newlines
            while (_lexer.PeekToken().Type == TokenType.NewLine)
                _lexer.ReadToken();

            //If we're at the end of the file, we're done!
            if (_lexer.PeekToken().Type == TokenType.Eof)
            {
                // Anything still pending was referenced but never defined; emitting
                // the 0xFFFF placeholder silently would produce a broken program.
                if (_backRefsToDo.Count > 0)
                    _lexer.Bail("Parser: Undefined label(s): " + string.Join(", ", _backRefsToDo.Keys.ToArray()));
                return outputProgramCode.ToArray();
            }

            //Begin parsing current instruction
            var currentInstructionLocation = (ushort)outputProgramCode.Count;
            outputProgramCode.Add(0xBAAD); //Temporary sentinel, overwritten below

            //The first thing we expect in a line of assembler is an optional label.
            if (_lexer.PeekToken().Type == TokenType.Label)
                defineLabel(outputProgramCode, _lexer.ReadToken().Datum, currentInstructionLocation);

            string opcode = ExpectToken("Parser: Expected Instruction Opcode", TokenType.Word).Datum;
            ushort basicOpcode;
            if (string.Equals(opcode, "JSR", StringComparison.OrdinalIgnoreCase))
            {
                int addressingMode = parseAdressingScheme(outputProgramCode);
                ExpectToken("Expected end of line", TokenType.NewLine, TokenType.Eof);
                outputProgramCode[currentInstructionLocation] = ConstructInstruction(0, 0x01, addressingMode);
            }
            else if (_opCodes.TryGetValue(opcode, out basicOpcode))
            {
                int addressingModeA = parseAdressingScheme(outputProgramCode);
                ExpectToken("Expected comma", TokenType.Comma);
                int addressingModeB = parseAdressingScheme(outputProgramCode);
                ExpectToken("Expected end of line", TokenType.NewLine, TokenType.Eof);
                outputProgramCode[currentInstructionLocation] = ConstructInstruction(basicOpcode, addressingModeA, addressingModeB);
            }
            else
            {
                _lexer.Bail("Parser: Invalid Opcode");
            }
        }
    }
}
/// <summary>
/// Command line driver: assembles one source file and prints the resulting words in hexadecimal.
/// </summary>
class Program
{
    // Used when no path is given on the command line.
    private const string DefaultSourcePath = @"C:\Users\awood\Documents\work\0x10c\Assembler2\a.s";

    /// <summary>Prints every token of the file at <paramref name="path"/>, then waits for Enter.</summary>
    private static void TestLexer(string path)
    {
        using (var file = new StreamReader(path))
        {
            var lexer = new Lexer(file);
            while (lexer.PeekToken().Type != TokenType.Eof)
            {
                var token = lexer.ReadToken();
                Console.WriteLine("{0} {1}", token.Type.ToString(), token.Datum);
            }
        }
        Console.ReadLine();
    }

    /// <summary>
    /// Assembles the file at <paramref name="path"/> and prints each word as four
    /// hexadecimal digits, then waits for Enter.
    /// </summary>
    private static void TestParser(string path)
    {
        using (var file = new StreamReader(path))
        {
            var parser = new Parser(new Lexer(file));
            foreach (var word in parser.Parse())
            {
                Console.Write("{0:X4} ", word);
            }
        }
        Console.WriteLine();
        Console.ReadLine();
    }

    /// <summary>Entry point.</summary>
    /// <param name="args">Optional: the first argument is the path of the file to assemble.</param>
    static void Main(string[] args)
    {
        string path = (args != null && args.Length > 0) ? args[0] : DefaultSourcePath;
        TestParser(path);
    }
}
- }