Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
// TinyAssembler by NasenSpray
#include <algorithm>
#include <cctype>
#include <cstddef>
#include <cstdint>
#include <fstream>
#include <iomanip>
#include <iostream>
#include <iterator>
#include <list>
#include <regex>
#include <sstream>
#include <string>
#include <unordered_map>
#include <utility>
#include <vector>
namespace tiny
{
    // Kind of a parsed operand.
    enum OperandType {
        None,
        Memory,   // written as "[n]" in the source text
        Literal,  // decimal number, 'c' character, or an already-defined label
        Label,    // label that was not yet defined when the line was parsed
    };

    // Instruction names accepted by the assembler (matched case-insensitively).
    enum Mnemonic {
        AND, OR, XOR, NOT,
        MOV, RANDOM, ADD, SUB,
        JMP, JZ, JEQ, JLS,
        JGT, HALT, APRINT, DPRINT,
    };

    // Operand signature of an instruction: one hex digit per operand, in source
    // order, 1 = memory operand, 2 = literal operand. N means "no operands".
    enum OperandCombination {
        N   = 0,
        M   = 0x1,
        L   = 0x2,
        MM  = 0x11,
        ML  = 0x12,
        LM  = 0x21,
        LL  = 0x22,
        MMM = 0x111,
        LMM = 0x211,
        MML = 0x112,
        LML = 0x212,
    };

    // Opcode byte emitted for each (mnemonic, operand signature) pair.
    enum Instruction {
        AND_MM   = 0x00,
        AND_ML   = 0x01,
        OR_MM    = 0x02,
        OR_ML    = 0x03,
        XOR_MM   = 0x04,
        XOR_ML   = 0x05,
        NOT_M    = 0x06,
        MOV_MM   = 0x07,
        MOV_ML   = 0x08,
        RANDOM_M = 0x09,
        ADD_MM   = 0x0A,
        ADD_ML   = 0x0B,
        SUB_MM   = 0x0C,
        SUB_ML   = 0x0D,
        JMP_M    = 0x0E,
        JMP_L    = 0x0F,
        JZ_MM    = 0x10,
        JZ_ML    = 0x11,
        JZ_LM    = 0x12,
        JZ_LL    = 0x13,
        JEQ_MMM  = 0x14,
        JEQ_LMM  = 0x15,
        JEQ_MML  = 0x16,
        JEQ_LML  = 0x17,
        JLS_MMM  = 0x18,
        JLS_LMM  = 0x19,
        JLS_MML  = 0x1A,
        JLS_LML  = 0x1B,
        JGT_MMM  = 0x1C,
        JGT_LMM  = 0x1D,
        JGT_MML  = 0x1E,
        JGT_LML  = 0x1F,
        HALT_N   = 0xFF,
        APRINT_M = 0x20,
        APRINT_L = 0x21,
        DPRINT_M = 0x22,
        DPRINT_L = 0x23,
    };

    // One operand of an instruction. `value` is the byte that gets emitted;
    // `label` is only meaningful while `type == Label` (before resolve()).
    struct Operand {
        OperandType type;
        std::uint8_t value;
        std::string label;

        // `value` is always initialised so that no constructor leaves an
        // indeterminate byte behind.
        Operand() : type(OperandType::None), value(0) {}
        Operand(OperandType t, std::uint8_t v) : type(t), value(v) {}
        Operand(OperandType t, std::string label) : type(t), value(0), label(std::move(label)) {}
    };

    // A mnemonic together with its parsed operands (before opcode selection).
    struct MnemonicInstruction {
        Mnemonic op;
        std::vector<Operand> operand;
    };

    // A fully selected instruction: opcode plus operands.
    struct TranslatedInstruction {
        Instruction op;
        std::vector<Operand> operand;

        // Takes ownership of `operands`; the named rvalue reference must be
        // moved explicitly, otherwise the vector would be copied.
        TranslatedInstruction(Instruction op, std::vector<Operand>&& operands)
            : op(op), operand(std::move(operands)) {}

        // Number of bytes this instruction emits: one opcode byte plus one byte per operand.
        std::size_t size() const { return operand.size() + 1; }
    };

    // Source spelling -> mnemonic.
    const std::pair<std::string, Mnemonic> MnemonicMapData[] =
    {
        { "AND",    Mnemonic::AND    },
        { "OR",     Mnemonic::OR     },
        { "XOR",    Mnemonic::XOR    },
        { "NOT",    Mnemonic::NOT    },
        { "MOV",    Mnemonic::MOV    },
        { "RANDOM", Mnemonic::RANDOM },
        { "ADD",    Mnemonic::ADD    },
        { "SUB",    Mnemonic::SUB    },
        { "JMP",    Mnemonic::JMP    },
        { "JZ",     Mnemonic::JZ     },
        { "JEQ",    Mnemonic::JEQ    },
        { "JLS",    Mnemonic::JLS    },
        { "JGT",    Mnemonic::JGT    },
        { "HALT",   Mnemonic::HALT   },
        { "APRINT", Mnemonic::APRINT },
        { "DPRINT", Mnemonic::DPRINT },
    };

    // Mnemonic -> every (operand signature, opcode) pair that exists for it.
    const std::pair<Mnemonic, std::pair<OperandCombination, Instruction>> InstructionData[] =
    {
        { Mnemonic::AND,    { OperandCombination::MM,  Instruction::AND_MM   } },
        { Mnemonic::AND,    { OperandCombination::ML,  Instruction::AND_ML   } },
        { Mnemonic::OR,     { OperandCombination::MM,  Instruction::OR_MM    } },
        { Mnemonic::OR,     { OperandCombination::ML,  Instruction::OR_ML    } },
        { Mnemonic::XOR,    { OperandCombination::MM,  Instruction::XOR_MM   } },
        { Mnemonic::XOR,    { OperandCombination::ML,  Instruction::XOR_ML   } },
        { Mnemonic::NOT,    { OperandCombination::M,   Instruction::NOT_M    } },
        { Mnemonic::MOV,    { OperandCombination::MM,  Instruction::MOV_MM   } },
        { Mnemonic::MOV,    { OperandCombination::ML,  Instruction::MOV_ML   } },
        { Mnemonic::RANDOM, { OperandCombination::M,   Instruction::RANDOM_M } },
        { Mnemonic::ADD,    { OperandCombination::MM,  Instruction::ADD_MM   } },
        { Mnemonic::ADD,    { OperandCombination::ML,  Instruction::ADD_ML   } },
        { Mnemonic::SUB,    { OperandCombination::MM,  Instruction::SUB_MM   } },
        { Mnemonic::SUB,    { OperandCombination::ML,  Instruction::SUB_ML   } },
        { Mnemonic::JMP,    { OperandCombination::M,   Instruction::JMP_M    } },
        { Mnemonic::JMP,    { OperandCombination::L,   Instruction::JMP_L    } },
        { Mnemonic::JZ,     { OperandCombination::MM,  Instruction::JZ_MM    } },
        { Mnemonic::JZ,     { OperandCombination::ML,  Instruction::JZ_ML    } },
        { Mnemonic::JZ,     { OperandCombination::LM,  Instruction::JZ_LM    } },
        { Mnemonic::JZ,     { OperandCombination::LL,  Instruction::JZ_LL    } },
        { Mnemonic::JEQ,    { OperandCombination::MMM, Instruction::JEQ_MMM  } },
        { Mnemonic::JEQ,    { OperandCombination::LMM, Instruction::JEQ_LMM  } },
        { Mnemonic::JEQ,    { OperandCombination::MML, Instruction::JEQ_MML  } },
        { Mnemonic::JEQ,    { OperandCombination::LML, Instruction::JEQ_LML  } },
        { Mnemonic::JLS,    { OperandCombination::MMM, Instruction::JLS_MMM  } },
        { Mnemonic::JLS,    { OperandCombination::LMM, Instruction::JLS_LMM  } },
        { Mnemonic::JLS,    { OperandCombination::MML, Instruction::JLS_MML  } },
        { Mnemonic::JLS,    { OperandCombination::LML, Instruction::JLS_LML  } },
        { Mnemonic::JGT,    { OperandCombination::MMM, Instruction::JGT_MMM  } },
        { Mnemonic::JGT,    { OperandCombination::LMM, Instruction::JGT_LMM  } },
        { Mnemonic::JGT,    { OperandCombination::MML, Instruction::JGT_MML  } },
        { Mnemonic::JGT,    { OperandCombination::LML, Instruction::JGT_LML  } },
        { Mnemonic::HALT,   { OperandCombination::N,   Instruction::HALT_N   } },
        { Mnemonic::APRINT, { OperandCombination::M,   Instruction::APRINT_M } },
        { Mnemonic::APRINT, { OperandCombination::L,   Instruction::APRINT_L } },
        { Mnemonic::DPRINT, { OperandCombination::M,   Instruction::DPRINT_M } },
        { Mnemonic::DPRINT, { OperandCombination::L,   Instruction::DPRINT_L } },
    };

    // Lookup structures built once from the tables above.
    const std::unordered_map<std::string, Mnemonic> MnemonicMap(
        std::begin(MnemonicMapData), std::end(MnemonicMapData));
    const std::unordered_multimap<Mnemonic, std::pair<OperandCombination, Instruction>> MnemonicInstructionMap(
        std::begin(InstructionData), std::end(InstructionData));

    // Thrown by Assembler for every problem found in the input.
    struct AssemblerError {
        std::string message;
        AssemblerError(std::string msg) : message(std::move(msg)) {}
    };

    // Line-oriented assembler.
    //
    // Usage: feed every source line to push_line(), call resolve() once all
    // lines were pushed (patches forward label references), then call emit()
    // repeatedly until it returns false.
    class Assembler
    {
    public:
        Assembler() : offset(0) {}

        // Parses one source line.
        //
        // A line is either blank/comment-only (ignored), a label definition
        // ("name:"), or an instruction with up to three operands. Each may be
        // followed by a "//" comment.
        //
        // Throws AssemblerError if the line is not recognised, the mnemonic is
        // unknown, an operand is out of the 0..255 range, a label name is
        // illegal or already defined, no opcode exists for the operand
        // combination, or the output would exceed 255 bytes.
        void push_line(const std::string& s)
        {
            //matches empty line
            static const std::regex regWhite("^\\s*(//.*)?$");
            //matches labels
            static const std::regex regLabel("^\\s*([a-zA-Z_]+[a-zA-Z_0-9]*)\\s*:\\s*(//.*)?$");
            //matches syntactically valid instruction formats
            static const std::regex regInstruction("^\\s*([[:alpha:]]+)(?:\\s*|((\\s+(?:[0-9]+|[a-zA-Z_]+[a-zA-Z_0-9]*|'.'|\\[\\s*[0-9]+\\s*\\]))+))\\s*(//.*)?$");

            if (std::regex_match(s, regWhite))
                return;

            std::smatch match;
            if (std::regex_match(s, match, regLabel)) {
                define_label(match[1].str());
                return;
            }

            if (!std::regex_match(s, match, regInstruction))
                throw AssemblerError("Unrecognized input: " + s);

            const std::string name = to_upper(match[1].str());
            const auto op = MnemonicMap.find(name);
            if (op == MnemonicMap.end()) {
                //try to find a possible suggestion: longest proper prefix that is a mnemonic
                for (std::size_t i = name.length() - 1; i > 0; --i) {
                    const std::string trial = name.substr(0, i);
                    if (MnemonicMap.find(trial) != MnemonicMap.end())
                        throw AssemblerError("Unrecognized instruction: " + name + " (did you mean " + trial + "?)");
                }
                throw AssemblerError("Unrecognized instruction: " + name);
            }

            // first = operands, second = operand signature (see OperandCombination)
            auto parsed = parse_operands(match[2].str());

            //check for existence of Instruction
            const auto range = MnemonicInstructionMap.equal_range(op->second);
            for (auto it = range.first; it != range.second; ++it) {
                // Compare as integers: the parsed signature may be a value
                // (e.g. 0x222) that is not an enumerator of OperandCombination.
                if (static_cast<std::size_t>(it->second.first) != parsed.second)
                    continue;

                TranslatedInstruction t(it->second.second, std::move(parsed.first));
                // Check before committing so a rejected line leaves the state untouched.
                const std::size_t next = offset + t.size();
                if (next > 255)
                    throw AssemblerError("Output to big");
                offset = next;
                instructions.push_back(std::move(t));
                return;
            }
            throw AssemblerError("Unexpected or missing operands");
        }

        // Emits the oldest pending instruction and removes it from the queue.
        //
        // `emitter` is called once with the opcode and then once per operand
        // value, in order. Returns false (without calling `emitter`) when no
        // instruction is left.
        template<class F>
        bool emit(F emitter)
        {
            if (instructions.empty())
                return false;

            const TranslatedInstruction& inst = instructions.front();
            emitter(inst.op);
            for (const Operand& o : inst.operand)
                emitter(o.value);
            instructions.pop_front();
            return true;
        }

        // Replaces every operand that still refers to a label by the label's
        // offset. Throws AssemblerError for a label that was never defined.
        void resolve()
        {
            for (TranslatedInstruction& inst : instructions) {
                for (Operand& o : inst.operand) {
                    if (o.type != OperandType::Label)
                        continue;
                    const auto target = labelMap.find(o.label);
                    if (target == labelMap.end())
                        throw AssemblerError("Unresolved label: " + o.label);
                    o.type = OperandType::Literal;
                    o.value = static_cast<std::uint8_t>(target->second);
                }
            }
        }

    private:
        // Upper-cases `text`. The cast to unsigned char keeps std::toupper
        // well-defined for every possible char value.
        static std::string to_upper(std::string text)
        {
            std::transform(text.begin(), text.end(), text.begin(),
                           [](unsigned char c) { return static_cast<char>(std::toupper(c)); });
            return text;
        }

        // Registers `name` at the current output offset.
        void define_label(const std::string& name)
        {
            if (MnemonicMap.find(to_upper(name)) != MnemonicMap.end())
                throw AssemblerError("Illegal label name: " + name);
            if (!labelMap.insert(std::make_pair(name, offset)).second)
                throw AssemblerError("Label already defined: " + name);
        }

        // Splits the operand part of an instruction line into operands and
        // computes the operand signature (one hex digit per operand:
        // 1 = memory, 2 = literal/label).
        std::pair<std::vector<Operand>, std::size_t> parse_operands(std::string args) const
        {
            // Same characters the validating regex accepts as \s; skipping only
            // " \t" would mis-tokenise operands separated by e.g. \f or \v.
            static const char* const blanks = " \t\r\n\v\f";

            std::vector<Operand> operands;
            std::size_t comb = OperandCombination::N;

            for (;;) {
                args.erase(0, args.find_first_not_of(blanks));
                if (args.empty() || args.compare(0, 2, "//") == 0)
                    break;
                if (operands.size() >= 3)
                    throw AssemblerError("Too many operands");

                if (args[0] == '[') {
                    //memory operand
                    const std::size_t close = args.find(']');
                    if (close == std::string::npos)
                        throw AssemblerError("Illegal operand: " + args);
                    operands.push_back(Operand(OperandType::Memory, parse_byte(args.substr(1, close - 1), args)));
                    comb = (comb << 4) | 0x1;
                    args.erase(0, close + 1);
                } else if (args[0] == '\'') {
                    //ASCII literal: 'c'
                    if (args.size() < 3)
                        throw AssemblerError("Illegal operand: " + args);
                    operands.push_back(Operand(OperandType::Literal, static_cast<std::uint8_t>(args[1])));
                    comb = (comb << 4) | 0x2;
                    args.erase(0, 3);
                } else {
                    //numeric literal or label
                    const std::string lit = args.substr(0, args.find_first_of(blanks));
                    if (lit.find_first_not_of("0123456789") == std::string::npos) {
                        operands.push_back(Operand(OperandType::Literal, parse_byte(lit, lit)));
                    } else {
                        const auto known = labelMap.find(lit);
                        if (known != labelMap.end()) {
                            // label defined earlier: its offset is already known
                            operands.push_back(Operand(OperandType::Literal, static_cast<std::uint8_t>(known->second)));
                        } else {
                            // forward reference: keep the name, resolve() patches it later
                            if (MnemonicMap.find(to_upper(lit)) != MnemonicMap.end())
                                throw AssemblerError("Illegal label name: " + lit);
                            operands.push_back(Operand(OperandType::Label, lit));
                        }
                    }
                    comb = (comb << 4) | 0x2;
                    args.erase(0, lit.size());
                }
            }
            return std::make_pair(std::move(operands), comb);
        }

        // Converts decimal `digits` to a byte. Throws
        // AssemblerError("Illegal operand: " + shown) when the text is not a
        // number or is outside 0..255.
        static std::uint8_t parse_byte(const std::string& digits, const std::string& shown)
        {
            int val = 0xFFFF;  // out-of-range sentinel, kept if std::stoi fails
            try {
                val = std::stoi(digits);
            } catch (...) {
                // not swallowed: the sentinel triggers the error below
            }
            if (val < 0 || val > 255)
                throw AssemblerError("Illegal operand: " + shown);
            return static_cast<std::uint8_t>(val);
        }

        std::list<TranslatedInstruction> instructions;      // pending output, in source order
        std::unordered_map<std::string, std::size_t> labelMap;  // label name -> output offset
        std::size_t offset;                                  // bytes produced so far
    };
}
- int main(int argc, char *argv[])
- {
- using namespace std;
- cout << "tinyAssembler v0.1" << endl;
- if (argc != 2) {
- cout << "Usage: tiny <input file>" << endl;
- return -1;
- }
- ifstream input(argv[1]);
- if (!input.is_open()) {
- cout << "Couldn't open file: " << argv[1] << endl;
- return -1;
- }
- tiny::Assembler as;
- size_t lineNr = 0;
- string line;
- try {
- while (getline(input, line)) {
- lineNr++;
- as.push_line(line);
- }
- //resolve missing labels
- as.resolve();
- cout << "Output:" << endl;
- size_t offset = 0;
- while (as.emit([&](uint8_t val) { cout << "0x" << hex << setfill('0') << setw(2) << (int)val << " "; offset++;}))
- cout << endl;
- cout << endl << "Done, emitted " << dec << offset << " bytes" << endl;
- } catch (tiny::AssemblerError err) {
- cout << "Error in line " << dec << lineNr << ": " << line << endl;
- cout << err.message << endl;
- }
- return 0;
- }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement