View difference between Paste ID: iLCKbwUK and xCArZtwS
SHOW: | | - or go back to the newest paste.
1
//TinyAssembler by NasenSpray
2
3
#include <algorithm>
#include <cctype>
#include <cstdint>
#include <exception>
#include <fstream>
#include <iomanip>
#include <iostream>
#include <list>
#include <regex>
#include <sstream>
#include <string>
#include <unordered_map>
#include <utility>
#include <vector>
13
14
namespace tiny
{
    /// Kind of a parsed operand.
    enum OperandType {
        None,       ///< default-constructed operand
        Memory,     ///< `[n]` memory reference
        Literal,    ///< number, character literal, or a label that was already defined
        Label,      ///< label unknown at parse time; patched by Assembler::resolve()
    };

    /// Instruction names accepted in source text (matched case-insensitively).
    enum Mnemonic {
        AND,    OR,     XOR,    NOT,
        MOV,    RANDOM, ADD,    SUB,
        JMP,    JZ,     JEQ,    JLS,
        JGT,    HALT,   APRINT, DPRINT,
    };

    /// Operand signature of an instruction, one hex nibble per operand with the
    /// first operand in the most significant nibble: 1 = memory, 2 = literal.
    enum OperandCombination {
        N   = 0,
        M   = 0x1,
        L   = 0x2,
        MM  = 0x11,
        ML  = 0x12,
        LM  = 0x21,
        LL  = 0x22,
        MMM = 0x111,
        LMM = 0x211,
        MML = 0x112,
        LML = 0x212,
    };

    /// Opcode byte handed to the emitter; the suffix names the operand combination.
    enum Instruction {
        AND_MM   = 0x00,
        AND_ML   = 0x01,
        OR_MM    = 0x02,
        OR_ML    = 0x03,
        XOR_MM   = 0x04,
        XOR_ML   = 0x05,
        NOT_M    = 0x06,
        MOV_MM   = 0x07,
        MOV_ML   = 0x08,
        RANDOM_M = 0x09,
        ADD_MM   = 0x0A,
        ADD_ML   = 0x0B,
        SUB_MM   = 0x0C,
        SUB_ML   = 0x0D,
        JMP_M    = 0x0E,
        JMP_L    = 0x0F,
        JZ_MM    = 0x10,
        JZ_ML    = 0x11,
        JZ_LM    = 0x12,
        JZ_LL    = 0x13,
        JEQ_MMM  = 0x14,
        JEQ_LMM  = 0x15,
        JEQ_MML  = 0x16,
        JEQ_LML  = 0x17,
        JLS_MMM  = 0x18,
        JLS_LMM  = 0x19,
        JLS_MML  = 0x1A,
        JLS_LML  = 0x1B,
        JGT_MMM  = 0x1C,
        JGT_LMM  = 0x1D,
        JGT_MML  = 0x1E,
        JGT_LML  = 0x1F,
        HALT_N   = 0xFF,
        APRINT_M = 0x20,
        APRINT_L = 0x21,
        DPRINT_M = 0x22,
        DPRINT_L = 0x23,
    };

    /// One operand of an instruction. `value` is the byte that gets emitted;
    /// `label` is only meaningful while `type == Label`.
    struct Operand {
        OperandType type;
        uint8_t     value;
        std::string label;

        // value is always initialised so that emitting an operand never reads
        // an indeterminate byte.
        Operand() : type(OperandType::None), value(0) {}
        Operand(OperandType t, uint8_t v) : type(t), value(v) {}
        Operand(OperandType t, std::string label) : type(t), value(0), label(std::move(label)) {}
    };

    /// A mnemonic together with its parsed operands, before opcode selection.
    struct MnemonicInstruction {
        Mnemonic op;
        std::vector<Operand>  operand;
    };

    /// An instruction with its final opcode; occupies size() output bytes.
    struct TranslatedInstruction {
        Instruction op;
        std::vector<Operand>  operand;

        TranslatedInstruction(Instruction op, std::vector<Operand>&& operands)
            : op(op), operand(std::move(operands)) {}

        /// Opcode byte plus one byte per operand.
        size_t size() const { return operand.size() + 1; }
    };

    /// Upper-case spelling of every mnemonic.
    std::pair<std::string, Mnemonic> MnemonicMapData[] =
    {
        std::make_pair("AND",    Mnemonic::AND),
        std::make_pair("OR",     Mnemonic::OR),
        std::make_pair("XOR",    Mnemonic::XOR),
        std::make_pair("NOT",    Mnemonic::NOT),
        std::make_pair("MOV",    Mnemonic::MOV),
        std::make_pair("RANDOM", Mnemonic::RANDOM),
        std::make_pair("ADD",    Mnemonic::ADD),
        std::make_pair("SUB",    Mnemonic::SUB),
        std::make_pair("JMP",    Mnemonic::JMP),
        std::make_pair("JZ",     Mnemonic::JZ),
        std::make_pair("JEQ",    Mnemonic::JEQ),
        std::make_pair("JLS",    Mnemonic::JLS),
        std::make_pair("JGT",    Mnemonic::JGT),
        std::make_pair("HALT",   Mnemonic::HALT),
        std::make_pair("APRINT", Mnemonic::APRINT),
        std::make_pair("DPRINT", Mnemonic::DPRINT)
    };

    /// Every legal (mnemonic, operand combination) pair and the opcode it maps to.
    std::pair<Mnemonic, std::pair<OperandCombination, Instruction>> InstructionData[] =
    {
        std::make_pair(Mnemonic::AND,    std::make_pair(OperandCombination::MM,  Instruction::AND_MM)),
        std::make_pair(Mnemonic::AND,    std::make_pair(OperandCombination::ML,  Instruction::AND_ML)),
        std::make_pair(Mnemonic::OR,     std::make_pair(OperandCombination::MM,  Instruction::OR_MM)),
        std::make_pair(Mnemonic::OR,     std::make_pair(OperandCombination::ML,  Instruction::OR_ML)),
        std::make_pair(Mnemonic::XOR,    std::make_pair(OperandCombination::MM,  Instruction::XOR_MM)),
        std::make_pair(Mnemonic::XOR,    std::make_pair(OperandCombination::ML,  Instruction::XOR_ML)),
        std::make_pair(Mnemonic::NOT,    std::make_pair(OperandCombination::M,   Instruction::NOT_M)),

        std::make_pair(Mnemonic::MOV,    std::make_pair(OperandCombination::MM,  Instruction::MOV_MM)),
        std::make_pair(Mnemonic::MOV,    std::make_pair(OperandCombination::ML,  Instruction::MOV_ML)),

        std::make_pair(Mnemonic::RANDOM, std::make_pair(OperandCombination::M,   Instruction::RANDOM_M)),

        std::make_pair(Mnemonic::ADD,    std::make_pair(OperandCombination::MM,  Instruction::ADD_MM)),
        std::make_pair(Mnemonic::ADD,    std::make_pair(OperandCombination::ML,  Instruction::ADD_ML)),

        std::make_pair(Mnemonic::SUB,    std::make_pair(OperandCombination::MM,  Instruction::SUB_MM)),
        std::make_pair(Mnemonic::SUB,    std::make_pair(OperandCombination::ML,  Instruction::SUB_ML)),

        std::make_pair(Mnemonic::JMP,    std::make_pair(OperandCombination::M,   Instruction::JMP_M)),
        std::make_pair(Mnemonic::JMP,    std::make_pair(OperandCombination::L,   Instruction::JMP_L)),

        std::make_pair(Mnemonic::JZ,     std::make_pair(OperandCombination::MM,  Instruction::JZ_MM)),
        std::make_pair(Mnemonic::JZ,     std::make_pair(OperandCombination::ML,  Instruction::JZ_ML)),
        std::make_pair(Mnemonic::JZ,     std::make_pair(OperandCombination::LM,  Instruction::JZ_LM)),
        std::make_pair(Mnemonic::JZ,     std::make_pair(OperandCombination::LL,  Instruction::JZ_LL)),

        std::make_pair(Mnemonic::JEQ,    std::make_pair(OperandCombination::MMM, Instruction::JEQ_MMM)),
        std::make_pair(Mnemonic::JEQ,    std::make_pair(OperandCombination::LMM, Instruction::JEQ_LMM)),
        std::make_pair(Mnemonic::JEQ,    std::make_pair(OperandCombination::MML, Instruction::JEQ_MML)),
        std::make_pair(Mnemonic::JEQ,    std::make_pair(OperandCombination::LML, Instruction::JEQ_LML)),

        std::make_pair(Mnemonic::JLS,    std::make_pair(OperandCombination::MMM, Instruction::JLS_MMM)),
        std::make_pair(Mnemonic::JLS,    std::make_pair(OperandCombination::LMM, Instruction::JLS_LMM)),
        std::make_pair(Mnemonic::JLS,    std::make_pair(OperandCombination::MML, Instruction::JLS_MML)),
        std::make_pair(Mnemonic::JLS,    std::make_pair(OperandCombination::LML, Instruction::JLS_LML)),

        std::make_pair(Mnemonic::JGT,    std::make_pair(OperandCombination::MMM, Instruction::JGT_MMM)),
        std::make_pair(Mnemonic::JGT,    std::make_pair(OperandCombination::LMM, Instruction::JGT_LMM)),
        std::make_pair(Mnemonic::JGT,    std::make_pair(OperandCombination::MML, Instruction::JGT_MML)),
        std::make_pair(Mnemonic::JGT,    std::make_pair(OperandCombination::LML, Instruction::JGT_LML)),

        std::make_pair(Mnemonic::HALT,   std::make_pair(OperandCombination::N,   Instruction::HALT_N)),

        std::make_pair(Mnemonic::APRINT, std::make_pair(OperandCombination::M,   Instruction::APRINT_M)),
        std::make_pair(Mnemonic::APRINT, std::make_pair(OperandCombination::L,   Instruction::APRINT_L)),
        std::make_pair(Mnemonic::DPRINT, std::make_pair(OperandCombination::M,   Instruction::DPRINT_M)),
        std::make_pair(Mnemonic::DPRINT, std::make_pair(OperandCombination::L,   Instruction::DPRINT_L)),
    };

    /// Lookup: upper-case mnemonic text -> Mnemonic.
    std::unordered_map<std::string, Mnemonic> MnemonicMap(
        std::begin(MnemonicMapData), std::end(MnemonicMapData));

    /// Lookup: Mnemonic -> all (operand combination, opcode) variants it supports.
    std::unordered_multimap<Mnemonic, std::pair<OperandCombination, Instruction>> MnemonicInstructionMap(
        std::begin(InstructionData), std::end(InstructionData));

    /// Thrown by Assembler for every malformed input; `message` is human readable.
    struct AssemblerError {
        std::string message;
        AssemblerError(std::string msg) : message(std::move(msg)) {}
    };

    /// Line-oriented assembler.
    ///
    /// Usage: feed every source line to push_line(), call resolve() once to patch
    /// forward label references, then call emit() until it returns false.
    class Assembler
    {
    public:
        Assembler() : offset(0) {}

        /// Parses one source line: blank/comment line, `label:` or an instruction.
        /// @throws AssemblerError if the line is not recognised or is invalid.
        void push_line(std::string s)
        {
            //matches empty line
            static const std::regex regWhite("^\\s*(//.*)?$");
            //matches labels
            static const std::regex regLabel("^\\s*([a-zA-Z_]+[a-zA-Z_0-9]*)\\s*:\\s*(//.*)?$");
            //matches syntactically valid instruction formats
            static const std::regex regInstruction("^\\s*([[:alpha:]]+)(?:\\s*|((\\s+(?:[0-9]+|[a-zA-Z_]+[a-zA-Z_0-9]*|'.'|\\[\\s*[0-9]+\\s*\\]))+))\\s*(//.*)?$");

            std::smatch match;
            if (std::regex_match(s, regWhite))
                return;

            if (std::regex_match(s, match, regLabel)) {
                defineLabel(match[1].str());
                return;
            }

            if (std::regex_match(s, match, regInstruction)) {
                assembleInstruction(match[1].str(), match[2].str());
                return;
            }

            throw AssemblerError("Unrecognized input: " + s);
        }

        /// Pops the oldest pending instruction and passes its opcode followed by
        /// each operand byte to `emitter`.
        /// @return false when no instruction is left (nothing was emitted).
        template<class F>
        bool emit(F emitter)
        {
            if (instructions.empty())
                return false;

            // Borrow instead of copying; the element stays alive until pop_front().
            const TranslatedInstruction& inst = instructions.front();
            emitter(inst.op);
            for (const Operand& arg : inst.operand)
                emitter(arg.value);
            instructions.pop_front();
            return true;
        }

        /// Replaces every Label operand by the literal offset of its label.
        /// @throws AssemblerError if a referenced label was never defined.
        void resolve()
        {
            for (TranslatedInstruction& inst : instructions) {
                for (Operand& arg : inst.operand) {
                    if (arg.type != OperandType::Label)
                        continue;

                    const auto target = labelMap.find(arg.label);
                    if (target == labelMap.end())
                        throw AssemblerError("Unresolved label: " + arg.label);

                    arg.type = OperandType::Literal;
                    // Offsets never exceed 255 (enforced in assembleInstruction).
                    arg.value = static_cast<uint8_t>(target->second);
                }
            }
        }

    private:
        /// Upper-cases ASCII text; goes through unsigned char because passing a
        /// negative char to std::toupper is undefined behaviour.
        static std::string toUpper(std::string text)
        {
            std::transform(text.begin(), text.end(), text.begin(),
                           [](unsigned char c) { return static_cast<char>(std::toupper(c)); });
            return text;
        }

        /// Parses a decimal number; returns -1 unless it lies in [0, 255].
        static int parseByte(const std::string& text)
        {
            try {
                const int val = std::stoi(text);
                return (val < 0 || val > 255) ? -1 : val;
            } catch (const std::exception&) {
                // std::stoi reports "no digits" / "out of range" via exceptions.
                return -1;
            }
        }

        /// Registers `name` at the current output offset.
        void defineLabel(const std::string& name)
        {
            if (MnemonicMap.find(toUpper(name)) != MnemonicMap.end())
                throw AssemblerError("Illegal label name: " + name);
            if (!labelMap.insert(std::make_pair(name, offset)).second)
                throw AssemblerError("Label already defined: " + name);
        }

        /// Splits the operand text into at most three operands and accumulates
        /// their nibble signature (see OperandCombination) into `comb`.
        std::vector<Operand> parseOperands(std::string args, size_t& comb) const
        {
            // Same character class the instruction regex accepts as `\s`, so a
            // separator matched there is never mistaken for part of a token.
            const char* const blanks = " \t\r\n\v\f";

            std::vector<Operand> operands;
            for (;;) {
                args.erase(0, args.find_first_not_of(blanks));
                if (args.empty() || args.compare(0, 2, "//") == 0)
                    break;

                if (operands.size() >= 3)
                    throw AssemblerError("Too many operands");

                if (args[0] == '[') {
                    //memory operand
                    const size_t close = args.find(']');
                    const int val = parseByte(args.substr(1, close - 1));
                    if (val < 0)
                        throw AssemblerError("Illegal operand: " + args);

                    operands.push_back(Operand(OperandType::Memory, static_cast<uint8_t>(val)));
                    comb = (comb << 4) | 0x1;
                    args.erase(0, close + 1);
                } else if (args[0] == '\'') {
                    //ASCII literal; the instruction regex guarantees the form 'c'
                    operands.push_back(Operand(OperandType::Literal, static_cast<uint8_t>(args[1])));
                    comb = (comb << 4) | 0x2;
                    args.erase(0, 3);
                } else {
                    const std::string lit = args.substr(0, args.find_first_of(blanks));
                    if (lit.find_first_not_of("0123456789") == std::string::npos) {
                        //numeric literal
                        const int val = parseByte(lit);
                        if (val < 0)
                            throw AssemblerError("Illegal operand: " + lit);

                        operands.push_back(Operand(OperandType::Literal, static_cast<uint8_t>(val)));
                    } else {
                        //label: use its offset if already known, otherwise defer to resolve()
                        const auto known = labelMap.find(lit);
                        if (known != labelMap.end()) {
                            operands.push_back(Operand(OperandType::Literal, static_cast<uint8_t>(known->second)));
                        } else {
                            if (MnemonicMap.find(toUpper(lit)) != MnemonicMap.end())
                                throw AssemblerError("Illegal label name: " + lit);
                            operands.push_back(Operand(OperandType::Label, lit));
                        }
                    }
                    comb = (comb << 4) | 0x2;
                    args.erase(0, lit.size());
                }
            }
            return operands;
        }

        /// Looks up the mnemonic, parses its operands, selects the matching
        /// opcode and queues the translated instruction.
        void assembleInstruction(const std::string& mnemonic, const std::string& args)
        {
            const std::string name = toUpper(mnemonic);
            const auto op = MnemonicMap.find(name);
            if (op == MnemonicMap.end()) {
                //try to find a possible suggestion: the longest proper prefix that is a mnemonic
                for (size_t i = name.length() - 1; i > 0; --i) {
                    const std::string trial = name.substr(0, i);
                    if (MnemonicMap.find(trial) != MnemonicMap.end())
                        throw AssemblerError("Unrecognized instruction: " + name + " (did you mean " + trial + "?)");
                }
                throw AssemblerError("Unrecognized instruction: " + name);
            }

            MnemonicInstruction m;
            size_t comb = OperandCombination::N;
            m.op = op->second;
            m.operand = parseOperands(args, comb);

            //check for existence of Instruction
            const auto range = MnemonicInstructionMap.equal_range(m.op);
            for (auto it = range.first; it != range.second; ++it) {
                // Compare as integers so an unlisted combination is never cast into the enum.
                if (static_cast<size_t>(it->second.first) != comb)
                    continue;

                TranslatedInstruction t(it->second.second, std::move(m.operand));
                // Validate before committing so a rejected line leaves the state untouched.
                if (offset + t.size() > 255)
                    throw AssemblerError("Output too big");
                offset += t.size();
                instructions.push_back(std::move(t));
                return;
            }
            throw AssemblerError("Unexpected or missing operands");
        }

        std::list<TranslatedInstruction> instructions;       // pending output, in source order
        std::unordered_map<std::string, size_t> labelMap;    // label name -> output offset
        size_t offset;                                       // size in bytes of everything queued so far
    };
}
353
354
int main(int argc, char *argv[])
355
{
356
    using namespace std;
357
358
    cout << "tinyAssembler v0.1" << endl;
359
360
    if (argc != 2) {
361
        cout << "Usage: tiny <input file>" << endl;
362
        return -1;
363
    }
364
365
    ifstream input(argv[1]);
366
    if (!input.is_open()) {
367
        cout << "Couldn't open file: " << argv[1] << endl;
368
        return -1;
369
    }
370
    
371
    tiny::Assembler as;
372
    size_t lineNr = 0;
373
    string line;
374
    try {
375
        while (getline(input, line)) {
376
            lineNr++;
377
            as.push_line(line);
378
        }
379
        //resolve missing labels
380
        as.resolve();
381
       
382
        cout << "Output:" << endl;
383
        size_t offset = 0;
384
        while (as.emit([&](uint8_t val) { cout << "0x" << hex << setfill('0') << setw(2) <<  (int)val << " "; offset++;}))
385
            cout << endl;
386
387-
        cout << endl << "Done, emitted " << offset << " bytes" << endl;
387+
        cout << endl << "Done, emitted " << dec << offset << " bytes" << endl;
388
    } catch (tiny::AssemblerError err) {
389-
        cout << "Error in line " << lineNr << ": " << line << endl;
389+
        cout << "Error in line " << dec << lineNr << ": " << line << endl;
390
        cout << err.message << endl;
391
    }
392
    return 0;
393
}