SHOW:
|
|
- or go back to the newest paste.
1 | //TinyAssembler by NasenSpray | |
2 | ||
#include <algorithm>
#include <cctype>
#include <cstdint>
#include <fstream>
#include <iomanip>
#include <iostream>
#include <list>
#include <regex>
#include <sstream>
#include <string>
#include <unordered_map>
#include <utility>
#include <vector>
13 | ||
14 | namespace tiny | |
15 | { | |
// Kind of value held by an Operand.
enum OperandType {
    None    = 0,  // operand not set (default-constructed Operand)
    Memory  = 1,  // memory reference, written as [address] in the source
    Literal = 2,  // immediate byte: number, ASCII character or resolved label
    Label   = 3,  // symbolic label whose address has not been resolved yet
};
22 | ||
// Instruction names as written in the source text, independent of the
// operand forms they are used with.
enum Mnemonic {
    AND    = 0,
    OR     = 1,
    XOR    = 2,
    NOT    = 3,
    MOV    = 4,
    RANDOM = 5,
    ADD    = 6,
    SUB    = 7,
    JMP    = 8,
    JZ     = 9,
    JEQ    = 10,
    JLS    = 11,
    JGT    = 12,
    HALT   = 13,
    APRINT = 14,
    DPRINT = 15,
};
29 | ||
// Operand signature of an instruction, one hex digit per operand:
// 1 = memory operand (M), 2 = literal operand (L). The first operand sits
// in the most significant digit, so "M L" is 0x12 and "L M M" is 0x211.
enum OperandCombination {
    // no operands
    N   = 0x000,

    // one operand
    M   = 0x001,
    L   = 0x002,

    // two operands
    MM  = 0x011,
    ML  = 0x012,
    LM  = 0x021,
    LL  = 0x022,

    // three operands
    MMM = 0x111,
    MML = 0x112,
    LMM = 0x211,
    LML = 0x212,
};
43 | ||
// Opcode bytes emitted for each mnemonic/operand-form pair. The suffix names
// the operand form (M = memory, L = literal, N = none), listed in opcode order.
enum Instruction {
    // bitwise logic
    AND_MM   = 0x00,
    AND_ML   = 0x01,
    OR_MM    = 0x02,
    OR_ML    = 0x03,
    XOR_MM   = 0x04,
    XOR_ML   = 0x05,
    NOT_M    = 0x06,

    // data movement
    MOV_MM   = 0x07,
    MOV_ML   = 0x08,
    RANDOM_M = 0x09,

    // arithmetic
    ADD_MM   = 0x0A,
    ADD_ML   = 0x0B,
    SUB_MM   = 0x0C,
    SUB_ML   = 0x0D,

    // jumps
    JMP_M    = 0x0E,
    JMP_L    = 0x0F,
    JZ_MM    = 0x10,
    JZ_ML    = 0x11,
    JZ_LM    = 0x12,
    JZ_LL    = 0x13,
    JEQ_MMM  = 0x14,
    JEQ_LMM  = 0x15,
    JEQ_MML  = 0x16,
    JEQ_LML  = 0x17,
    JLS_MMM  = 0x18,
    JLS_LMM  = 0x19,
    JLS_MML  = 0x1A,
    JLS_LML  = 0x1B,
    JGT_MMM  = 0x1C,
    JGT_LMM  = 0x1D,
    JGT_MML  = 0x1E,
    JGT_LML  = 0x1F,

    // output
    APRINT_M = 0x20,
    APRINT_L = 0x21,
    DPRINT_M = 0x22,
    DPRINT_L = 0x23,

    // the only opcode outside the contiguous 0x00-0x23 range
    HALT_N   = 0xFF,
};
83 | ||
84 | struct Operand { | |
85 | OperandType type; | |
86 | uint8_t value; | |
87 | std::string label; | |
88 | ||
89 | Operand() : type(OperandType::None) {} | |
90 | Operand(OperandType t, uint8_t v) : type(t), value(v) {} | |
91 | Operand(OperandType t, std::string label) : type(t), label(label) {} | |
92 | }; | |
93 | ||
94 | struct MnemonicInstruction { | |
95 | Mnemonic op; | |
96 | std::vector<Operand> operand; | |
97 | }; | |
98 | ||
99 | struct TranslatedInstruction { | |
100 | Instruction op; | |
101 | std::vector<Operand> operand; | |
102 | ||
103 | TranslatedInstruction(Instruction op, std::vector<Operand>&& operands) : op(op), operand(operands) {} | |
104 | size_t size() const { return operand.size() + 1; } | |
105 | }; | |
106 | ||
107 | std::pair<std::string, Mnemonic> MnemonicMapData[] = | |
108 | { | |
109 | std::make_pair("AND", Mnemonic::AND), | |
110 | std::make_pair("OR", Mnemonic::OR), | |
111 | std::make_pair("XOR", Mnemonic::XOR), | |
112 | std::make_pair("NOT", Mnemonic::NOT), | |
113 | std::make_pair("MOV", Mnemonic::MOV), | |
114 | std::make_pair("RANDOM", Mnemonic::RANDOM), | |
115 | std::make_pair("ADD", Mnemonic::ADD), | |
116 | std::make_pair("SUB", Mnemonic::SUB), | |
117 | std::make_pair("JMP", Mnemonic::JMP), | |
118 | std::make_pair("JZ", Mnemonic::JZ), | |
119 | std::make_pair("JEQ", Mnemonic::JEQ), | |
120 | std::make_pair("JLS", Mnemonic::JLS), | |
121 | std::make_pair("JGT", Mnemonic::JGT), | |
122 | std::make_pair("HALT", Mnemonic::HALT), | |
123 | std::make_pair("APRINT", Mnemonic::APRINT), | |
124 | std::make_pair("DPRINT", Mnemonic::DPRINT) | |
125 | }; | |
126 | ||
127 | std::pair<Mnemonic, std::pair<OperandCombination, Instruction>> InstructionData[] = | |
128 | { | |
129 | std::make_pair(Mnemonic::AND, std::make_pair(OperandCombination::MM, Instruction::AND_MM)), | |
130 | std::make_pair(Mnemonic::AND, std::make_pair(OperandCombination::ML, Instruction::AND_ML)), | |
131 | std::make_pair(Mnemonic::OR, std::make_pair(OperandCombination::MM, Instruction::OR_MM)), | |
132 | std::make_pair(Mnemonic::OR, std::make_pair(OperandCombination::ML, Instruction::OR_ML)), | |
133 | std::make_pair(Mnemonic::XOR, std::make_pair(OperandCombination::MM, Instruction::XOR_MM)), | |
134 | std::make_pair(Mnemonic::XOR, std::make_pair(OperandCombination::ML, Instruction::XOR_ML)), | |
135 | std::make_pair(Mnemonic::NOT, std::make_pair(OperandCombination::M, Instruction::NOT_M)), | |
136 | ||
137 | std::make_pair(Mnemonic::MOV, std::make_pair(OperandCombination::MM, Instruction::MOV_MM)), | |
138 | std::make_pair(Mnemonic::MOV, std::make_pair(OperandCombination::ML, Instruction::MOV_ML)), | |
139 | ||
140 | std::make_pair(Mnemonic::RANDOM, std::make_pair(OperandCombination::M, Instruction::RANDOM_M)), | |
141 | ||
142 | std::make_pair(Mnemonic::ADD, std::make_pair(OperandCombination::MM, Instruction::ADD_MM)), | |
143 | std::make_pair(Mnemonic::ADD, std::make_pair(OperandCombination::ML, Instruction::ADD_ML)), | |
144 | ||
145 | std::make_pair(Mnemonic::SUB, std::make_pair(OperandCombination::MM, Instruction::SUB_MM)), | |
146 | std::make_pair(Mnemonic::SUB, std::make_pair(OperandCombination::ML, Instruction::SUB_ML)), | |
147 | ||
148 | std::make_pair(Mnemonic::JMP, std::make_pair(OperandCombination::M, Instruction::JMP_M)), | |
149 | std::make_pair(Mnemonic::JMP, std::make_pair(OperandCombination::L, Instruction::JMP_L)), | |
150 | ||
151 | std::make_pair(Mnemonic::JZ, std::make_pair(OperandCombination::MM, Instruction::JZ_MM)), | |
152 | std::make_pair(Mnemonic::JZ, std::make_pair(OperandCombination::ML, Instruction::JZ_ML)), | |
153 | std::make_pair(Mnemonic::JZ, std::make_pair(OperandCombination::LM, Instruction::JZ_LM)), | |
154 | std::make_pair(Mnemonic::JZ, std::make_pair(OperandCombination::LL, Instruction::JZ_LL)), | |
155 | ||
156 | std::make_pair(Mnemonic::JEQ, std::make_pair(OperandCombination::MMM, Instruction::JEQ_MMM)), | |
157 | std::make_pair(Mnemonic::JEQ, std::make_pair(OperandCombination::LMM, Instruction::JEQ_LMM)), | |
158 | std::make_pair(Mnemonic::JEQ, std::make_pair(OperandCombination::MML, Instruction::JEQ_MML)), | |
159 | std::make_pair(Mnemonic::JEQ, std::make_pair(OperandCombination::LML, Instruction::JEQ_LML)), | |
160 | ||
161 | std::make_pair(Mnemonic::JLS, std::make_pair(OperandCombination::MMM, Instruction::JLS_MMM)), | |
162 | std::make_pair(Mnemonic::JLS, std::make_pair(OperandCombination::LMM, Instruction::JLS_LMM)), | |
163 | std::make_pair(Mnemonic::JLS, std::make_pair(OperandCombination::MML, Instruction::JLS_MML)), | |
164 | std::make_pair(Mnemonic::JLS, std::make_pair(OperandCombination::LML, Instruction::JLS_LML)), | |
165 | ||
166 | std::make_pair(Mnemonic::JGT, std::make_pair(OperandCombination::MMM, Instruction::JGT_MMM)), | |
167 | std::make_pair(Mnemonic::JGT, std::make_pair(OperandCombination::LMM, Instruction::JGT_LMM)), | |
168 | std::make_pair(Mnemonic::JGT, std::make_pair(OperandCombination::MML, Instruction::JGT_MML)), | |
169 | std::make_pair(Mnemonic::JGT, std::make_pair(OperandCombination::LML, Instruction::JGT_LML)), | |
170 | ||
171 | std::make_pair(Mnemonic::HALT, std::make_pair(OperandCombination::N, Instruction::HALT_N)), | |
172 | ||
173 | std::make_pair(Mnemonic::APRINT, std::make_pair(OperandCombination::M, Instruction::APRINT_M)), | |
174 | std::make_pair(Mnemonic::APRINT, std::make_pair(OperandCombination::L, Instruction::APRINT_L)), | |
175 | std::make_pair(Mnemonic::DPRINT, std::make_pair(OperandCombination::M, Instruction::DPRINT_M)), | |
176 | std::make_pair(Mnemonic::DPRINT, std::make_pair(OperandCombination::L, Instruction::DPRINT_L)), | |
177 | }; | |
178 | ||
179 | std::unordered_map<std::string, Mnemonic> MnemonicMap(MnemonicMapData, MnemonicMapData + (sizeof(MnemonicMapData) / sizeof(MnemonicMapData[0]))); | |
180 | std::unordered_multimap<Mnemonic, std::pair<OperandCombination, Instruction>> MnemonicInstructionMap(InstructionData, InstructionData + (sizeof(InstructionData) / sizeof(InstructionData[0]))); | |
181 | ||
// Exception type thrown for every assembly failure; `message` holds the
// human-readable description.
struct AssemblerError {
    std::string message;

    // `msg` is a by-value sink parameter: move it instead of copying again.
    AssemblerError(std::string msg) : message(std::move(msg)) {}
};
186 | ||
187 | class Assembler | |
188 | { | |
189 | public: | |
190 | Assembler() : offset(0) {} | |
191 | ||
192 | void push_line(std::string s) | |
193 | { | |
194 | //matches empty line | |
195 | static const std::regex regWhite("^\\s*(//.*)?$"); | |
196 | //matches labels | |
197 | static const std::regex regLabel("^\\s*([a-zA-Z_]+[a-zA-Z_0-9]*)\\s*:\\s*(//.*)?$"); | |
198 | //matches syntactically valid instruction formats | |
199 | static const std::regex regInstruction("^\\s*([[:alpha:]]+)(?:\\s*|((\\s+(?:[0-9]+|[a-zA-Z_]+[a-zA-Z_0-9]*|'.'|\\[\\s*[0-9]+\\s*\\]))+))\\s*(//.*)?$"); | |
200 | ||
201 | std::smatch match; | |
202 | if (std::regex_match(s, regWhite)) { | |
203 | return; | |
204 | } else if (std::regex_match(s, match, regLabel)) { | |
205 | auto str = match[1].str(); | |
206 | str = str.substr(0, str.find_first_of(" \t:")); | |
207 | std::transform(str.begin(), str.end(), str.begin(), ::toupper); | |
208 | if (MnemonicMap.find(str) != MnemonicMap.end()) | |
209 | throw AssemblerError("Illegal label name: " + match[1].str()); | |
210 | if (labelMap.insert(std::make_pair(match[1].str(), offset)).second == false) | |
211 | throw AssemblerError("Label already defined: " + match[1].str()); | |
212 | } else if (std::regex_match(s, match, regInstruction)) { | |
213 | auto str = match[1].str(); | |
214 | std::transform(str.begin(), str.end(), str.begin(), ::toupper); | |
215 | auto op = MnemonicMap.find(str); | |
216 | if (op == MnemonicMap.end()) { | |
217 | //try to find a possible suggestion | |
218 | for (size_t i = str.length()-1; i > 0; i--) { | |
219 | auto trial = str.substr(0, i); | |
220 | if (MnemonicMap.find(trial) != MnemonicMap.end()) | |
221 | throw AssemblerError("Unrecognized instruction: " + str + " (did you mean " + trial + "?)"); | |
222 | } | |
223 | throw AssemblerError("Unrecognized instruction: " + str); | |
224 | } | |
225 | ||
226 | MnemonicInstruction m; | |
227 | size_t comb = OperandCombination::N; | |
228 | m.op = op->second; | |
229 | std::string args = match[2].str(); | |
230 | ||
231 | if (!args.empty()) { //we have args to parse :O | |
232 | for(;;) { | |
233 | args.erase(0, args.find_first_not_of(" \t")); | |
234 | if (args.empty() || args.substr(0,2) == "//") | |
235 | break; | |
236 | ||
237 | if (m.operand.size() >= 3) | |
238 | throw AssemblerError("Too many operands"); | |
239 | ||
240 | args.erase(0, args.find_first_not_of(" \t")); | |
241 | if (args[0] == '[') { | |
242 | //memory operand | |
243 | int val = 0xFFFF; | |
244 | try { | |
245 | val = std::stoi(args.substr(1, args.find_first_of(']')-1)); | |
246 | } catch (...) {} | |
247 | ||
248 | if (val < 0 || val > 255) | |
249 | throw AssemblerError("Illegal operand: " + args); | |
250 | ||
251 | m.operand.push_back(Operand(OperandType::Memory, (uint8_t)val)); | |
252 | comb <<= 4; comb |= 1; | |
253 | args.erase(0, args.find_first_of(']')+1); | |
254 | } else if (args[0] == '\'') { | |
255 | //ASCII literal | |
256 | m.operand.push_back(Operand(OperandType::Literal, args[1])); | |
257 | comb <<= 4; comb |= 2; | |
258 | args.erase(0, 3); | |
259 | } else { | |
260 | //literal | |
261 | auto lit = args.substr(0, args.find_first_of(" \t")); | |
262 | if (lit.find_first_not_of("0123456789") == std::string::npos) { | |
263 | //numeric | |
264 | int val = 0xFFFF; | |
265 | try { | |
266 | val = std::stoi(lit); | |
267 | } catch (...) {} | |
268 | ||
269 | if (val < 0 || val > 255) | |
270 | throw AssemblerError("Illegal operand: " + lit); | |
271 | ||
272 | m.operand.push_back(Operand(OperandType::Literal, (uint8_t)val)); | |
273 | comb <<= 4; comb |= 0x2; | |
274 | } else { | |
275 | //label | |
276 | auto labelIt = labelMap.find(lit); | |
277 | if (labelIt == labelMap.end()) { | |
278 | auto check = lit; | |
279 | std::transform(check.begin(), check.end(), check.begin(), ::toupper); | |
280 | if (MnemonicMap.find(check) != MnemonicMap.end()) | |
281 | throw AssemblerError("Illegal label name: " + lit); | |
282 | m.operand.push_back(Operand(OperandType::Label, lit)); | |
283 | } else | |
284 | m.operand.push_back(Operand(OperandType::Literal, labelIt->second)); | |
285 | comb <<= 4; comb |= 2; | |
286 | } | |
287 | ||
288 | args.erase(0, lit.size()); | |
289 | } | |
290 | } | |
291 | } | |
292 | ||
293 | //check for existence of Instruction | |
294 | auto range = MnemonicInstructionMap.equal_range(m.op); | |
295 | for (auto it = range.first; it != range.second; it++) { | |
296 | if (it->second.first == (OperandCombination)comb) { | |
297 | //found it! | |
298 | TranslatedInstruction t(it->second.second, std::move(m.operand)); | |
299 | offset += t.size(); | |
300 | if (offset > 255) | |
301 | throw AssemblerError("Output to big"); | |
302 | instructions.push_back(t); | |
303 | return; | |
304 | } | |
305 | } | |
306 | throw AssemblerError("Unexpected or missing operands"); | |
307 | } else { | |
308 | throw AssemblerError("Unrecognized input: " + s); | |
309 | } | |
310 | ||
311 | return; | |
312 | ||
313 | } | |
314 | ||
315 | template<class F> | |
316 | bool emit(F emitter) | |
317 | { | |
318 | if (!instructions.empty()) { | |
319 | auto inst = instructions.front(); | |
320 | emitter(inst.op); | |
321 | for (size_t i = 0; i < inst.operand.size(); ++i) | |
322 | emitter(inst.operand[i].value); | |
323 | instructions.pop_front(); | |
324 | return true; | |
325 | } | |
326 | return false; | |
327 | } | |
328 | ||
329 | void resolve() | |
330 | { | |
331 | //resolves labels | |
332 | for (auto it = instructions.begin(); it != instructions.end(); it++) { | |
333 | for (auto itO = it->operand.begin(); itO != it->operand.end(); itO++) { | |
334 | if (itO->type == OperandType::Label) { | |
335 | auto itL = labelMap.find(itO->label); | |
336 | if (itL == labelMap.end()) | |
337 | throw AssemblerError("Unresolved label: " + itO->label); | |
338 | ||
339 | itO->type = OperandType::Literal; | |
340 | itO->value = itL->second; | |
341 | } | |
342 | } | |
343 | } | |
344 | ||
345 | } | |
346 | ||
347 | private: | |
348 | std::list<TranslatedInstruction> instructions; | |
349 | std::unordered_map<std::string, size_t> labelMap; | |
350 | size_t offset; | |
351 | }; | |
352 | } | |
353 | ||
354 | int main(int argc, char *argv[]) | |
355 | { | |
356 | using namespace std; | |
357 | ||
358 | cout << "tinyAssembler v0.1" << endl; | |
359 | ||
360 | if (argc != 2) { | |
361 | cout << "Usage: tiny <input file>" << endl; | |
362 | return -1; | |
363 | } | |
364 | ||
365 | ifstream input(argv[1]); | |
366 | if (!input.is_open()) { | |
367 | cout << "Couldn't open file: " << argv[1] << endl; | |
368 | return -1; | |
369 | } | |
370 | ||
371 | tiny::Assembler as; | |
372 | size_t lineNr = 0; | |
373 | string line; | |
374 | try { | |
375 | while (getline(input, line)) { | |
376 | lineNr++; | |
377 | as.push_line(line); | |
378 | } | |
379 | //resolve missing labels | |
380 | as.resolve(); | |
381 | ||
382 | cout << "Output:" << endl; | |
383 | size_t offset = 0; | |
384 | while (as.emit([&](uint8_t val) { cout << "0x" << hex << setfill('0') << setw(2) << (int)val << " "; offset++;})) | |
385 | cout << endl; | |
386 | ||
387 | - | cout << endl << "Done, emitted " << offset << " bytes" << endl; |
387 | + | cout << endl << "Done, emitted " << dec << offset << " bytes" << endl; |
388 | } catch (tiny::AssemblerError err) { | |
389 | - | cout << "Error in line " << lineNr << ": " << line << endl; |
389 | + | cout << "Error in line " << dec << lineNr << ": " << line << endl; |
390 | cout << err.message << endl; | |
391 | } | |
392 | return 0; | |
393 | } |