Advertisement
Guest User

Untitled

a guest
Sep 14th, 2015
134
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 19.60 KB | None | 0 0
  /**
   * file: assembler.c
   * author: matthew.james.bird@gmail.com
   *
   * command-line driver for the assembler for the hack assembly language.
   *
   */
  8.  
  9. #include "assemble.h"
  10.  
  /**
   * main: command-line entry point.
   * opens argv[1] for reading and argv[2] for writing, then passes both
   * streams to assemble().
   * returns 0 on success; 1 on a usage error, a file that cannot be opened,
   * or a failed assembly.
   */
  int main(int argc, char* argv[])
  {
      // exactly two arguments are required: the source and output paths
      if (argc != 3)
      {
          fprintf(stderr, "Usage: assembler source output\n");
          return 1;
      }

      // open source file
      FILE* source = fopen(argv[1], "rb");
      if (source == NULL)
      {
          fprintf(stderr, "Error: cannot open source file %s\n", argv[1]);
          return 1;
      }

      // open output file
      FILE* output = fopen(argv[2], "wb");
      if (output == NULL)
      {
          fprintf(stderr, "Error: cannot open output file %s\n", argv[2]);
          fclose(source);
          return 1;
      }

      if (assemble(source, output) == false)
      {
          // assemble() closes the streams only on its success path, so when
          // it reports failure they are still open and are released here
          fclose(source);
          fclose(output);
          printf("Quitting.\n");
          return 1;
      }
      return 0;
  }
  44.  
  /**
   * file: assemble.h
   * author: matthew.james.bird@gmail.com
   *
   * declarations for the assembler for the hack assembly language.
   *
   */
  52.  
  53. #include <stdio.h>
  54. #include <stdbool.h>
  55.  
  #define MAX_A 32767          // largest value accepted in a numeric A-instruction
  #define COMP_TABLE_SIZE 28   // number of comp mnemonics
  #define JUMP_TABLE_SIZE 7    // number of jump mnemonics
  #define MAX_SYMBOL_SIZE 10   // longest symbol name, in characters (terminator excluded)

  // node for symbol and its translation
  typedef struct symNode
  {
      // symbol name; one extra byte so a name of exactly MAX_SYMBOL_SIZE
      // characters still has room for its terminating '\0'
      char symbol[MAX_SYMBOL_SIZE + 1];
      // 16 binary digits plus the terminating '\0'
      char translation[17];
      // next node in the list, or NULL at the end
      struct symNode* next;
  }
  symNode;
  69.  
  // node for comp code and its translation
  typedef struct compNode
  {
      // comp mnemonic, at most 3 characters plus the terminating '\0'
      char entry[4];
      // 7 binary digits plus the terminating '\0'
      char translation[8];
  }
  compNode;
  77.  
  // node for jump code and its translation
  typedef struct jumpNode
  {
      // jump mnemonic, 3 characters plus the terminating '\0'
      char entry[4];
      // 3 binary digits plus the terminating '\0'
      char translation[4];
  }
  jumpNode;
  85.  
  /**
   * assemble: translates source assembly file into machine code.
   * returns true on success, else false;
   */
  bool assemble(FILE* source, FILE* output);

  /**
   * addSym: adds the symbol-translation pair to the start of the symbol
   * dictionary (a linked list). line is only used in error messages.
   * returns true on success, else false.
   */
  bool addSym(const char* symbol, const char* translation, int line);

  /**
   * buildTables: builds the tables for comp/jump codes and their translations,
   * and loads the register symbols R0-R15 into the symbol dictionary.
   * returns true on success, else false.
   */
  bool buildTables(void);

  /**
   * clearTables: frees the tables of comp/jump codes and their translations,
   * and every node of the symbol dictionary.
   */
  void clearTables(void);

  /**
   * decodeA: reads in an A instruction from source, and outputs the a-instruction to output, converted to binary.
   * line is the current source line number.
   * returns the updated source line number, or -1 on error.
   */
  int decodeA(FILE* source, FILE* output, int line);

  /**
   * writeComp: translates comp and outputs it to output.
   * returns true on success, else false.
   */
  bool writeComp(char* comp, FILE* output);

  /**
   * writeJump: translates jump and outputs it to output.
   * returns true on success, else false.
   */
  bool writeJump(char* jump, FILE* output);

  /**
   * decodeC: reads in a C instruction from source (first char is c), and outputs the C-instruction to output, converted to binary.
   * line is the current source line number.
   * returns the updated line number, or -1 on error.
   */
  int decodeC(char c, FILE* source, FILE* output, int line);

  /**
   * loadLabels: populates the symbol dictionary with all of the labels in the file,
   * then rewinds source to its beginning.
   * returns true on success, else false;
   */
  bool loadLabels(FILE* source);
  138.  
  /**
   * file: assemble.c
   * author: matthew.james.bird@gmail.com
   *
   * assembler for the hack assembly language.
   *
   * usage: assembler source output
   */
  147.  
  148. #include "assemble.h"
  149. #include <stdio.h>
  150. #include <stdbool.h> // bool type
  151. #include <ctype.h> // isspace(), isdigit()
  152. #include <stdlib.h> // atoi()
  153. #include <string.h> // strcpy(), strcmp(), strchr()
  154.  
  155.  
  156. // head for symbol dictionary linked list
  157. symNode* symHead;
  158.  
  159. // table for comp codes and their translations
  160. compNode* compDict[COMP_TABLE_SIZE];
  161.  
  162. // table for jump codes and their translations
  163. jumpNode* jumpDict[JUMP_TABLE_SIZE];
  164.  
  165. const char* compCodes[COMP_TABLE_SIZE] = {"0", "1", "-1", "D", "A", "!D", "!A", "-D", "-A",
  166. "D+1", "A+1", "D-1", "A-1", "D+A", "D-A", "A-D",
  167. "D&A", "D|A", "M", "!M", "-M", "M+1", "M-1", "D+M",
  168. "D-M", "M-D", "D&M", "D|M"};
  169.  
  170. const char* compTranslations[COMP_TABLE_SIZE] = {"0101010", "0111111", "0111010", "0001100",
  171. "0110000", "0001101", "0110001", "0001111",
  172. "0110011", "0011111", "0110111", "0001110",
  173. "0110010", "0000010", "0010011", "0010011",
  174. "0000000", "0010101", "1110000", "1110001",
  175. "1110011", "1110111", "1110010", "1000010",
  176. "1010011", "1000111", "1000000", "1010101"};
  177.  
  178. const char* jumpCodes[JUMP_TABLE_SIZE] = {"JGT", "JEQ", "JGE", "JLT", "JNE", "JLE", "JMP"};
  179.  
  180. const char* jumpTranslations[JUMP_TABLE_SIZE] = {"001", "010", "011", "100", "101", "110", "111"};
  181.  
  182. /**
  183. * addSym: add the symbol-translation pair to the start of the linked list beginning with head.
  184. * returns true on success, else false;
  185. */
  186. bool addSym(const char* symbol, const char* translation, int line)
  187. {
  188. // construct the new node
  189. symNode* temp = malloc(sizeof(symNode));
  190. if (temp == NULL)
  191. {
  192. fprintf(stderr, "Error (line %d): cannot malloc new symbol node.n", line);
  193. return false;
  194. }
  195. strcpy(temp->symbol, symbol);
  196. strcpy(temp->translation, translation);
  197.  
  198. if (symHead != NULL) // list not empty
  199. {
  200. temp->next = symHead;
  201. }
  202. symHead = temp;
  203. return true;
  204. }
  205.  
  206. /**
  207. * buildTables: builds the table for comp/jump codes and their translations.
  208. */
  209. bool buildTables(void)
  210. {
  211. int i;
  212.  
  213. // build comp table
  214. for (i = 0; i < COMP_TABLE_SIZE; i++)
  215. {
  216. compNode* temp = malloc(sizeof(compNode));
  217. if (temp == NULL)
  218. {
  219. fprintf(stderr, "Error: cannot create comp tablen");
  220. return false;
  221. }
  222. strcpy(temp->entry, compCodes[i]);
  223. strcpy(temp->translation, compTranslations[i]);
  224. compDict[i] = temp;
  225. }
  226.  
  227. // build jump table
  228. for (i = 0; i < JUMP_TABLE_SIZE; i++)
  229. {
  230. jumpNode* temp = malloc(sizeof(jumpNode));
  231. if (temp == NULL)
  232. {
  233. fprintf(stderr, "Error: cannot create jump tablen");
  234. return false;
  235. }
  236. strcpy(temp->entry, jumpCodes[i]);
  237. strcpy(temp->translation, jumpTranslations[i]);
  238. jumpDict[i] = temp;
  239. }
  240.  
  241. // load default register symbols into symbol table
  242. int v;
  243. int k;
  244. int j;
  245. for (i = 0; i < 16; i++)
  246. {
  247. char* tempSym = malloc(4);
  248. if (tempSym == NULL)
  249. {
  250. fprintf(stderr, "Error: cannot create register tablen");
  251. return false;
  252. }
  253.  
  254. char* tempTran = malloc(17);
  255. if (tempTran == NULL)
  256. {
  257. fprintf(stderr, "Error: cannot create register tablen");
  258. return false;
  259. }
  260.  
  261. tempSym[0] = 'R';
  262. sprintf(tempSym+1, "%d", i);
  263. v = i;
  264. k = 0;
  265. for (j = 15; j >= 0; j--, k++)
  266. {
  267. tempTran[k] = '0' + ((v >> j) & 1);
  268. }
  269. tempTran[k] = '';
  270. if (addSym(tempSym, tempTran, 0) == false)
  271. {
  272. fprintf(stderr, "Error: cannot create register tablen");
  273. return false;
  274. }
  275. }
  276. return true;
  277. }
  278.  
  279. /**
  280. * clearTables: frees the tables of comp/jump codes and their translations.
  281. */
  282. void clearTables(void)
  283. {
  284. // clear computations table
  285. int i;
  286. for (i = 0; i < COMP_TABLE_SIZE; i++)
  287. {
  288. free(compDict[i++]);
  289. }
  290.  
  291. // clear jump table
  292. for (i = 0; i < JUMP_TABLE_SIZE; i++)
  293. {
  294. free(jumpDict[i++]);
  295. }
  296.  
  297. // clear symbol table
  298. symNode* pos = symHead;
  299. symNode* next;
  300. while (pos != NULL)
  301. {
  302. next = pos->next;
  303. free(pos);
  304. pos = next;
  305. }
  306. }
  307.  
  308. /**
  309. * decodeA: reads in an A instruction from source, and outputs the a-instruction to out, converted to binary.
  310. * returns source line number, or -1 if error.
  311. */
  312. int decodeA(FILE* source, FILE* output, int line)
  313. {
  314. static int varNum = 16;
  315.  
  316. char* instruction = malloc(MAX_SYMBOL_SIZE + 1); //holds the number in the @instruction
  317. if (instruction == NULL)
  318. {
  319. fprintf(stderr, "Error (decodeA): cannot malloc instructionn");
  320. return -1;
  321. }
  322.  
  323. // read in the @ instruction
  324. int i = 0;
  325. char c;
  326. if ((c = fgetc(source)) && !isdigit(c)) // symbol
  327. {
  328. do
  329. {
  330. if (i > MAX_SYMBOL_SIZE)
  331. {
  332. fprintf(stderr, "Error (line %d): symbol too large (max length %d chars)n", line, MAX_SYMBOL_SIZE);
  333. return -1;
  334. }
  335. instruction[i++] = c;
  336. } while ((c = fgetc(source)) && !isspace(c) && c != EOF);
  337.  
  338. if (i == 0)
  339. {
  340. fprintf(stderr, "Error (line %d): expected value for A-instructionn", line);
  341. return -1;
  342. }
  343. instruction[i] = '';
  344.  
  345. // search table for instruction
  346. symNode* pos;
  347. for (pos = symHead; pos != NULL; pos = pos->next)
  348. {
  349. if (strcmp(instruction, pos->symbol) == 0)
  350. {
  351. fprintf(output, pos->translation);
  352. break;
  353. }
  354. }
  355. if (pos == NULL) // symbol not in table: add it!
  356. {
  357. char* tempTran = malloc(17);
  358. int k = 0;
  359. int j;
  360. int v = varNum;
  361. for (j = 15; j >= 0; j--, k++)
  362. {
  363. tempTran[k] = '0' + ((v >> j) & 1);
  364. }
  365. tempTran[k] = '';
  366. addSym(instruction, tempTran, 0);
  367. varNum++;
  368. // output symbol
  369. fprintf(output, tempTran);
  370. fputc('n', output);
  371. return line;
  372. }
  373. }
  374. if (isdigit(c)) // non-symbolic a-instruction
  375. {
  376. do
  377. {
  378. if (i > 4)
  379. {
  380. fprintf(stderr, "Error (line %d): integer too largen", line);
  381. return -1;
  382. }
  383. instruction[i++] = c;
  384. } while ((c = fgetc(source)) && isdigit(c));
  385.  
  386. if (i == 0)
  387. {
  388. fprintf(stderr, "Error (line %d): expected value for A-instructionn", line);
  389. return -1;
  390. }
  391. instruction[i] = '';
  392.  
  393. // convert the @ instruction to int
  394. int v = atoi(instruction);
  395. free(instruction);
  396. if (v > MAX_A || v < 0)
  397. {
  398. fprintf(stderr, "Error (line %d): %d is an invalid integern", line, v);
  399. return -1;
  400. }
  401.  
  402. // output the a-instruction converted to binary
  403. for (i = 15; i >= 0; i--)
  404. {
  405. fputc('0' + ((v >> i) & 1), output);
  406. }
  407. }
  408.  
  409. // carry on reading until newline
  410. while (c != 'n' && c != EOF)
  411. {
  412. c = fgetc(source);
  413. }
  414. if (c == 'n')
  415. {
  416. fputc('n', output);
  417. line++;
  418. }
  419.  
  420. return line;
  421. }
  422.  
  423. /**
  424. * writeComp: translates comp and outputs it to output.
  425. * returns true on success, else false.
  426. */
  427. bool writeComp(char* comp, FILE* output)
  428. {
  429. // search computations for the comp
  430. int i;
  431. for (i = 0; i < COMP_TABLE_SIZE; i++)
  432. {
  433. if (strcmp(compDict[i]->entry, comp) == 0)
  434. {
  435. // found
  436. fprintf(output, compDict[i]->translation);
  437. return true;
  438. }
  439. }
  440. // not found
  441. printf("%s not foundn", comp);
  442. return false;
  443. }
  444.  
  445. /**
  446. * writeJump: translates jump and outputs it to output.
  447. * returns true on success, else false.
  448. */
  449. bool writeJump(char* jump, FILE* output)
  450. {
  451. // search jump table for the jump
  452. int i;
  453. for (i = 0; i < JUMP_TABLE_SIZE; i++)
  454. {
  455. if (strcmp(jumpDict[i]->entry, jump) == 0)
  456. {
  457. // found
  458. fprintf(output, jumpDict[i]->translation);
  459. return true;
  460. }
  461. }
  462. // not found
  463. return false;
  464. }
  465.  
  /**
   * decodeC: reads in a C instruction from source (first char is c), and
   * outputs the C-instruction to output, converted to binary and followed by
   * a newline.
   *
   * the instruction has the form [dest=]comp[;jump]. whitespace inside it is
   * ignored, and it ends at a newline, at the start of a comment ('/') or at
   * EOF. a trailing comment is skipped up to and including its newline.
   *
   * returns the updated line number, or -1 on error.
   */
  int decodeC(char c, FILE* source, FILE* output, int line)
  {
      // C-instructions have three parts: dest, comp, and jump.
      // each holds at most 3 characters plus the terminator
      char dest[4] = "";
      char comp[4] = "";
      char jump[4] = "";
      char token[4];          // characters collected since the last separator
      int len = 0;
      bool destIn = false;
      bool compIn = false;
      bool jumpIn = false;

      // work in an int so that EOF from fgetc stays distinct from any byte
      int ch = (unsigned char) c;
      for (;;)
      {
          bool lineEnd = (ch == '\n' || ch == '/' || ch == EOF);

          if (ch == '=') // token is dest
          {
              if (destIn || compIn)
              {
                  goto invalid;
              }
              token[len] = '\0';
              strcpy(dest, token);
              destIn = true;
              len = 0;
          }
          else if (ch == ';' || (lineEnd && !compIn)) // token is comp
          {
              if (compIn) // a second ';'
              {
                  goto invalid;
              }
              token[len] = '\0';
              strcpy(comp, token);
              compIn = true;
              len = 0;
          }
          else if (lineEnd) // comp already read, so token is jump
          {
              token[len] = '\0';
              strcpy(jump, token);
              jumpIn = true;
              len = 0;
          }
          else if (!isspace(ch))
          {
              // keep one byte free for the terminator
              if (len >= 3)
              {
                  goto invalid;
              }
              token[len++] = (char) ch;
          }

          if (lineEnd)
          {
              break;
          }
          ch = fgetc(source);
      }

      // skip a trailing comment so the newline is consumed (and counted) here
      while (ch != '\n' && ch != EOF)
      {
          ch = fgetc(source);
      }

      // dest may only name the A, D and M registers
      if (destIn)
      {
          if (dest[0] == '\0')
          {
              goto invalid;
          }
          for (const char* p = dest; *p != '\0'; p++)
          {
              if (strchr("ADM", *p) == NULL)
              {
                  goto invalid;
              }
          }
      }

      // write C-instruction code (111)
      fputs("111", output);

      // comp is always set by the time the loop ends, so there is no default
      if (writeComp(comp, output) == false)
      {
          fprintf(stderr, "Error (line: %d): cannot translate '%s'\n", line, comp);
          return -1;
      }

      // dest bits, in the order A, D, M; an absent dest leaves all three 0
      fputc(strchr(dest, 'A') != NULL ? '1' : '0', output);
      fputc(strchr(dest, 'D') != NULL ? '1' : '0', output);
      fputc(strchr(dest, 'M') != NULL ? '1' : '0', output);

      if (jumpIn)
      {
          if (writeJump(jump, output) == false)
          {
              fprintf(stderr, "Error (line: %d): cannot translate jump '%s'\n", line, jump);
              return -1;
          }
      }
      else
      {
          // write default jump
          fputs("000", output);
      }

      fputc('\n', output);
      if (ch == '\n')
      {
          line++;
      }
      return line;

  invalid:
      fprintf(stderr, "Error (line: %d): invalid instruction\n", line);
      return -1;
  }
  597.  
  598. /**
  599. * loadLabels: populates the symbol dictionary with all of the labels in the file.
  600. * returns true on success, else false.
  601. */
  602. bool loadLabels(FILE* source)
  603. {
  604. char* tempLabel;
  605. char* tempTran;
  606. int line = 0;
  607. bool definingLabel = false; // are we defining a label?
  608. bool comment = false; // are we in a comment?
  609. bool content = false; // is there content on the current line?
  610. bool addLabel = false; // should we add the current line to the label tag?
  611. int numLabels = 0;
  612. char c;
  613. int i = 0; // label pos
  614. while ((c = fgetc(source)) != EOF)
  615. {
  616. if (c == '/')
  617. {
  618. comment = true;
  619. }
  620. else if (c == '(' && !comment) // new label TODO: remove these && !comment
  621. {
  622. if (definingLabel)
  623. {
  624. fprintf(stderr, "Error (line %d): cannot enter '(' in label namen", line);
  625. return false;
  626. }
  627. definingLabel = true;
  628.  
  629. tempLabel = malloc(MAX_SYMBOL_SIZE + 1);
  630. if (tempLabel == NULL)
  631. {
  632. fprintf(stderr, "Error (line %d): cannot malloc tempLabeln", line);
  633. return false;
  634. }
  635. }
  636. else if (c == ')' && !comment)
  637. {
  638. if (!definingLabel)
  639. {
  640. fprintf(stderr, "Error (line %d): cannot enter ')' outside labeln", line);
  641. return false;
  642. }
  643. definingLabel = false;
  644.  
  645. // add to dict
  646. tempLabel[i] = '';
  647. i = 0;
  648. addSym(tempLabel, "", line);
  649. addLabel = true; // TODO: just have addLabel be numLabels > 0
  650. numLabels++;
  651. }
  652. else if (definingLabel && !comment)
  653. {
  654. if (isspace(c))
  655. {
  656. fprintf(stderr, "Error (line %d): cannot enter whitespace in label namen", line);
  657. return false;
  658. }
  659. else
  660. {
  661. tempLabel[i++] = c;
  662. }
  663. }
  664. if (c == 'n')
  665. {
  666. comment = false;
  667. if (content)
  668. {
  669. line++; // TODO: don't want to increment when we've done a label
  670. }
  671. content = false;
  672. }
  673. else if (!isspace(c) && !comment && !definingLabel && c != ')')
  674. {
  675. content = true;
  676. if (addLabel)
  677. {
  678. tempTran = malloc(17);
  679. int v = line;
  680. int k = 0;
  681. int j;
  682. for (j = 15; j >= 0; j--, k++)
  683. {
  684. tempTran[k] = '0' + ((v >> j) & 1);
  685. }
  686. tempTran[k] = '';
  687.  
  688. for (symNode* pos = symHead; numLabels > 0; numLabels--)
  689. {
  690. strcpy(pos->translation, tempTran);
  691. pos = pos->next;
  692. }
  693. addLabel = false;
  694. }
  695. }
  696. }
  697. if (addLabel)
  698. {
  699. tempTran = malloc(17);
  700. int v = line;
  701. int k = 0;
  702. int j;
  703. for (j = 15; j >= 0; j--, k++)
  704. {
  705. tempTran[k] = '0' + ((v >> j) & 1);
  706. }
  707. tempTran[k] = '';
  708.  
  709. for (symNode* pos = symHead; numLabels > 0; numLabels--)
  710. {
  711. strcpy(pos->translation, tempTran);
  712. pos = pos->next;
  713. }
  714. }
  715.  
  716. // rewind the file
  717. fseek(source, 0, SEEK_SET);
  718.  
  719. return true;
  720. }
  721.  
  722.  
  /**
   * assemble: translates source assembly file into machine code.
   *
   * builds the translation tables, collects the labels in a first pass over
   * source, then translates every instruction to output in a second pass.
   * the translation tables are freed before returning.
   *
   * on success both streams are closed. on failure they are left open for
   * the caller.
   * returns true on success, else false.
   */
  bool assemble(FILE* source, FILE* output)
  {
      // build translation tables, then collect the labels
      if (buildTables() == false || loadLabels(source) == false)
      {
          fprintf(stderr, "Terminating program due to error\n");
          clearTables();
          return false;
      }

      // main read loop
      bool comment = false; // are we in a comment?
      bool label = false;   // are we in a label?
      int line = 1;         // source line number
      int c;                // int so that EOF stays distinct from any byte
      while ((c = fgetc(source)) != EOF)
      {
          if (c == '\n')
          {
              line++;
              comment = false; // newline breaks comments
          }
          else if (comment)
          {
              // skip comments; this comes before the label checks so that a
              // parenthesis inside a comment cannot toggle label skipping
              continue;
          }
          else if (c == '/')
          {
              comment = true;
          }
          else if (c == '(')
          {
              label = true;
          }
          else if (c == ')')
          {
              label = false;
          }
          else if (label || isspace(c))
          {
              continue; // label names were handled by loadLabels
          }
          else if (c == '@') // A-INSTRUCTION
          {
              line = decodeA(source, output, line);
          }
          else // C-INSTRUCTION (or invalid)
          {
              line = decodeC((char) c, source, output, line);
          }

          if (line == -1)
          {
              fprintf(stderr, "Terminating assembly due to error\n");
              clearTables();
              return false;
          }
      }

      // clear translation tables
      clearTables();

      // make sure everything was read and written before reporting success
      if (ferror(source) || fflush(output) != 0)
      {
          fprintf(stderr, "Error: I/O failure while assembling\n");
          return false;
      }

      fclose(source);
      fclose(output);

      printf("Assembly successful\n");
      return true;
  }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement