Advertisement
Guest User

Untitled

a guest
Apr 26th, 2019
82
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 23.51 KB | None | 0 0
  1. #define _CRT_SECURE_NO_WARNINGS
  2. #include <stdint.h>
  3. #include <stdlib.h>
  4. #include <stdarg.h>
  5. #include <stdio.h>
  6. #include <string.h>
  7. #include <assert.h>
  8.  
  9. struct Scanner {
  10. const char *filename;
  11. const char *source;
  12. const char *pos;
  13. };
  14.  
  15. enum TokenType {
  16. Tok_Invalid,
  17.  
  18. Tok_End,
  19. Tok_Identifier,
  20. Tok_Number,
  21.  
  22. Tok_Assign,
  23. Tok_Add,
  24. Tok_Sub,
  25. Tok_Mul,
  26. Tok_Div,
  27. Tok_Dot,
  28. Tok_Comma,
  29. Tok_LParen,
  30. Tok_RParen,
  31. Tok_LBracket,
  32. Tok_RBracket,
  33. Tok_LBlock,
  34. Tok_RBlock,
  35. Tok_Newline,
  36.  
  37. Tok_Kw_Def,
  38. Tok_Kw_New,
  39. Tok_Kw_Var,
  40. };
  41.  
  42. struct Token {
  43. Scanner *scanner;
  44. uint32_t begin, end;
  45. TokenType type;
  46. };
  47.  
  48. bool tokenEqual(Token *a, Token *b)
  49. {
  50. if (a->type != b->type) return false;
  51. uint32_t aLen = a->end - a->begin;
  52. uint32_t bLen = b->end - b->begin;
  53. if (aLen != bLen) return false;
  54. const char *aSrc = a->scanner->source + a->begin;
  55. const char *bSrc = b->scanner->source + b->begin;
  56. return !memcmp(aSrc, bSrc, aLen);
  57. }
  58.  
  59. static void errorAtV(Scanner *scanner, uint32_t offset, const char *message, va_list args)
  60. {
  61. char err[4096];
  62. vsprintf(err, message, args);
  63.  
  64. const char *src = scanner->source;
  65. uint32_t line = 0, col = 0;
  66. for (uint32_t i = 0; i < offset; i++) {
  67. if (src[i] == '\n') {
  68. line++;
  69. col = 0;
  70. } else {
  71. col++;
  72. }
  73. }
  74.  
  75. fprintf(stderr, "%s:%u:%u: %s\n", scanner->filename, line + 1, col + 1, err);
  76.  
  77. exit(1);
  78. }
  79.  
  80. static void errorAtOffset(Scanner *scanner, uint32_t offset, const char *message, ...)
  81. {
  82. va_list args;
  83. va_start(args, message);
  84. errorAtV(scanner, offset, message, args);
  85. va_end(args);
  86. }
  87.  
  88. static void errorAt(Token token, const char *message, ...)
  89. {
  90. va_list args;
  91. va_start(args, message);
  92. errorAtV(token.scanner, token.begin, message, args);
  93. va_end(args);
  94. }
  95.  
  96. Token lex(Scanner *scanner)
  97. {
  98. const char *begin = scanner->source;
  99. const char *p = scanner->pos;
  100.  
  101. whitespace:
  102. while (*p == ' ' || *p == '\t' || *p == '\r') {
  103. p++;
  104. }
  105. if (p[0] == '/' && p[1] == '/') {
  106. p += 2;
  107. while (*p != '\0' && *p != '\n') {
  108. p++;
  109. }
  110. goto whitespace;
  111. }
  112.  
  113. Token token;
  114. token.scanner = scanner;
  115. token.begin = (uint32_t)(p - begin);
  116.  
  117. if ((*p >= 'a' && *p <= 'z') || (*p >= 'A' && *p <= 'Z') || *p == '_') {
  118. const char *ident = p;
  119. uint32_t len;
  120.  
  121. token.type = Tok_Identifier;
  122. p++;
  123. while ((*p >= 'a' && *p <= 'z') || (*p >= 'A' && *p <= 'Z') || (*p >= '0' && *p <= '9') || *p == '_') {
  124. p++;
  125. }
  126.  
  127. len = (uint32_t)(p - ident);
  128. if (len == 3 && !memcmp(ident, "def", 3)) token.type = Tok_Kw_Def;
  129. else if (len == 3 && !memcmp(ident, "new", 3)) token.type = Tok_Kw_New;
  130. else if (len == 3 && !memcmp(ident, "var", 3)) token.type = Tok_Kw_Var;
  131.  
  132. goto accept;
  133. }
  134.  
  135. if (*p >= '0' && *p <= '9') {
  136. token.type = Tok_Number;
  137. p++;
  138. while (*p >= '0' && *p <= '9') {
  139. p++;
  140. }
  141. goto accept;
  142. }
  143.  
  144. switch (*p) {
  145. case '\0':
  146. token.type = Tok_End;
  147. token.end = token.begin + 1;
  148. return token;
  149.  
  150. case '=': token.type = Tok_Assign; break;
  151. case '+': token.type = Tok_Add; break;
  152. case '-': token.type = Tok_Sub; break;
  153. case '*': token.type = Tok_Mul; break;
  154. case '/': token.type = Tok_Div; break;
  155. case '.': token.type = Tok_Dot; break;
  156. case ',': token.type = Tok_Comma; break;
  157. case '(': token.type = Tok_LParen; break;
  158. case ')': token.type = Tok_RParen; break;
  159. case '[': token.type = Tok_LBracket; break;
  160. case ']': token.type = Tok_RBracket; break;
  161. case '{': token.type = Tok_LBlock; break;
  162. case '}': token.type = Tok_RBlock; break;
  163. case '\n': token.type = Tok_Newline; break;
  164.  
  165. default:
  166. errorAtOffset(scanner, token.begin, "Unexpected character '%c' (0x%02x)", *p, *p);
  167. }
  168. p++;
  169.  
  170. accept:
  171. scanner->pos = p;
  172. token.end = (uint32_t)(p - begin);
  173. return token;
  174. }
  175.  
  176. struct Parser {
  177. Scanner *scanner;
  178. Token token;
  179. Token prev_token;
  180. };
  181.  
  182. static void errorAtCurrent(Parser *p, const char *message, ...)
  183. {
  184. va_list args;
  185. va_start(args, message);
  186. errorAtV(p->token.scanner, p->token.begin, message, args);
  187. va_end(args);
  188. }
  189.  
  190. static void errorAtPrev(Parser *p, const char *message, ...)
  191. {
  192. va_list args;
  193. va_start(args, message);
  194. errorAtV(p->prev_token.scanner, p->prev_token.begin, message, args);
  195. va_end(args);
  196. }
  197.  
  198. static bool accept(Parser *p, TokenType type)
  199. {
  200. if (p->token.type == type) {
  201. p->prev_token = p->token;
  202. p->token = lex(p->scanner);
  203. return true;
  204. }
  205.  
  206. return false;
  207. }
  208.  
  209. static void require(Parser *p, TokenType type, const char *message)
  210. {
  211. if (!accept(p, type)) {
  212. errorAtCurrent(p, "Expected %s", message);
  213. }
  214. }
  215.  
  216. enum AstType {
  217. Ast_Def,
  218. Ast_Block,
  219. Ast_Var,
  220. Ast_New,
  221. Ast_Assign,
  222. Ast_Binop,
  223. Ast_Index,
  224. Ast_Ident,
  225. Ast_Number,
  226. Ast_Call,
  227. Ast_Member,
  228. Ast_Paren,
  229. };
  230.  
  231. struct Ast {
  232. AstType type;
  233. Token token;
  234.  
  235. union {
  236.  
  237. struct {
  238. Token name;
  239. Ast *block;
  240. } def;
  241.  
  242. struct {
  243. Ast *stmt;
  244. uint32_t numStmt;
  245. } block;
  246.  
  247. struct {
  248. Token name;
  249. Ast *expr;
  250. } var;
  251.  
  252. struct {
  253. Ast *type;
  254. Ast *count;
  255. } new_;
  256.  
  257. struct {
  258. Ast *left;
  259. Ast *right;
  260. } assign;
  261.  
  262. struct {
  263. Token op;
  264. Ast *left;
  265. Ast *right;
  266. } binop;
  267.  
  268. struct {
  269. Ast *left;
  270. Ast *index;
  271. } index;
  272.  
  273. struct {
  274. Token name;
  275. } ident;
  276.  
  277. struct {
  278. Token value;
  279. } number;
  280.  
  281. struct {
  282. Ast *func;
  283. Ast *args;
  284. uint32_t numArgs;
  285. } call;
  286.  
  287. struct {
  288. Ast *expr;
  289. Token field;
  290. } member;
  291.  
  292. struct {
  293. Ast *expr;
  294. } paren;
  295.  
  296. };
  297. };
  298.  
  299. static Ast astPool[1024 * 16];
  300. static uint32_t astPoolIndex;
  301.  
  302. Ast *pushAst(AstType type, Token *token)
  303. {
  304. Ast *ast = &astPool[astPoolIndex++];
  305. ast->type = type;
  306. if (token) {
  307. ast->token = *token;
  308. }
  309. return ast;
  310. }
  311.  
  312. Ast *pushAsts(Ast *asts, uint32_t amount)
  313. {
  314. Ast *res = astPool + astPoolIndex;
  315. memcpy(res, asts, amount * sizeof(Ast));
  316. astPoolIndex += amount;
  317. return res;
  318. }
  319.  
  320. Ast *parseExpr(Parser *p);
  321.  
  322. Ast *parseType(Parser *p)
  323. {
  324. Ast *ast = NULL;
  325. if (accept(p, Tok_Identifier)) {
  326. ast = pushAst(Ast_Ident, &p->prev_token);
  327. ast->ident.name = p->prev_token;
  328. } else {
  329. errorAtCurrent(p, "Expected a type");
  330. }
  331. return ast;
  332. }
  333.  
  334. Ast *parseAtom(Parser *p)
  335. {
  336. Ast *ast = NULL;
  337. if (accept(p, Tok_Identifier)) {
  338. ast = pushAst(Ast_Ident, &p->prev_token);
  339. ast->ident.name = p->prev_token;
  340. } else if (accept(p, Tok_Number)) {
  341. ast = pushAst(Ast_Number, &p->prev_token);
  342. ast->number.value = p->prev_token;
  343. } else if (accept(p, Tok_LParen)) {
  344. ast = pushAst(Ast_Paren, &p->prev_token);
  345. ast->paren.expr = parseExpr(p);
  346. require(p, Tok_RParen, "closing paren");
  347. } else if (accept(p, Tok_Kw_New)) {
  348. ast = pushAst(Ast_New, &p->prev_token);
  349. ast->new_.type = parseType(p);
  350. if (accept(p, Tok_LBracket)) {
  351. ast->new_.count = parseExpr(p);
  352. require(p, Tok_RBracket, "closing bracket");
  353. }
  354. } else {
  355. errorAtCurrent(p, "Expected an expression");
  356. }
  357. return ast;
  358. }
  359.  
  360. Ast *parseCall(Parser *p)
  361. {
  362. Ast *left = parseAtom(p);
  363. for (;;) {
  364.  
  365. if (accept(p, Tok_LParen)) {
  366. Ast args[32];
  367. uint32_t numArgs = 0;
  368.  
  369. Ast *call = pushAst(Ast_Call, &p->prev_token);
  370. call->call.func = left;
  371.  
  372. if (!accept(p, Tok_RParen)) {
  373. do {
  374. args[numArgs] = *parseExpr(p);
  375. numArgs++;
  376. } while (accept(p, Tok_Comma));
  377.  
  378. require(p, Tok_RParen, "closing paren");
  379. }
  380.  
  381. call->call.args = pushAsts(args, numArgs);
  382. call->call.numArgs = numArgs;
  383. left = call;
  384. } else if (accept(p, Tok_LBracket)) {
  385. Ast *index = pushAst(Ast_Index, &p->prev_token);
  386. index->index.left = left;
  387. index->index.index = parseExpr(p);
  388. require(p, Tok_RBracket, "closing bracket");
  389. left = index;
  390. } else if (accept(p, Tok_Dot)) {
  391. Ast *member = pushAst(Ast_Member, &p->prev_token);
  392. member->member.expr = left;
  393. require(p, Tok_Identifier, "member name");
  394. member->member.field = p->prev_token;
  395. left = member;
  396. } else {
  397. break;
  398. }
  399.  
  400. }
  401. return left;
  402. }
  403.  
  404. Ast *parseTerm(Parser *p)
  405. {
  406. Ast *left = parseCall(p);
  407. while (accept(p, Tok_Mul) || accept(p, Tok_Div)) {
  408. Ast *binop = pushAst(Ast_Binop, &p->prev_token);
  409. binop->binop.op = binop->token;
  410. binop->binop.left = left;
  411. binop->binop.right = parseCall(p);
  412. left = binop;
  413. }
  414. return left;
  415. }
  416.  
  417. Ast *parseSum(Parser *p)
  418. {
  419. Ast *left = parseTerm(p);
  420. while (accept(p, Tok_Add) || accept(p, Tok_Sub)) {
  421. Ast *binop = pushAst(Ast_Binop, &p->prev_token);
  422. binop->binop.op = binop->token;
  423. binop->binop.left = left;
  424. binop->binop.right = parseTerm(p);
  425. left = binop;
  426. }
  427. return left;
  428. }
  429.  
  430. Ast *parseExpr(Parser *p)
  431. {
  432. Ast *expr = parseSum(p);
  433.  
  434. if (accept(p, Tok_Assign)) {
  435. errorAtPrev(p, "Illegal assignment in expression");
  436. }
  437.  
  438. return expr;
  439. }
  440.  
  441. Ast *parseStmt(Parser *p)
  442. {
  443. Ast *expr = parseSum(p);
  444.  
  445. if (accept(p, Tok_Assign)) {
  446. Ast *assign = pushAst(Ast_Assign, &p->prev_token);
  447. assign->assign.left = expr;
  448. assign->assign.right = parseExpr(p);
  449. expr = assign;
  450. }
  451.  
  452. return expr;
  453. }
  454.  
  455. Ast *finishBlock(Parser *p)
  456. {
  457. Ast *block = pushAst(Ast_Block, &p->prev_token);
  458.  
  459. Ast stmt[128];
  460. uint32_t numStmt = 0;
  461.  
  462. while (!accept(p, Tok_RBlock)) {
  463. Ast *ast = NULL;
  464.  
  465. if (accept(p, Tok_Newline)) {
  466. /* Nop */
  467. } else if (accept(p, Tok_Kw_Var)) {
  468. ast = pushAst(Ast_Var, &p->prev_token);
  469. require(p, Tok_Identifier, "variable name");
  470. ast->var.name = p->prev_token;
  471. if (accept(p, Tok_Assign)) {
  472. ast->var.expr = parseExpr(p);
  473. }
  474. } else {
  475. ast = parseStmt(p);
  476. require(p, Tok_Newline, "expected newline after statement");
  477. }
  478.  
  479. if (ast) {
  480. stmt[numStmt] = *ast;
  481. numStmt++;
  482. }
  483. }
  484.  
  485. block->block.stmt = pushAsts(stmt, numStmt);
  486. block->block.numStmt = numStmt;
  487. return block;
  488. }
  489.  
  490. Ast *parseTop(Parser *p)
  491. {
  492. if (accept(p, Tok_Kw_Def)) {
  493. Ast *def = pushAst(Ast_Def, &p->prev_token);
  494. require(p, Tok_Identifier, "name for function");
  495. def->def.name = p->prev_token;
  496.  
  497. require(p, Tok_LParen, "argument list");
  498. /* TODO */
  499. require(p, Tok_RParen, "closing argument list");
  500.  
  501. require(p, Tok_LBlock, "function body");
  502. def->def.block = finishBlock(p);
  503. return def;
  504. } else if (accept(p, Tok_Newline)) {
  505. /* Nop */
  506. return NULL;
  507. } else {
  508. errorAtCurrent(p, "Expected top-level declaration");
  509. return NULL;
  510. }
  511. }
  512.  
  513. Ast *parse(Scanner *scanner)
  514. {
  515. Parser parser, *p = &parser;
  516. p->scanner = scanner;
  517. p->prev_token.type = Tok_Invalid;
  518. p->token = lex(scanner);
  519.  
  520. Ast *block = pushAst(Ast_Block, NULL);
  521. block->token.type = Tok_Invalid;
  522.  
  523. Ast stmt[128];
  524. uint32_t numStmt = 0;
  525.  
  526. while (!accept(p, Tok_End)) {
  527. Ast *top = parseTop(p);
  528. if (top) {
  529. stmt[numStmt] = *top;
  530. numStmt++;
  531. }
  532. }
  533.  
  534. block->block.stmt = pushAsts(stmt, numStmt);
  535. block->block.numStmt = numStmt;
  536. return block;
  537. }
  538.  
  539. void printIndent(int indent) {
  540. for (int i = 0; i < indent; i++) {
  541. printf("%s", " ");
  542. }
  543. }
  544.  
  545. void printAst(Ast *ast, int indent)
  546. {
  547. switch (ast->type)
  548. {
  549. case Ast_Def:
  550. printf("def %.*s() ", ast->def.name.end - ast->def.name.begin, ast->def.name.scanner->source + ast->def.name.begin);
  551. printAst(ast->def.block, indent + 1);
  552. break;
  553.  
  554. case Ast_Block:
  555. if (ast->token.type != Tok_Invalid) printf("{\n");
  556. for (uint32_t i = 0; i < ast->block.numStmt; i++) {
  557. printIndent(indent);
  558. printAst(ast->block.stmt + i, indent);
  559. putchar('\n');
  560. }
  561. printIndent(indent - 1);
  562. if (ast->token.type != Tok_Invalid) printf("}");
  563. break;
  564.  
  565. case Ast_Var:
  566. printf("var %.*s", ast->var.name.end - ast->var.name.begin, ast->var.name.scanner->source + ast->var.name.begin);
  567. if (ast->var.expr) {
  568. printf(" = ");
  569. printAst(ast->var.expr, indent);
  570. }
  571. break;
  572.  
  573. case Ast_New:
  574. printf("new ");
  575. printAst(ast->new_.type, indent);
  576. if (ast->new_.count) {
  577. putchar('[');
  578. printAst(ast->new_.count, indent);
  579. putchar(']');
  580. }
  581. break;
  582.  
  583. case Ast_Assign:
  584. printAst(ast->assign.left, indent);
  585. printf(" = ");
  586. printAst(ast->assign.right, indent);
  587. break;
  588.  
  589. case Ast_Binop:
  590. printAst(ast->binop.left, indent);
  591. printf(" %.*s ", ast->binop.op.end - ast->binop.op.begin, ast->binop.op.scanner->source + ast->binop.op.begin);
  592. printAst(ast->binop.right, indent);
  593. break;
  594.  
  595. case Ast_Index:
  596. printAst(ast->index.left, indent);
  597. putchar('[');
  598. printAst(ast->index.index, indent);
  599. putchar(']');
  600. break;
  601.  
  602. case Ast_Ident:
  603. printf("%.*s", ast->ident.name.end - ast->ident.name.begin, ast->ident.name.scanner->source + ast->ident.name.begin);
  604. break;
  605.  
  606. case Ast_Number:
  607. printf("%.*s", ast->number.value.end - ast->number.value.begin, ast->number.value.scanner->source + ast->number.value.begin);
  608. break;
  609.  
  610. case Ast_Call:
  611. printAst(ast->index.left, indent);
  612. putchar('(');
  613. for (uint32_t i = 0; i < ast->call.numArgs; i++) {
  614. printAst(ast->call.args + i, indent);
  615. }
  616. putchar(')');
  617. break;
  618.  
  619. case Ast_Member:
  620. printAst(ast->member.expr, indent);
  621. putchar('.');
  622. printf("%.*s", ast->member.field.end - ast->member.field.begin, ast->member.field.scanner->source + ast->member.field.begin);
  623. break;
  624.  
  625. case Ast_Paren:
  626. putchar('(');
  627. printAst(ast->paren.expr, indent);
  628. putchar(')');
  629. break;
  630.  
  631. }
  632. }
  633.  
  634. struct Slice {
  635. void *data;
  636. uint32_t offset, count;
  637. };
  638.  
  639. enum TypeType {
  640. Type_Unit,
  641. Type_Int,
  642. Type_Slice,
  643. };
  644.  
  645. struct Type {
  646. TypeType type;
  647.  
  648. union {
  649.  
  650. struct {
  651. Type *type;
  652. } slice;
  653.  
  654. };
  655. };
  656.  
  657. bool typeEqual(Type *a, Type *b)
  658. {
  659. if (a->type != b->type) return false;
  660.  
  661. switch (a->type) {
  662. case Type_Unit: return true;
  663. case Type_Int: return true;
  664.  
  665. case Type_Slice:
  666. return typeEqual(a->slice.type, b->slice.type);
  667. }
  668.  
  669. assert(0);
  670. return false;
  671. }
  672.  
  673. uint32_t typeSize(Type *type)
  674. {
  675. switch (type->type) {
  676. case Type_Unit:
  677. return 0;
  678.  
  679. case Type_Int:
  680. return 4;
  681.  
  682. case Type_Slice:
  683. return sizeof(Slice);
  684. }
  685.  
  686. assert(0);
  687. return 0;
  688. }
  689.  
  690. Type intType = { Type_Int };
  691. Type unitType = { Type_Unit };
  692.  
  693. static Type typePool[1024 * 16];
  694. static uint32_t typePoolIndex;
  695.  
  696. Type *pushType(TypeType type)
  697. {
  698. Type *res = &typePool[typePoolIndex++];
  699. res->type = type;
  700. return res;
  701. }
  702.  
  703. struct Local
  704. {
  705. Token name;
  706. Type *type;
  707. uint16_t stackOffset;
  708. };
  709.  
  710. struct Codegen
  711. {
  712. Local locals[128];
  713. uint32_t numLocals;
  714. uint16_t stackTop;
  715.  
  716. uint16_t code[1024];
  717. uint32_t codeLen;
  718. };
  719.  
  720. enum Op
  721. {
  722. Op_Invalid,
  723.  
  724. Op_LocalRef,
  725. Op_SliceRef,
  726.  
  727. Op_MakeSlice,
  728. Op_PopSlice,
  729. Op_LoadSlice,
  730. Op_StoreSlice,
  731. Op_DropSlice,
  732.  
  733. Op_ImmInt,
  734. Op_PopInt,
  735. Op_LoadInt,
  736. Op_StoreInt,
  737. Op_AddInt,
  738. Op_SubInt,
  739. Op_MulInt,
  740. Op_PrintInt,
  741.  
  742. Op_Return,
  743. };
  744.  
  745. void emit0(Codegen *c, Op op)
  746. {
  747. c->code[c->codeLen + 0] = (uint16_t)op;
  748. c->codeLen += 1;
  749. }
  750.  
  751. void emit1(Codegen *c, Op op, uint16_t a0)
  752. {
  753. c->code[c->codeLen + 0] = (uint16_t)op;
  754. c->code[c->codeLen + 1] = a0;
  755. c->codeLen += 2;
  756. }
  757.  
  758. void emit2(Codegen *c, Op op, uint16_t a0, uint16_t a1)
  759. {
  760. c->code[c->codeLen + 0] = (uint16_t)op;
  761. c->code[c->codeLen + 1] = a0;
  762. c->code[c->codeLen + 2] = a1;
  763. c->codeLen += 3;
  764. }
  765.  
  766. Type *compileType(Codegen *c, Ast *ast)
  767. {
  768. switch (ast->type) {
  769.  
  770. case Ast_Ident: {
  771. if (ast->ident.name.end - ast->ident.name.begin == 3 && !memcmp(ast->ident.name.scanner->source + ast->ident.name.begin, "Int", 3)) {
  772. return &intType;
  773. } else {
  774. errorAt(ast->token, "Unknown type name");
  775. }
  776. }
  777.  
  778. }
  779.  
  780. errorAt(ast->token, "Unknown type");
  781. return NULL;
  782. }
  783.  
  784. Type *compileLValue(Codegen *c, Ast *ast);
  785.  
  786.  
  787. Type *compileRValue(Codegen *c, Ast *ast)
  788. {
  789. switch (ast->type) {
  790.  
  791. case Ast_Call: {
  792. Type *argTypes[32];
  793.  
  794. for (uint32_t i = 0; i < ast->call.numArgs; i++) {
  795. argTypes[i] = compileRValue(c, ast->call.args + i);
  796. }
  797.  
  798. if (ast->call.func->type == Ast_Ident
  799. && ast->call.func->ident.name.end - ast->call.func->ident.name.begin == 5
  800. && !memcmp(ast->call.func->ident.name.scanner->source + ast->call.func->ident.name.begin, "print", 5)) {
  801.  
  802. if (ast->call.numArgs != 1) {
  803. errorAt(ast->token, "Invalid amount of arguments to print");
  804. }
  805.  
  806. if (argTypes[0]->type == Type_Int) {
  807. emit0(c, Op_PrintInt);
  808. } else {
  809. errorAt(ast->call.args[0].token, "Invalid type to print");
  810. }
  811.  
  812. } else {
  813. errorAt(ast->call.func->token, "Unimplemented");
  814. }
  815.  
  816. return &unitType;
  817. } break;
  818.  
  819. case Ast_Number: {
  820. int value = 0;
  821.  
  822. for (uint32_t i = ast->number.value.begin; i != ast->number.value.end; i++)
  823. value = value * 10 + (ast->number.value.scanner->source[i] - '0');
  824.  
  825. emit1(c, Op_ImmInt, (uint16_t)(int16_t)value);
  826.  
  827. return &intType;
  828. } break;
  829.  
  830. case Ast_New: {
  831. Type *sliceType = pushType(Type_Slice);
  832. sliceType->slice.type = compileType(c, ast->new_.type);
  833.  
  834. compileRValue(c, ast->new_.count);
  835. emit1(c, Op_MakeSlice, typeSize(sliceType->slice.type));
  836.  
  837. return sliceType;
  838. } break;
  839.  
  840. case Ast_Ident:
  841. case Ast_Index: {
  842. Type *type = compileLValue(c, ast);
  843.  
  844. if (type->type == Type_Int) {
  845. emit0(c, Op_LoadInt);
  846. } else if (type->type == Type_Slice) {
  847. emit0(c, Op_LoadSlice);
  848. } else {
  849. errorAt(ast->token, "Invalid rvalue type");
  850. }
  851.  
  852. return type;
  853. } break;
  854.  
  855. case Ast_Paren: {
  856. return compileRValue(c, ast->paren.expr);
  857. } break;
  858.  
  859. case Ast_Binop: {
  860. Type *lType = compileRValue(c, ast->binop.left);
  861. Type *rType = compileRValue(c, ast->binop.right);
  862. if (!typeEqual(lType, rType)) {
  863. errorAt(ast->token, "Type mismatch");
  864. }
  865.  
  866. if (lType->type == Type_Int) {
  867. Op op = Op_Invalid;
  868. switch (ast->binop.op.type) {
  869. case Tok_Add: op = Op_AddInt; break;
  870. case Tok_Sub: op = Op_SubInt; break;
  871. case Tok_Mul: op = Op_MulInt; break;
  872. default:
  873. errorAt(ast->binop.op, "Invalid operator");
  874. }
  875. emit0(c, op);
  876. }
  877.  
  878. return lType;
  879. } break;
  880.  
  881. }
  882.  
  883. errorAt(ast->token, "Invalid rvalue");
  884. return NULL;
  885. }
  886.  
  887. Type *compileLValue(Codegen *c, Ast *ast)
  888. {
  889. switch (ast->type) {
  890.  
  891. case Ast_Ident: {
  892. uint32_t i;
  893. for (i = c->numLocals; i > 0; --i) {
  894. Local *local = &c->locals[i - 1];
  895. if (tokenEqual(&local->name, &ast->ident.name)) break;
  896. }
  897. if (i == 0) {
  898. errorAt(ast->ident.name, "undefined variable");
  899. }
  900. Local *local = &c->locals[i - 1];
  901. emit1(c, Op_LocalRef, local->stackOffset);
  902. return local->type;
  903. } break;
  904.  
  905. case Ast_Index: {
  906. Type *sliceType = compileLValue(c, ast->index.left);
  907. if (sliceType->type == Type_Slice) {
  908. Type *indexType = compileRValue(c, ast->index.index);
  909. if (indexType->type != Type_Int) {
  910. errorAt(ast->index.index->token, "Index type is not an integer");
  911. }
  912.  
  913. emit1(c, Op_SliceRef, (uint16_t)typeSize(sliceType->slice.type));
  914. return sliceType->slice.type;
  915. } else {
  916. errorAt(ast->token, "Cannot index type");
  917. }
  918. } break;
  919.  
  920.  
  921. }
  922.  
  923. errorAt(ast->token, "Invalid lvalue");
  924. return NULL;
  925. }
  926.  
  927. void compileStmt(Codegen *c, Ast *ast)
  928. {
  929. switch (ast->type) {
  930.  
  931. case Ast_Var: {
  932. Type *exprType = compileRValue(c, ast->var.expr);
  933.  
  934. emit1(c, Op_LocalRef, c->stackTop);
  935. if (exprType->type == Type_Int) {
  936. emit0(c, Op_StoreInt);
  937. } else if (exprType->type == Type_Slice) {
  938. emit0(c, Op_StoreSlice);
  939. } else {
  940. errorAt(ast->token, "Invalid expression type");
  941. }
  942.  
  943. c->locals[c->numLocals].name = ast->var.name;
  944. c->locals[c->numLocals].stackOffset = c->stackTop;
  945. c->locals[c->numLocals].type = exprType;
  946. c->numLocals++;
  947. c->stackTop += (typeSize(exprType) + 7) & 7;
  948. } break;
  949.  
  950. case Ast_Assign: {
  951. Type *rType = compileRValue(c, ast->assign.right);
  952. Type *lType = compileLValue(c, ast->assign.left);
  953. if (!typeEqual(lType, rType)) {
  954. errorAt(ast->token, "Type mismatch");
  955. }
  956.  
  957. if (lType->type == Type_Int) {
  958. emit0(c, Op_StoreInt);
  959. } else if (lType->type == Type_Slice) {
  960. emit0(c, Op_StoreSlice);
  961. } else {
  962. errorAt(ast->token, "Invalid expression type");
  963. }
  964. } break;
  965.  
  966. case Ast_Block: {
  967. uint32_t prevTop = c->stackTop;
  968. uint32_t prevNum = c->numLocals;
  969.  
  970. for (uint32_t i = 0; i < ast->block.numStmt; i++)
  971. compileStmt(c, ast->block.stmt + i);
  972.  
  973. for (uint32_t i = c->numLocals; i > prevNum; --i) {
  974. Local *local = &c->locals[i - 1];
  975.  
  976. if (local->type->type == Type_Slice) {
  977. emit1(c, Op_LocalRef, local->stackOffset);
  978. emit0(c, Op_DropSlice);
  979. }
  980. }
  981.  
  982. c->numLocals = prevNum;
  983. c->stackTop = prevTop;
  984. } break;
  985.  
  986. default: {
  987. Type *type = compileRValue(c, ast);
  988.  
  989. if (type->type == Type_Int) {
  990. emit0(c, Op_PopInt);
  991. } else if (type->type == Type_Slice) {
  992. emit0(c, Op_PopSlice);
  993. } else if (type->type == Type_Unit) {
  994. /* Nop */
  995. } else {
  996. errorAt(ast->token, "Invalid expression type");
  997. }
  998.  
  999. }
  1000.  
  1001. }
  1002. }
  1003.  
  1004. void compileTop(Codegen *c, Ast *ast)
  1005. {
  1006. switch (ast->type) {
  1007.  
  1008. case Ast_Def: {
  1009. compileStmt(c, ast->def.block);
  1010. emit0(c, Op_Return);
  1011. } break;
  1012.  
  1013. }
  1014. }
  1015.  
  1016. void printDisassembly(const uint16_t *code, uint32_t length)
  1017. {
  1018. const uint16_t *end = code + length;
  1019. while (code != end) {
  1020.  
  1021. switch (*code) {
  1022. case Op_Invalid: printf("invalid"); code++; break;
  1023.  
  1024. case Op_LocalRef: printf("ref.local %u", code[1]); code += 2; break;
  1025. case Op_SliceRef: printf("ref.slice %u", code[1]); code += 2; break;
  1026.  
  1027. case Op_MakeSlice: printf("slice.make %u", code[1]); code += 2; break;
  1028. case Op_PopSlice: printf("slice.pop"); code++; break;
  1029. case Op_LoadSlice: printf("slice.load"); code++; break;
  1030. case Op_StoreSlice: printf("slice.store"); code++; break;
  1031. case Op_DropSlice: printf("slice.drop"); code++; break;
  1032.  
  1033. case Op_ImmInt: printf("int.imm %d", (int)(int16_t)code[1]); code += 2; break;
  1034. case Op_PopInt: printf("int.pop"); code++; break;
  1035. case Op_LoadInt: printf("int.load"); code++; break;
  1036. case Op_StoreInt: printf("int.store"); code++; break;
  1037. case Op_AddInt: printf("int.add"); code++; break;
  1038. case Op_SubInt: printf("int.sub"); code++; break;
  1039. case Op_MulInt: printf("int.mul"); code++; break;
  1040. case Op_PrintInt: printf("int.print"); code++; break;
  1041.  
  1042. case Op_Return: printf("return"); code++; break;
  1043. }
  1044.  
  1045. putchar('\n');
  1046. }
  1047. }
  1048.  
  1049. void execute(const uint16_t *code)
  1050. {
  1051. char localStack[1024];
  1052.  
  1053. void *refStack[16];
  1054. uint32_t refTop = 0;
  1055.  
  1056. int32_t intStack[16];
  1057. int32_t intTop = 0;
  1058.  
  1059. char structStack[128];
  1060. uint32_t structTop = 0;
  1061.  
  1062. for (;;) {
  1063.  
  1064. switch (*code) {
  1065. case Op_Invalid: {
  1066. assert(0);
  1067. } break;
  1068.  
  1069. case Op_LocalRef: {
  1070. refStack[refTop++] = localStack + code[1];
  1071. code += 2;
  1072. } break;
  1073.  
  1074. case Op_SliceRef: {
  1075. Slice *slice = (Slice*)refStack[refTop - 1];
  1076. int32_t index = intStack[intTop - 1];
  1077. assert((uint32_t)index < slice->count);
  1078. refStack[refTop - 1] = (char*)slice->data + slice->offset + code[1] * index;
  1079. intTop--;
  1080. code += 2;
  1081. } break;
  1082.  
  1083. case Op_MakeSlice: {
  1084. Slice slice;
  1085. int32_t count = intStack[intTop - 1];
  1086. slice.data = calloc(count, code[1]);
  1087. slice.offset = 0;
  1088. slice.count = count;
  1089.  
  1090. memcpy(structStack + structTop, &slice, sizeof(Slice));
  1091. structTop += sizeof(Slice);
  1092. intTop--;
  1093. code += 2;
  1094. } break;
  1095.  
  1096. case Op_PopSlice: {
  1097. structTop -= sizeof(Slice);
  1098. refTop--;
  1099. code++;
  1100. } break;
  1101.  
  1102. case Op_LoadSlice: {
  1103. Slice *slice = (Slice*)refStack[refTop - 1];
  1104. memcpy(structStack + structTop, slice, sizeof(Slice));
  1105. structTop += sizeof(Slice);
  1106. refTop--;
  1107. code++;
  1108. } break;
  1109.  
  1110. case Op_StoreSlice: {
  1111. Slice *slice = (Slice*)refStack[refTop - 1];
  1112. structTop -= sizeof(Slice);
  1113. memcpy(slice, structStack + structTop, sizeof(Slice));
  1114. refTop--;
  1115. code++;
  1116. } break;
  1117.  
  1118. case Op_DropSlice: {
  1119. Slice *slice = (Slice*)refStack[refTop - 1];
  1120. free(slice->data);
  1121. refTop--;
  1122. code++;
  1123. } break;
  1124.  
  1125. case Op_ImmInt: {
  1126. intStack[intTop] = (int16_t)code[1];
  1127. intTop++;
  1128. code += 2;
  1129. } break;
  1130.  
  1131. case Op_PopInt: {
  1132. intTop--;
  1133. code++;
  1134. } break;
  1135.  
  1136. case Op_LoadInt: {
  1137. intStack[intTop] = *(int32_t*)refStack[refTop - 1];
  1138. intTop++;
  1139. refTop--;
  1140. code++;
  1141. } break;
  1142.  
  1143. case Op_StoreInt: {
  1144. *(int32_t*)refStack[refTop - 1] = intStack[intTop - 1];
  1145. intTop--;
  1146. code++;
  1147. } break;
  1148.  
  1149. case Op_AddInt: {
  1150. intStack[intTop - 2] = intStack[intTop - 2] + intStack[intTop - 1];
  1151. intTop--;
  1152. code++;
  1153. } break;
  1154.  
  1155. case Op_SubInt: {
  1156. intStack[intTop - 2] = intStack[intTop - 2] - intStack[intTop - 1];
  1157. intTop--;
  1158. code++;
  1159. } break;
  1160.  
  1161. case Op_MulInt: {
  1162. intStack[intTop - 2] = intStack[intTop - 2] * intStack[intTop - 1];
  1163. intTop--;
  1164. code++;
  1165. } break;
  1166.  
  1167. case Op_PrintInt: {
  1168. printf("%d\n", intStack[intTop - 1]);
  1169. intTop--;
  1170. code++;
  1171. } break;
  1172.  
  1173. case Op_Return: {
  1174. return;
  1175. } break;
  1176. }
  1177. }
  1178. }
  1179.  
  1180. const char *testSrc = R"(
  1181.  
  1182. def main() {
  1183. var data = new Int[2]
  1184.  
  1185. data[0] = (1 + 2) * 3
  1186. data[1] = 1 + 2 * 3
  1187.  
  1188. print(data[0] - data[1])
  1189. }
  1190.  
  1191. )";
  1192.  
  1193. int main(int argc, char **argv)
  1194. {
  1195. Scanner scanner;
  1196. scanner.filename = "test";
  1197. scanner.source = testSrc;
  1198. scanner.pos = scanner.source;
  1199. Ast *ast = parse(&scanner);
  1200. printAst(ast, 0);
  1201.  
  1202. Codegen codegen = { };
  1203. compileTop(&codegen, &ast->block.stmt[0]);
  1204.  
  1205. printDisassembly(codegen.code, codegen.codeLen);
  1206.  
  1207. execute(codegen.code);
  1208.  
  1209. return 0;
  1210. }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement