Advertisement
Guest User

lambda calculus compiler in 511 lines of C

a guest
May 31st, 2017
177
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
C 11.16 KB | None | 0 0
  1. // compile lambda calculus in a few lines (^:
  2. #include <stdio.h>
  3. #include <stdlib.h>
  4. #include <ctype.h>
  5. #include <string.h>
  6. #include <setjmp.h>
  7.  
  8.  
  9. // syntax tree structures and utility functions
  10.  
  11. typedef enum {
  12.   APPLICATION, LAMBDA, SYMBOL
  13. } expr_type;
  14.  
  15. struct cenv {
  16.   char* sym_name;
  17.   struct cenv* next;
  18. };
  19.  
  20. struct lambda {
  21.   char* arg;
  22.   struct expr* body;
  23. };
  24.  
  25. struct appl {
  26.   struct expr* operator;
  27.   struct expr* operand;
  28. };
  29.  
  30. struct expr {
  31.   expr_type typ;
  32.   union {
  33.     char* sym;
  34.     struct appl* app;
  35.     struct lambda* lam;
  36.   };
  37. };
  38.  
  39. void print_expr(struct expr* exp) {
  40.   switch(exp->typ) {
  41.   case SYMBOL:
  42.     printf("%s", exp->sym);
  43.     break;
  44.   case LAMBDA:
  45.     printf("\\%s ", exp->lam->arg);
  46.     print_expr(exp->lam->body);
  47.     break;
  48.   case APPLICATION:
  49.     printf("("); print_expr(exp->app->operator);
  50.     printf(" ");
  51.     print_expr(exp->app->operand);
  52.     printf(")");
  53.     break;
  54.   }
  55. }
  56.  
  57.  
  58. // lexing/parsing utility functions
  59.  
  60. char peekchar() {
  61.   char c = getchar();
  62.   ungetc(c, stdin);
  63.   return c;
  64. }
  65.  
  66. void skip_whitespace() {
  67.   char c = getchar();
  68.   if(isspace(c)) {
  69.     while(isspace(c)) {
  70.       c = getchar();
  71.     }
  72.     ungetc(c, stdin);
  73.   } else {
  74.     ungetc(c, stdin);
  75.   }
  76. }
  77.  
  78. // hack for error handling
  79. // if we ever encounter an error in parsing or evaluation
  80. // longjmp to this buffer
  81. jmp_buf error;
  82.  
  83. // syntax
  84.  
  85. // lambda: '\' symbol expr
  86. // application: '(' expr expr ')'
  87. // symbol: a-z[^\s\(\)\\]*
  88.  
  89. struct expr* parse_expression();
  90.  
  91. char reserved_chars[] = "(\\).";
  92.  
  93. struct expr* parse_symbol() {
  94.   char c = getchar();
  95.   if(isalpha(c)) {
  96.    
  97.     int buf_size = 32;
  98.     char *buf = malloc(sizeof(char)*buf_size);
  99.     int idx = 0;
  100.     buf[idx++] = c;
  101.    
  102.     c = getchar();
  103.     while(!isspace(c) && (strchr(reserved_chars, c) == NULL)) {
  104.       // always leave one spot for null terminator
  105.       if(idx >= (buf_size-1)) {
  106.         buf_size *= 1.5;
  107.         buf = realloc(buf, sizeof(char)*buf_size);
  108.       }
  109.       buf[idx++] = c;
  110.       c = getchar();
  111.     }
  112.    
  113.     ungetc(c, stdin);
  114.     buf[idx++] = '\0';
  115.    
  116.     struct expr* res = malloc(sizeof(struct expr));
  117.     res->typ = SYMBOL;
  118.     res->sym = buf;
  119.     return res;
  120.    
  121.   } else {
  122.     printf("Unexpected character '%c'\n", c);
  123.     longjmp(error, 1);
  124.   }
  125. }
  126.  
  127. struct expr* parse_lambda() {
  128.   // consume '\'
  129.   getchar();
  130.  
  131.   skip_whitespace();
  132.   struct expr* sym_expr = parse_symbol();
  133.  
  134.   // parse symbol
  135.   struct expr* lam_expr = malloc(sizeof(struct expr));
  136.   lam_expr->lam = malloc(sizeof(struct lambda));
  137.   lam_expr->typ = LAMBDA;
  138.   lam_expr->lam->arg = sym_expr->sym;
  139.  
  140.   // parse body of lambda
  141.   struct expr* body = parse_expression();
  142.   lam_expr->lam->body = body;
  143.  
  144.   return lam_expr;
  145. }
  146.  
  147. struct expr* parse_application() {
  148.   // consume '('
  149.   getchar();
  150.  
  151.   struct expr* operator = parse_expression();
  152.   struct expr* operand = parse_expression();
  153.  
  154.   skip_whitespace();
  155.   char c = getchar();
  156.   if(c != ')') {
  157.     printf("Expected ')' to end application expression, but got '%c'!\n", c);
  158.     longjmp(error, 1);
  159.   }
  160.  
  161.   struct expr* res = malloc(sizeof(struct expr));
  162.  
  163.   res->typ = APPLICATION;
  164.   res->app = malloc(sizeof(struct appl));
  165.   res->app->operator = operator;
  166.   res->app->operand = operand;
  167.  
  168.   return res;
  169. }
  170.  
  171. struct expr* parse_expression() {
  172.   skip_whitespace();
  173.  
  174.   char c = peekchar();
  175.   switch(c) {
  176.   case '\\':
  177.     return parse_lambda();
  178.   case '(':
  179.     return parse_application();
  180.   default:
  181.     return parse_symbol();
  182.   }
  183. }
  184.  
  185.  
  186. // buffer data structure and utility functions
  187. struct buffer {
  188.   int buf_size;
  189.   int buf_ptr;
  190.   int *buf;
  191. };
  192.  
  193. struct buffer* alloc_buffer(int init_size) {
  194.   struct buffer *buf = malloc(sizeof(struct buffer));
  195.   buf->buf_size = init_size;
  196.   buf->buf_ptr = 0;
  197.   buf->buf = malloc(sizeof(int)*init_size);
  198.   return buf;
  199. }
  200.  
  201. void add_to_buffer(struct buffer *buf, int val) {
  202.   if(buf->buf_ptr >= buf->buf_size) {
  203.     buf->buf_size *= 1.5;
  204.     buf->buf = realloc(buf->buf, sizeof(int)*buf->buf_size);
  205.   }
  206.   buf->buf[buf->buf_ptr++] = val;
  207. }
  208.  
  209. void free_buf(struct buffer* buf) {
  210.   free(buf->buf);
  211.   free(buf);
  212. }
  213.  
  214. struct buffer* merge_bufs(struct buffer* buf_a, struct buffer* buf_b) {
  215.   struct buffer* res = malloc(sizeof(struct buffer));
  216.   res->buf_ptr = (buf_a->buf_ptr + buf_b->buf_ptr);
  217.   res->buf_size = (buf_a->buf_size)+(buf_b->buf_size) * 1.5;
  218.   res->buf = malloc(sizeof(int)*res->buf_size);
  219.  
  220.   memcpy(res->buf, buf_a->buf, (buf_a->buf_ptr*sizeof(int)));
  221.   memcpy(res->buf+buf_a->buf_ptr, buf_b->buf, (buf_b->buf_ptr*sizeof(int)));
  222.   return res;
  223. }
  224.  
  225. struct buffer* merge_and_free_bufs(struct buffer* buf_a, struct buffer* buf_b) {
  226.   struct buffer* res = merge_bufs(buf_a, buf_b);
  227.   free_buf(buf_a);
  228.   free_buf(buf_b);
  229.   return res;
  230. }
  231.  
  232. typedef enum {
  233.   DUP, SWAP,
  234.   JMP, CALL, RET,
  235.   ENV_LOOKUP, EXTEND_ENV,
  236.   PUSH_ENV, POP_ENV, GET_ENV,
  237.   MK_CLOSURE,
  238.   GET_CLOSURE_ENV, GET_CLOSURE_CODE,
  239.   GET_REL_ADDR
  240. } opcode;
  241.  
  242.  
  243. struct buffer* compile_expression(struct expr* exp, struct cenv* environment);
  244.  
  245. struct buffer* compile_symbol_lookup(struct expr* exp, struct cenv* environment) {
  246.   char* sym = exp->sym;
  247.  
  248.   struct cenv* cur_env = environment;
  249.   int offset = 0;
  250.   while(cur_env) {
  251.     if(strcmp(cur_env->sym_name, sym) == 0) {
  252.       struct buffer* buf = alloc_buffer(2);
  253.       add_to_buffer(buf, ENV_LOOKUP);
  254.       add_to_buffer(buf, offset);
  255.       return buf;
  256.     }
  257.     offset++;
  258.     cur_env = cur_env->next;
  259.   }
  260.  
  261.   printf("Tried to reference symbol '%s' that is not bound!\n", sym);
  262.   longjmp(error, 1);
  263. }
  264.  
  265.  
  266. struct buffer* compile_application(struct expr* exp, struct cenv* environment) {
  267.   struct buffer* ev_operand = compile_expression(exp->app->operand, environment);
  268.   struct buffer* ev_operator = compile_expression(exp->app->operator, environment);
  269.  
  270.   struct buffer* buf = merge_and_free_bufs(ev_operand, ev_operator);
  271.  
  272.   add_to_buffer(buf, DUP);
  273.   add_to_buffer(buf, GET_CLOSURE_ENV);
  274.   add_to_buffer(buf, PUSH_ENV);
  275.   add_to_buffer(buf, SWAP);
  276.   add_to_buffer(buf, EXTEND_ENV);
  277.   add_to_buffer(buf, GET_CLOSURE_CODE);
  278.   add_to_buffer(buf, CALL);
  279.   return buf;
  280. }
  281.  
  282. struct buffer* compile_lambda(struct expr* exp, struct cenv* environment) {
  283.   // extended environment
  284.  
  285.   char* arg_name = exp->lam->arg;
  286.  
  287.   struct cenv* ext_env = malloc(sizeof(struct cenv));
  288.   ext_env->next = environment;
  289.   ext_env->sym_name = arg_name;
  290.  
  291.   struct buffer* body = compile_expression(exp->lam->body, ext_env);
  292.   add_to_buffer(body, POP_ENV);
  293.   add_to_buffer(body, RET);
  294.  
  295.  
  296.   struct buffer* buf = alloc_buffer(32);
  297.   add_to_buffer(buf, GET_REL_ADDR);
  298.   add_to_buffer(buf, 6);
  299.   add_to_buffer(buf, GET_ENV);
  300.   add_to_buffer(buf, MK_CLOSURE);
  301.   // jump past closure
  302.   add_to_buffer(buf, JMP);
  303.   add_to_buffer(buf, body->buf_ptr+2);
  304.  
  305.   return merge_and_free_bufs(buf, body);
  306. }
  307.  
  308. struct buffer* compile_expression(struct expr* exp, struct cenv* environment) {
  309.   switch(exp->typ) {
  310.   case SYMBOL:
  311.     return compile_symbol_lookup(exp, environment);
  312.   case APPLICATION:
  313.     return compile_application(exp, environment);
  314.   case LAMBDA:
  315.     return compile_lambda(exp, environment);
  316.   }
  317. }
  318.  
  319. struct renv {
  320.   struct val* value;
  321.   struct renv* next;
  322. };
  323.  
  324. struct closure {
  325.   int body_addr;
  326.   struct renv* bound_env;
  327. };
  328.  
  329. typedef enum {
  330.   ENVIRONMENT,
  331.   CLOSURE,
  332.   ADDRESS
  333. } val_type;
  334.  
  335. struct val {
  336.   val_type typ;
  337.   union {
  338.     struct renv* env;
  339.     struct closure* cls;
  340.     int addr;
  341.   };
  342. };
  343.  
  344. void print_value(struct val* v) {
  345.   switch(v->typ) {
  346.   case ENVIRONMENT:
  347.     printf("{environment}");
  348.     break;
  349.  
  350.   case CLOSURE:
  351.     printf("<lambda>");
  352.     break;
  353.    
  354.   case ADDRESS:
  355.     printf("[address: %x]", v->addr);
  356.     break;
  357.    
  358.   default:
  359.     printf("Unrecognized object type\n");
  360.     break;
  361.   }
  362. }
  363.  
  364.  
  365. struct renv *env_stack[1024];
  366. int esp = 1024;
  367.  
  368. int return_stack[1024];
  369. int rsp = 1024;
  370.  
  371. struct val *object_stack[1024];
  372. int sp = 1024;
  373.  
  374. struct val* execute_program(struct buffer* buf) {
  375.   printf("Executing program\n");
  376.   int pc = 0;
  377.   int *prog = buf->buf;
  378.   int prog_len = buf->buf_ptr;
  379.   struct renv *env = NULL;
  380.  
  381.  
  382.   struct val *tmp, *tmp2;
  383.   int tmp_addr;
  384.   struct renv* tmp_env;
  385.  
  386.  
  387.   while(pc < prog_len) {
  388.     opcode code = prog[pc++];
  389.    
  390.     switch(code) {
  391.     case DUP:
  392.       tmp = object_stack[sp];
  393.       object_stack[--sp] = tmp;
  394.       break;
  395.      
  396.     case SWAP:
  397.       tmp = object_stack[sp++];
  398.       tmp2 = object_stack[sp++];
  399.       object_stack[--sp] = tmp2;
  400.       object_stack[--sp] = tmp;
  401.       break;
  402.  
  403.     case JMP:
  404.       tmp_addr = pc-1;
  405.       pc = tmp_addr + prog[pc];
  406.       break;
  407.      
  408.     case CALL:
  409.       // push return address on stack
  410.       return_stack[--rsp] = pc+1;
  411.       // jump to location
  412.       pc = object_stack[sp++]->addr;
  413.       break;
  414.  
  415.     case RET:
  416.       pc = return_stack[rsp++];
  417.       break;
  418.  
  419.     case ENV_LOOKUP:
  420.       // grab environment offset
  421.       tmp_addr = prog[pc++];
  422.       tmp_env = env;
  423.       while(tmp_addr--) {
  424.         tmp_env = tmp_env->next;
  425.       }
  426.       object_stack[--sp] = tmp_env->value;
  427.       break;
  428.      
  429.     case PUSH_ENV:
  430.       env_stack[--esp] = env;
  431.       env = object_stack[sp++]->env;
  432.       break;
  433.      
  434.     case POP_ENV:
  435.       env = env_stack[esp++];
  436.       break;
  437.  
  438.     case EXTEND_ENV:
  439.       tmp_env = malloc(sizeof(struct renv));
  440.       tmp_env->value = object_stack[sp++];
  441.       tmp_env->next = env;
  442.       env = tmp_env;
  443.       break;
  444.  
  445.     case GET_ENV:
  446.       tmp = malloc(sizeof(struct val));
  447.       tmp->typ = ENVIRONMENT;
  448.       tmp->env = env;
  449.       object_stack[--sp] = tmp;
  450.       break;
  451.  
  452.     case MK_CLOSURE:
  453.       tmp = malloc(sizeof(struct val));
  454.       tmp->typ = CLOSURE;
  455.       tmp->cls = malloc(sizeof(struct closure));
  456.       tmp->cls->bound_env = object_stack[sp++]->env;
  457.       tmp->cls->body_addr = object_stack[sp++]->addr;
  458.       object_stack[--sp] = tmp;
  459.       break;
  460.  
  461.     case GET_CLOSURE_ENV:
  462.       tmp = malloc(sizeof(struct val));
  463.       tmp->typ = ENVIRONMENT;
  464.       tmp->env = object_stack[sp++]->cls->bound_env;
  465.       object_stack[--sp] = tmp;
  466.       break;
  467.  
  468.     case GET_CLOSURE_CODE:
  469.       tmp = malloc(sizeof(struct val));
  470.       tmp->typ = ADDRESS;
  471.       tmp->addr = object_stack[sp++]->cls->body_addr;
  472.       object_stack[--sp] = tmp;
  473.       break;
  474.      
  475.     case GET_REL_ADDR:
  476.       tmp_addr = pc-1;
  477.       tmp_addr += prog[pc++];
  478.      
  479.       tmp = malloc(sizeof(struct val));
  480.       tmp->typ = ADDRESS;
  481.       tmp->addr = tmp_addr;
  482.       object_stack[--sp] = tmp;
  483.       break;
  484.      
  485.     }
  486.   }
  487.   if(sp != 1023) {
  488.     printf("Execution ended with more than one item on the stack!\n");
  489.     exit(1);
  490.   }
  491.   return object_stack[sp];
  492. }
  493.  
  494. int main() {
  495.   while(1) {
  496.     // jump back to this point if we have an error
  497.     if(setjmp(error) == 1) {
  498.       while(getchar() != '\n');  // skip past unread input
  499.     }
  500.    
  501.     printf("Enter expression> ");
  502.     struct expr* exp = parse_expression();
  503.     struct buffer* compiled_code = compile_expression(exp, NULL);
  504.    
  505.     struct val* result = execute_program(compiled_code);
  506.     printf("Result> ");
  507.     print_value(result);
  508.     printf("\n");
  509.   }
  510.   return 0;
  511. }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement