Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- /* fichier: "petit-comp-print.c" */
- /* Un petit compilateur et machine virtuelle pour un sous-ensemble de C. */
- /*TP1 -- IFT2035
- * Simon Besozzi
- * Ryan Godin
- * */
- #include <stdio.h>
- #include <stdlib.h>
- #include <string.h>
- #include <setjmp.h>
- /*---------------------------------------------------------------------------*/
- /* Analyseur lexical. */
- enum {
- DO_SYM, ELSE_SYM, IF_SYM, WHILE_SYM, PRINT_SYM, BREAK_SYM, CONTINUE_SYM, GOTO_SYM, LBRA, RBRA, LPAR,
- RPAR, PLUS, MINUS, LESS, SEMI, COL, EQUAL, INT, ID, LTOE, GTOE, GREATER, EQUIV, NEQUAL, MULT, DIV, MOD, EOI
- };
- char *words[] = {"do", "else", "if", "while", "print", "break", "continue", "goto", NULL};
- int ch = ' ';
- int sym;
- int int_val;
- char id_name[100];
- char tag_name[1000];
- int tag_iter = 0;
- jmp_buf env;
- int fileRead = 0; //1 if the compiler has to run a file specified in the command line arguments
- int isId = 0; //Checks if : can be used instead of ;
- FILE *source;
- void syntax_error(char *errorReport);
- void next_ch() {
- //This reads the next char from an outside file, rather than user input
- if (fileRead == 1 && ch != '\0' && source != NULL) {
- ch = fgetc(source);
- } else {
- ch = getchar();
- }
- }
- void next_sym() {
- while (ch == ' ' || ch == '\n') next_ch();
- switch (ch) {
- case '{':
- sym = LBRA;
- next_ch();
- break;
- case '}':
- sym = RBRA;
- next_ch();
- break;
- case '(':
- sym = LPAR;
- next_ch();
- break;
- case ')':
- sym = RPAR;
- next_ch();
- break;
- case '+':
- sym = PLUS;
- next_ch();
- break;
- case '-':
- sym = MINUS;
- next_ch();
- break;
- case '/':
- sym = DIV;
- next_ch();
- break; // new
- case '*':
- sym = MULT;
- next_ch();
- break; // new
- case '%':
- sym = MOD;
- next_ch();
- break; // new
- case '<': //Checks to see if the next character completes <= operation or simply <
- next_ch();
- if (ch == '=') {
- sym = LTOE;
- next_ch();
- } else sym = LESS;
- break;
- case '>': //Checks to see if the next character completes >= operation or simply >
- next_ch();
- if (ch == '=') {
- sym = GTOE;
- next_ch();
- } else sym = GREATER;
- break;
- case ';':
- sym = SEMI;
- next_ch();
- break;
- case '=': //Checks to see if the next character completes == operation or simply =
- next_ch();
- if (ch == '=') {
- sym = EQUIV;
- next_ch();
- } else sym = EQUAL;
- break;
- case ':': //Checks for the last label declared
- sym = COL;
- tag_name[tag_iter - 1] = id_name[0]; //Adds the label for the goto
- next_ch();
- break;
- case '!' : //Checks to see if the next character completes != operation
- next_ch();
- if (ch == '=') {
- sym = NEQUAL;
- next_ch();
- }
- break;
- case EOF:
- sym = EOI;
- next_ch();
- break;
- default:
- if (ch >= '0' && ch <= '9') {
- int_val = 0; /* overflow? */
- while (ch >= '0' && ch <= '9') {
- int_val = int_val * 10 + (ch - '0');
- next_ch();
- }
- sym = INT;
- } else if (ch >= 'a' && ch <= 'z') {
- int i = 0; /* overflow? */
- while ((ch >= 'a' && ch <= 'z') || ch == '_') {
- id_name[i++] = ch;
- next_ch();
- }
- id_name[i] = '\0';
- sym = 0;
- while (words[sym] != NULL && strcmp(words[sym], id_name) != 0)
- sym++;
- if (words[sym] == NULL) {
- if (id_name[1] == '\0') sym = ID;
- else
- syntax_error("--> words[sym] == NULL | line 107 | function next_sym");
- }
- } else syntax_error("--> default | line 110 | function next_sym");
- }
- tag_iter++; //This was the first attemps to implement label addresses
- //Did not work, but compatible with current implementation
- }
- /*---------------------------------------------------------------------------*/
- /* Analyseur syntaxique. */
- enum {
- VAR, CST, ADD, SUB, LT, GT, LTE, GTE, MLT, DIVI, MODU, EQ, NEQ, ASSIGN,
- IF1, IF2, WHILE, DO, PRINT, EMPTY, SEQ, EXPR, GOTO_K, BREAK, CONTINUE, PROG
- };
- struct node {
- int kind;
- struct node *o1;
- struct node *o2;
- struct node *o3;
- int val;
- };
- typedef struct node node;
- //This is the first node of the ASA, typically a PROGRAM type node
- node *start;
- /**
- * This method is called every time the program has to terminate to prevent memory leaks
- * parameter node *x is the inital node of the ASA
- */
- void freeMem(node *x) {
- while (x->kind != EMPTY && x->o1 != NULL) {
- freeMem(x->o1);
- x->o1 = NULL;
- break;
- }
- while (x->kind != EMPTY && x->o2 != NULL) {
- freeMem(x->o2);
- x->o2 = NULL;
- break;
- }
- while (x->kind != EMPTY && x->o3 != NULL) {
- freeMem(x->o3);
- x->o3 = NULL;
- break;
- }
- if (x->kind != EMPTY) {
- free(x);
- }
- return;
- }
- node *new_node(int k) {
- node *x = malloc(sizeof(node));
- /*
- This statement deals with memory exhaustion issues
- */
- if (x != NULL) {
- x->kind = k;
- return x;
- } else {
- fprintf(stderr, "Memory exhausted");
- freeMem(start); //Prevents memory leaks
- exit(1);
- }
- }
- node *paren_expr(); /* forward declaration */
- node *tag() {
- node *x;
- x = new_node(CST);
- x->val = id_name[0];
- next_sym();
- return x;
- }
- node *term() /* <term> ::= <id> | <int> | <paren_expr> */
- {
- node *x;
- if (sym == ID) /* <term> ::= <id> */
- {
- isId = 1; //Boolean to determine if a ':' char can be used instead of ';'
- x = new_node(VAR);
- x->val = id_name[0] - 'a';
- next_sym();
- } else if (sym == INT) /* <term> ::= <int> */
- {
- x = new_node(CST);
- x->val = int_val;
- next_sym();
- } else /* <term> ::= <paren_expr> */
- x = paren_expr();
- return x;
- }
- node *mult() /*<mult> ::= <term>|<mult>"*"<term>|<mult>"/"<term>|<mult>"%"<term> */
- {
- node *x = term();
- if (sym == MULT) {
- node *t = x;
- x = new_node(MLT);
- next_sym();
- x->o1 = t;
- x->o2 = term();
- } else if (sym == DIV) {
- node *t = x;
- x = new_node(DIVI);
- next_sym();
- x->o1 = t;
- x->o2 = term();
- } else if (sym == MOD) {
- node *t = x;
- x = new_node(MODU);
- next_sym();
- x->o1 = t;
- x->o2 = term();
- }
- return x;
- }
- node *sum() /* <sum> ::= <term>|<sum>"+"<term>|<sum>"-"<term> */
- {
- node *x = term();
- while (sym == PLUS || sym == MINUS) {
- node *t = x;
- x = new_node(sym == PLUS ? ADD : SUB);
- next_sym();
- x->o1 = t;
- x->o2 = term();
- }
- /*<sum> ::= <term>|<sum>"*"<term>|<sum>"/"<term>|<sum>"%"<term> */
- while (sym == MULT || sym == DIV || sym == MOD) {
- if (sym == MULT) {
- node *t = x;
- x = new_node(MLT);
- next_sym();
- x->o1 = t;
- x->o2 = mult();
- } else if (sym == DIV) {
- node *t = x;
- x = new_node(DIVI);
- next_sym();
- x->o1 = t;
- x->o2 = mult();
- } else if (sym == MOD) {
- node *t = x;
- x = new_node(MODU);
- next_sym();
- x->o1 = t;
- x->o2 = mult();
- }
- }
- return x;
- }
- node *test()
- {
- node *x = sum();
- /* <test> ::= <sum> | <sum> "<" <sum> */
- if (sym == LESS) {
- node *t = x;
- x = new_node(LT);
- next_sym();
- x->o1 = t;
- x->o2 = sum();
- }
- /* <test> ::= <sum> | <sum> ">" <sum> */
- else if (sym == GREATER) {
- node *t = x;
- x = new_node(GT);
- next_sym();
- x->o1 = t;
- x->o2 = sum();
- }
- /* <test> ::= <sum> | <sum> "<=" <sum> */
- else if (sym == LTOE) {
- node *t = x;
- x = new_node(LTE);
- next_sym();
- x->o1 = t;
- x->o2 = sum();
- }
- /* <test> ::= <sum> | <sum> ">=" <sum> */
- else if (sym == GTOE) {
- node *t = x;
- x = new_node(GTE);
- next_sym();
- x->o1 = t;
- x->o2 = sum();
- }
- /* <test> ::= <sum> | <sum> "==" <sum> */
- else if (sym == EQUIV) {
- node *t = x;
- x = new_node(EQ);
- next_sym();
- x->o1 = t;
- x->o2 = sum();
- }
- /* <test> ::= <sum> | <sum> "!=" <sum> */
- else if (sym == NEQUAL) {
- node *t = x;
- x = new_node(NEQ);
- next_sym();
- x->o1 = t;
- x->o2 = sum();
- }
- return x;
- }
- node *expr() /* <expr> ::= <test> | <id> "=" <expr> */
- {
- node *x;
- if (sym != ID) return test();
- x = test();
- if (sym == EQUAL) {
- node *t = x;
- x = new_node(ASSIGN);
- next_sym();
- x->o1 = t;
- x->o2 = expr();
- }
- return x;
- }
- node *paren_expr() /* <paren_expr> ::= "(" <expr> ")" */
- {
- node *x;
- if (sym == LPAR) next_sym(); else syntax_error("--> sym != LPAR | line 318 | function *paren_expr");
- x = expr();
- if (sym == RPAR) next_sym(); else syntax_error("--> sym != RPAR | line 322 | function *paren_expr");
- return x;
- }
- node *statement() {
- node *x;
- if (sym == IF_SYM) /* "if" <paren_expr> <stat> */
- {
- x = new_node(IF1);
- next_sym();
- x->o1 = paren_expr();
- x->o2 = statement();
- if (sym == ELSE_SYM) /* ... "else" <stat> */
- {
- x->kind = IF2;
- next_sym();
- x->o3 = statement();
- }
- } else if (sym == WHILE_SYM) /* "while" <paren_expr> <stat> */
- {
- x = new_node(WHILE);
- next_sym();
- x->o1 = paren_expr();
- x->o2 = statement();
- } else if (sym == DO_SYM) /* "do" <stat> "while" <paren_expr> ";" */
- {
- x = new_node(DO);
- next_sym();
- x->o1 = statement();
- if (sym == WHILE_SYM) next_sym(); else syntax_error("--> sym != WHILE_SYM | line 355 | function *statement");
- x->o2 = paren_expr();
- if (sym == SEMI) next_sym(); else syntax_error("--> sym != SEMI | line 357 | function *statement");
- } else if (sym == PRINT_SYM) /* "print" <paren_expr> ";" */
- {
- x = new_node(PRINT);
- next_sym();
- x->o1 = paren_expr();
- if (sym == SEMI) next_sym(); else syntax_error("--> sym != SEMI | line 364 | function *statement");
- }
- // Creates new goto node, checks if the goto has a id and semi colon.
- else if (sym == GOTO_SYM) {
- x = new_node(GOTO_K);
- next_sym();
- if(sym == ID) x->o1=tag(); else syntax_error("--> sym != ID | line 450 | function *statement");
- x->o2 = NULL;
- x->o3 = NULL;
- if (sym == SEMI) next_sym(); else syntax_error("--> sym != SEMI | line 364 | function *statement");
- }
- // Creates new break node, checks if the break has a id and semi colon.
- else if (sym == BREAK_SYM) {
- /*
- x = new_node(BREAK);
- next_sym();
- if (sym == ID){ x->o1 = term(); next_sym(); }
- if (sym == SEMI)next_sym(); else syntax_error("Missing semi colon after break statement");
- */
- }
- // Creates new continue node, checks if the continue has a id and semi colon.
- else if (sym == CONTINUE_SYM) {
- /* x = new_node(CONTINUE);
- next_sym();
- if (sym == ID){ x->o1 = term(); next_sym(); }
- if (sym == SEMI)next_sym(); else syntax_error("Missing semi colon after continue statement");
- */
- }
- else if (sym == SEMI) /* ";" */
- {
- x = new_node(EMPTY);
- next_sym();
- } else if (sym == COL) {
- x = new_node(EMPTY);
- next_sym();
- } else if (sym == LBRA) /* "{" { <stat> } "}" */
- {
- x = new_node(EMPTY);
- next_sym();
- while (sym != RBRA) {
- node *t = x;
- x = new_node(SEQ);
- x->o1 = t;
- x->o2 = statement();
- }
- next_sym();
- } else /* <expr> ";" */
- {
- x = new_node(EXPR);
- x->o1 = expr();
- if (sym == SEMI || (sym == COL && isId == 1)) next_sym();
- else
- syntax_error("--> sym != SEMI | line 400 | function *statement");
- isId = 0;
- }
- return x;
- }
- node *program() /* <program> ::= <stat> */
- {
- node *x = new_node(PROG);
- start = x;
- next_sym();
- x->o1 = statement();
- if (sym != EOI) syntax_error("--> sym != EOI | line 412 | function *program");
- return x;
- }
- /**
- * This method has been modified to call freeMem when a syntax error occurs the program ends abruptly
- * - Simon
- */
- void syntax_error(char *errorReport) {
- {
- fprintf(stderr, "syntax error\n");
- printf("%s", errorReport);
- freeMem(start);
- exit(1);
- }
- }
- void wip(char type[]) {
- fprintf(stderr, "%s partly implemented, but crashes or doesn't work properly", type);
- freeMem(start);
- exit(1);
- }
- /*---------------------------------------------------------------------------*/
- /* Generateur de code. */
- enum {
- ILOAD, ISTORE, BIPUSH, DUP, POP, IADD, ISUB, IPRINT,
- GOTO, IFEQ, IFNE, IFLT, IFGT, IMULT, IDIV, IMOD, IEQ, INEQ, ILOE, IGOE, RETURN
- };
- typedef signed char code;
- int tags[256][0];
- code object[1000], *here = object;
- int idName;
- int ad = 0;
- void gen(code c) {
- ad++;
- *here++ = c;
- } /* overflow? */
- #ifdef SHOW_CODE
- #define g(c) do { printf(" %d",c); gen(c); } while (0)
- #define gi(c) do { printf("\n%s", #c); gen(c); } while (0)
- #else
- #define g(c) gen(c)
- #define gi(c) gen(c)
- #endif
- void fix(code *src, code *dst) { *src = dst - src; } /* overflow? */
- void c(node *x) {
- tags[x->val][0] = ad; //This array holds the addresses for the labels. Not working, so can't implement goto
- switch (x->kind) {
- case VAR :
- gi(ILOAD);
- g(x->val);
- break;
- case CST :
- gi(BIPUSH);
- g(x->val);
- break;
- case ADD :
- c(x->o1);
- c(x->o2);
- gi(IADD);
- break;
- case SUB :
- c(x->o1);
- c(x->o2);
- gi(ISUB);
- break;
- case MLT :
- c(x->o1);
- c(x->o2);
- gi(IMULT);
- break;
- case DIVI :
- c(x->o1);
- c(x->o2);
- gi(IDIV);
- break;
- case MODU :
- c(x->o1);
- c(x->o2);
- gi(IMOD);
- break;
- case EQ :
- c(x->o1);
- c(x->o2);
- gi(IEQ);
- break;
- case NEQ :
- c(x->o1);
- c(x->o2);
- gi(INEQ);
- break;
- case LTE :
- c(x->o1);
- c(x->o2);
- gi(ILOE);
- break;
- case GTE :
- c(x->o1);
- c(x->o2);
- gi(IGOE);
- break;
- case LT :
- gi(BIPUSH);
- g(1);
- c(x->o1);
- c(x->o2);
- gi(ISUB);
- gi(IFLT);
- g(4);
- gi(POP);
- gi(BIPUSH);
- g(0);
- break;
- case GT :
- gi(BIPUSH);
- g(0);
- c(x->o2);
- c(x->o1);
- gi(ISUB);
- gi(IFGT);
- g(4);
- gi(POP);
- gi(BIPUSH);
- g(1);
- break;
- case ASSIGN:
- c(x->o2);
- gi(DUP);
- gi(ISTORE);
- g(x->o1->val);
- break;
- case IF1 : {
- code *p1;
- c(x->o1);
- gi(IFEQ);
- p1 = here++;
- c(x->o2);
- fix(p1, here);
- break;
- }
- case IF2 : {
- code *p1, *p2;
- c(x->o1);
- gi(IFEQ);
- p1 = here++;
- c(x->o2);
- gi(GOTO);
- p2 = here++;
- fix(p1, here);
- c(x->o3);
- fix(p2, here);
- break;
- }
- case WHILE : {
- code *p1 = here, *p2;
- c(x->o1);
- gi(IFEQ);
- p2 = here++;
- c(x->o2);
- gi(GOTO);
- fix(here++, p1);
- fix(p2, here);
- break;
- }
- case DO : {
- code *p1 = here;
- c(x->o1);
- c(x->o2);
- gi(IFNE);
- fix(here++, p1);
- break;
- }
- case PRINT : {
- c(x->o1);
- gi(IPRINT);
- break;
- }
- case EMPTY :
- break;
- case SEQ :
- c(x->o1);
- c(x->o2);
- break;
- case EXPR :
- c(x->o1);
- gi(POP);
- break;
- case PROG :
- c(x->o1);
- gi(RETURN);
- break;
- case GOTO_K :
- idName = x->o1->val;
- int hasTag;
- int tag = tags[idName - 'a'][0];
- /**
- * Checks if a label is declared
- */
- for (int i = 0; i < 1000; i++) {
- if (idName == tag_name[i]) {
- if (hasTag) {
- fprintf(stderr, "forbidden double declaration of a label");
- freeMem(start);
- exit(1);
- }
- hasTag = 1;
- }
- }
- if (hasTag != 1) {
- fprintf(stderr, "undeclared goto destination");
- freeMem(start);
- exit(1);
- }
- wip("goto");
- gi(GOTO);
- g(tag - ad);
- //(tag-ad % 2 == 0) ? g(tag-ad) : g((tag-ad)+1);
- break;
- case BREAK :
- wip("break");
- break;
- case CONTINUE :
- wip("continue");
- break;
- }
- }
- /*---------------------------------------------------------------------------*/
- /* Machine virtuelle. */
- int globals[26];
- void run() {
- int stack[1000], *sp = stack; /* overflow? */
- code *pc = object;
- for (;;)
- switch (*pc++) {
- case ILOAD :
- *sp++ = globals[*pc++];
- break;
- case ISTORE:
- globals[*pc++] = *--sp;
- break;
- case BIPUSH:
- *sp++ = *pc++;
- break;
- case DUP :
- sp++;
- sp[-1] = sp[-2];
- break;
- case POP :
- --sp;
- break;
- case IADD :
- sp[-2] = sp[-2] + sp[-1];
- --sp;
- break;
- case ISUB :
- sp[-2] = sp[-2] - sp[-1];
- --sp;
- break;
- case IMULT :
- sp[-2] = sp[-2] * sp[-1];
- --sp;
- break;
- case IMOD :
- sp[-2] = sp[-2] % sp[-1];
- --sp;
- break;
- case IDIV :
- sp[-2] = sp[-2] / sp[-1];
- --sp;
- break;
- case IEQ :
- sp[-2] = sp[-2] == sp[-1];
- --sp;
- break;
- case INEQ :
- sp[-2] = sp[-2] != sp[-1];
- --sp;
- break;
- case ILOE :
- sp[-2] = sp[-2] <= sp[-1];
- --sp;
- break;
- case IGOE :
- sp[-2] = sp[-2] >= sp[-1];
- --sp;
- break;
- case IPRINT:
- printf("%d\n", *--sp);
- break;
- case GOTO :
- pc += *pc;
- break;
- case IFEQ :
- if (*--sp == 0) pc += *pc;
- else pc++;
- break;
- case IFNE :
- if (*--sp != 0) pc += *pc;
- else pc++;
- break;
- case IFLT :
- if (*--sp < 0) pc += *pc;
- else pc++;
- break;
- case IFGT :
- if (*--sp > 0) pc += *pc;
- else pc++;
- break;
- case RETURN:
- return;
- }
- }
- //Reads a file
- void runFromFile(char in[]) {
- source = fopen(in, "r");
- if (source == NULL) {
- printf("File not found. Enter source code here: \n");
- longjmp(env, 1);
- }
- }
- /*---------------------------------------------------------------------------*/
- /* Programme principal. */
- int main(int argc, char *argv[]) {
- //In case of an I/O error while reading the source code from a file
- int code = setjmp(env);
- int i;
- //Ask for user input if the file was incorrect
- if (code != 0) {
- goto nofile;
- }
- //If a file path has been specified in the command line read source code from file
- if (argc > 1) {
- runFromFile(argv[1]);
- }
- //This will affect how next_ch will behave
- fileRead = 1;
- nofile:
- c(program());
- #ifdef SHOW_CODE
- printf("\n");
- #endif
- for (i = 0; i < 26; i++)
- globals[i] = 0;
- run();
- return 0;
- }
- /*---------------------------------------------------------------------------*/
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement