Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- #include <stdio.h>
- #include <stdlib.h>
- #include <stdarg.h>
- #include <ctype.h>
- #include <string.h>
/*
 * One lexical token (atom) produced by the lexer.
 * Tokens are chained into a singly linked list through `next`.
 */
typedef struct _Token {
    int code;               /* token code (its name), one of the `atomi` enumerators */
    union {
        char *text;         /* used for ID and CT_STRING (dynamically allocated) */
        long int i;         /* used for CT_INT and CT_CHAR */
        double r;           /* used for CT_REAL */
    };
    int line;               /* line of the input file the token was found on */
    struct _Token *next;    /* link to the next token in the list */
} Token;
/*
 * Report a fatal error and terminate the program.
 *
 * Writes "error: ", the printf-style message built from `fmt` and the
 * variadic arguments, and a newline to stderr, then exits with status -1.
 * This function does not return.
 */
void err(const char *fmt, ...) {
    va_list args;

    va_start(args, fmt);
    fputs("error: ", stderr);
    vfprintf(stderr, fmt, args);
    fputs("\n", stderr);
    va_end(args);

    exit(-1);
}
/*
 * Allocate one object of type `Type` with malloc and store its address in
 * `var`; calls err("not enough memory") when the allocation fails.
 *
 * The body is wrapped in do { ... } while (0) so that the macro expands to
 * exactly one statement: `SAFEALLOC(p, T);` can then be used as the body of
 * an if/else without a stray empty statement or a mis-bound `else`.
 */
#define SAFEALLOC(var,Type) \
    do { \
        if (((var) = (Type*)malloc(sizeof(Type))) == NULL) \
            err("not enough memory"); \
    } while (0)
/* Token codes. The numeric values follow declaration order, starting at 0. */
enum atomi {
    /* identifier and end-of-input marker */
    ID,
    END,

    /* constants */
    CT_INT,
    CT_REAL,
    CT_CHAR,
    CT_STRING,

    /* delimiters */
    COMA,
    SEMICOLON,
    LPAR,
    RPAR,
    LBRACKET,
    RBRACKET,
    LACC,
    RACC,

    /* operators */
    ADD,
    SUB,
    MUL,
    DIV,
    DOT,
    AND,
    OR,
    NOT,
    ASSIGN,
    EQUAL,
    NOTEQ,
    LESS,
    LESSEQ,
    GREATER,
    GREATEREQ,

    /* keywords */
    BREAK,
    CHAR,
    DOUBLE,
    ELSE,
    FOR,
    IF,
    INT,
    RETURN,
    STRUCT,
    VOID,
    WHILE
};
- char buff[50001];
- Token *lastToken = NULL;
- Token *tokens = NULL;
- char *pCrtCh = buff;
- int tkline = 0;
- char *pStartCh;
- Token *addTk(int code)
- {
- Token *tk;
- SAFEALLOC(tk, Token);
- tk->code = code;
- tk->line = tkline;
- tk->next = NULL;
- if (lastToken) {
- lastToken->next = tk;
- }
- else {
- tokens = tk;
- }
- lastToken = tk;
- return tk;
- }
- void tkerr(const Token *tk, const char *fmt, ...)
- {
- va_list va;
- va_start(va, fmt);
- fprintf(stderr, "error in line %d: ", tk->line);
- vfprintf(stderr, fmt, va);
- fputc('\n', stderr);
- va_end(va);
- exit(-1);
- }
/*
 * Build a newly allocated, NUL-terminated copy of the characters in the
 * inclusive range [st, end], decoding backslash escape sequences on the way.
 *
 * Decoded escapes: \a \b \f \n \r \t \v \0 map to the corresponding control
 * character; for any other escaped character (\\ \' \" \? ...) the backslash
 * is dropped and the character itself is kept. A backslash that is the very
 * last character of the range is copied unchanged.
 *
 * An empty range (end < st) yields an empty string.
 *
 * The caller owns the returned buffer and must free() it. If the allocation
 * fails, an error is printed to stderr and the program exits with status -1.
 */
char *createString(const char *st, char *end) {
    /* The output is never longer than the input; +1 is for the terminator. */
    size_t maxLen = (end >= st) ? (size_t)(end - st) + 1 : 0;
    char *string = (char*)malloc(maxLen + 1);
    size_t cnt = 0;

    if (string == NULL) {
        fprintf(stderr, "error: not enough memory\n");
        exit(-1);
    }

    while (st <= end) {
        char c = *st++;

        /* Only look at the escaped character when it is still inside the range. */
        if (c == '\\' && st <= end) {
            char escaped = *st++;

            switch (escaped) {
            case 'a': c = '\a'; break;
            case 'b': c = '\b'; break;
            case 'f': c = '\f'; break;
            case 'n': c = '\n'; break;
            case 'r': c = '\r'; break;
            case 't': c = '\t'; break;
            case 'v': c = '\v'; break;
            case '0': c = '\0'; break;
            default:  c = escaped; break;   /* \\ \' \" \? : the character itself */
            }
        }
        string[cnt++] = c;
    }
    string[cnt] = '\0';
    return string;
}
- int getNextToken() {
- int state = 0;
- int nCh;
- char ch;
- Token *tk;
- while (1) {
- ch = *pCrtCh;
- switch (state) {
- case 0:
- if (ch == ',') {
- pCrtCh++;
- state = 33;
- }
- else if (ch == ';') {
- pCrtCh++;
- state = 34;
- }
- else if (ch == '(') {
- pCrtCh++;
- state = 35;
- }
- else if (ch == ')') {
- pCrtCh++;
- state = 36;
- }
- else if (ch == '[') {
- pCrtCh++;
- state = 38;
- }
- else if (ch == ']') {
- pCrtCh++;
- state = 37;
- }
- else if (ch == '{') {
- pCrtCh++;
- state = 39;
- }
- else if (ch == '}') {
- pCrtCh++;
- state = 40;
- }
- else if (ch == '+') {
- pCrtCh++;
- state = 48;
- }
- else if (ch == '-') {
- pCrtCh++;
- state = 49;
- }
- else if (ch == '*') {
- pCrtCh++;
- state = 50;
- }
- else if (ch == '.') {
- pCrtCh++;
- state = 47;
- }
- else if (ch == '&') {
- pCrtCh++;
- state = 51;
- }
- else if (ch == '|') {
- pCrtCh++;
- state = 53;
- }
- else if (ch == '/') {
- pCrtCh++;
- state = 24;
- }
- else if (ch == '=') {
- pCrtCh++;
- state = 55;
- }
- else if (ch == '!') {
- pCrtCh++;
- state = 58;
- }
- else if (ch == '>') {
- pCrtCh++;
- state = 41;
- }
- else if (ch == '<') {
- pCrtCh++;
- state = 44;
- }
- else if (ch == ' ' || ch == '\r' || ch == '\t') {
- pCrtCh++;
- }
- else if (ch == '\n') {
- pCrtCh++;
- tkline++;
- }
- else if (isalpha(ch) || ch == '_') {
- pStartCh = pCrtCh;
- pCrtCh++;
- state = 31;
- }
- else if (ch == '\'') {
- pCrtCh++;
- state = 14;
- }
- else if (ch == '"') {
- pStartCh = pCrtCh;
- pCrtCh++;
- state = 19;
- }
- else if (ch == '0') {
- pStartCh = pCrtCh;
- pCrtCh++;
- state = 3;
- }
- else if (isdigit(ch) && ch - '0' != 0) {
- pStartCh = pCrtCh;
- pCrtCh++;
- state = 1;
- }
- else tkerr(addTk(END), "caracter invalid");
- break;
- case 31:
- if (isalpha(ch) || isdigit(ch) || ch == '_') {
- pCrtCh++;
- }
- else {
- state = 32;
- }
- break;
- case 32:
- nCh = pCrtCh - pStartCh;
- if (nCh == 5 && !memcmp(pStartCh, "break", 5))
- tk = addTk(BREAK);
- else if (nCh == 4 && !memcmp(pStartCh, "char", 4))
- tk = addTk(CHAR);
- else if (nCh == 5 && !memcmp(pStartCh, "double", 5))
- tk = addTk(DOUBLE);
- else if (nCh == 4 && !memcmp(pStartCh, "else", 4))
- tk = addTk(ELSE);
- else if (nCh == 3 && !memcmp(pStartCh, "for", 3))
- tk = addTk(FOR);
- else if (nCh == 2 && !memcmp(pStartCh, "if", 2))
- tk = addTk(IF);
- else if (nCh == 3 && !memcmp(pStartCh, "int", 3))
- tk = addTk(INT);
- else if (nCh == 6 && !memcmp(pStartCh, "return", 6))
- tk = addTk(RETURN);
- else if (nCh == 6 && !memcmp(pStartCh, "struct", 6))
- tk = addTk(STRUCT);
- else if (nCh == 4 && !memcmp(pStartCh, "void", 4))
- tk = addTk(VOID);
- else if (nCh == 5 && !memcmp(pStartCh, "while", 5))
- tk = addTk(WHILE);
- else {
- tk = addTk(ID);
- tk->text = createString(pStartCh, pCrtCh - 1);
- }
- return tk->code;
- case 1:
- if (isdigit(ch)) {
- pCrtCh++;
- }
- else if (ch == '.') {
- pCrtCh++;
- state = 8;
- }
- else if (ch == 'e' || ch == 'E') {
- pCrtCh++;
- state = 10;
- }
- else {
- state = 2;
- }
- break;
- case 2:
- tk = addTk(CT_INT);
- if ((*(pStartCh + 1)) == 'x') {
- tk->i = strtol(createString(pStartCh, pCrtCh - 1), NULL, 16);
- }
- else if ((*pStartCh) == '0') {
- tk->i = strtol(createString(pStartCh, pCrtCh - 1), NULL, 8);
- }
- else
- tk->i = tk->i = strtol(createString(pStartCh, pCrtCh - 1), NULL, 10);
- return CT_INT;
- case 3:
- if (ch == 'x') {
- pCrtCh++;
- state = 5;
- }
- else if (ch == '9' || ch == '8') {
- pCrtCh++;
- state = 7;
- }
- else {
- pCrtCh++;
- state = 4;
- }
- break;
- case 4:
- if (ch - '0' <= 7 && ch - '0' >= 0) {
- pCrtCh++;
- }
- else if (ch == '.')
- {
- pCrtCh++;
- state = 9;
- }
- else if (ch == 'e' || ch == 'E')
- {
- pCrtCh++;
- state = 10;
- }
- else if (ch == '9' || ch == '8')
- {
- pCrtCh++;
- state = 7;
- }
- else {
- state = 2;
- }
- break;
- case 5:
- if (isdigit(ch) || (tolower(ch) <= 'f' && tolower(ch) >= 'a')) {
- pCrtCh++;
- state = 6;
- }
- else tkerr(addTk(END), "Caracter invalid la starea 5.\n");
- break;
- case 6:
- if (isdigit(ch) || (tolower(ch) <= 'f' && tolower(ch) >= 'a')) {
- pCrtCh++;
- }
- else {
- state = 2;
- }
- break;
- case 7:
- if (ch == '.')
- {
- pCrtCh++;
- state = 8;
- }
- else tkerr(addTk(END), "Caracter invalid la starea 7.\n");
- break;
- case 8:
- if (isdigit(ch))
- {
- pCrtCh++;
- state = 9;
- }
- else tkerr(addTk(END), "Caracter invalid la starea 8.\n");
- break;
- case 9:
- if (isdigit(ch))
- pCrtCh++;
- else if (ch == 'e' || ch == 'E')
- {
- pCrtCh++;
- state = 10;
- }
- else
- state = 13;
- break;
- case 10:
- if (ch == '-' || ch == '+') {
- pCrtCh++;
- state = 11;
- }
- else if (isdigit(ch)) {
- pCrtCh++;
- state = 12;
- }
- else tkerr(addTk(END), "Caracter invalid la starea 10.\n");
- break;
- case 11:
- if (isdigit(ch)) {
- pCrtCh++;
- state = 12;
- }
- else tkerr(addTk(END), "Caracter invalid la starea 11.\n");
- break;
- case 12:
- if (isdigit(ch)) {
- pCrtCh++;
- }
- else state = 13;
- break;
- case 13:
- tk = addTk(CT_REAL);
- tk->r = atof(createString(pStartCh, pCrtCh));
- return CT_REAL;
- case 14:
- //pStartCh = pCrtCh;
- if (ch == '\\')
- {
- pCrtCh++;
- state = 16;
- }
- else
- state = 17;
- break;
- case 16:
- if (ch == 'a' || ch == 'b' || ch == 'f' || ch == 'r' || ch == 'n' || ch == 't' || ch == 'v' || ch == '\'' || ch == '?' || ch == '"' || ch == '0' || ch == '\\') {
- pCrtCh++;
- state = 17;
- }
- else tkerr(addTk(END), "Caracter invalid la starea 16.\n");
- break;
- case 17:
- if (ch == '\'')
- {
- pCrtCh++;
- state = 18;
- }
- case 18:
- tk = addTk(CT_CHAR);
- if (*(pStartCh + 1) == 'n')
- {
- tk->i = '\n';
- break;
- }
- else if (*(pStartCh + 1) == 't')
- {
- tk->i = '\t';
- break;
- }
- else if (*(pStartCh + 1) == '\\')
- {
- tk->i = '\\';
- break;
- }
- else
- {
- tk->i = *pCrtCh - '0';
- return CT_CHAR;
- }
- case 19:
- if (ch == '\\')
- {
- pCrtCh++;
- state = 21;
- }
- else if (ch == '"')
- {
- pCrtCh++;
- state = 23;
- }
- else
- pCrtCh++;
- break;
- case 21:
- if (ch == 'a' || ch == 'b' || ch == 'f' || ch == 'r' || ch == 'n' || ch == 't' || ch == 'v' || ch == '\'' || ch == '?' || ch == '"' || ch == '0' || ch == '\\') {
- pCrtCh++;
- state = 22;
- }
- else tkerr(addTk(END), "Caracter invalid la starea 21.\n");
- break;
- case 22:
- if (ch == '"')
- {
- pCrtCh++;
- state = 23;
- }
- else
- state = 19;
- break;
- case 23:
- tk = addTk(CT_STRING);
- tk->text = createString(pStartCh + 1, pCrtCh - 2);
- return CT_STRING;
- case 24:
- if (ch == '*')
- {
- pCrtCh++;
- state = 26;
- }
- else if (ch == '/')
- {
- pCrtCh++;
- state = 61;
- }
- else
- state = 25;
- break;
- case 25:
- addTk(DIV);
- return DIV;
- case 26:
- if (ch == '*')
- {
- pCrtCh++;
- state = 27;
- }
- else
- pCrtCh++;
- break;
- case 27:
- if (ch == '*')
- pCrtCh++;
- if (ch == '/')
- state = 0;
- else
- state = 26;
- break;
- case 33:
- addTk(COMA);
- return COMA;
- case 34:
- addTk(SEMICOLON);
- return SEMICOLON;
- case 35:
- addTk(LPAR);
- return LPAR;
- case 36:
- addTk(RPAR);
- return RPAR;
- case 38:
- addTk(LBRACKET);
- return LBRACKET;
- case 37:
- addTk(RBRACKET);
- return RBRACKET;
- case 39:
- addTk(LACC);
- return LACC;
- case 40:
- addTk(RACC);
- return RACC;
- case 41:
- if (ch == '=')
- {
- pCrtCh++;
- state = 43;
- }
- else state = 42;
- break;
- case 42:
- addTk(GREATER);
- return GREATER;
- case 43:
- addTk(GREATEREQ);
- return GREATEREQ;
- case 44:
- if (ch == '=')
- {
- pCrtCh++;
- state = 46;
- }
- else
- state = 45;
- break;
- case 45:
- addTk(LESS);
- return LESS;
- case 46:
- addTk(LESSEQ);
- return LESSEQ;
- case 47:
- addTk(DOT);
- return DOT;
- case 48:
- addTk(ADD);
- return ADD;
- case 49:
- addTk(SUB);
- return SUB;
- case 50:
- addTk(MUL);
- return MUL;
- case 51:
- if (ch == '&')
- {
- pCrtCh++;
- state = 52;
- break;
- }
- else
- tkerr(addTk(END), "Caracter invalid la starea 51.\n");
- break;
- case 52:
- addTk(AND);
- return AND;
- case 53:
- if (ch == '|')
- {
- pCrtCh++;
- state = 54;
- }
- else tkerr(addTk(END), "Caracter invalid la starea 53.\n");
- break;
- case 54:
- addTk(OR);
- return OR;
- case 55:
- if (ch == '=')
- {
- pCrtCh++;
- state = 57;
- }
- else
- state = 56;
- break;
- case 56:
- addTk(ASSIGN);
- return ASSIGN;
- case 57:
- addTk(EQUAL);
- return EQUAL;
- case 58:
- if (ch == '=')
- {
- pCrtCh++;
- state = 59;
- }
- else
- state = 60;
- break;
- case 59:
- addTk(NOTEQ);
- return NOTEQ;
- case 60:
- addTk(NOT);
- return NOT;
- case 61:
- if (ch != '\n' || ch != '\t' || ch != '\r')
- pCrtCh++;
- else
- state = 0;
- break;
- }
- }
- }
- void afisare() {
- char *it = pCrtCh;
- while ((*it) != '\0') {
- printf("%c", *it);
- it++;
- }
- printf("\n");
- }
- void printAtom(Token *tk) {
- if (tk->code == END) {
- printf("END\n");
- }
- else if (tk->code == COMA) {
- printf("COMMA ");
- }
- else if (tk->code == SEMICOLON) {
- printf("SEMICOLON ");
- }
- else if (tk->code == LPAR) {
- printf("LPAR ");
- }
- else if (tk->code == RPAR) {
- printf("RPAR ");
- }
- else if (tk->code == LBRACKET) {
- printf("LBRACKET ");
- }
- else if (tk->code == RBRACKET) {
- printf("RBRACKET ");
- }
- else if (tk->code == LACC) {
- printf("LACC ");
- }
- else if (tk->code == RACC) {
- printf("RACC ");
- }
- else if (tk->code == ADD) {
- printf("ADD ");
- }
- else if (tk->code == SUB) {
- printf("SUB ");
- }
- else if (tk->code == MUL) {
- printf("MUL ");
- }
- else if (tk->code == DOT) {
- printf("DOT ");
- }
- else if (tk->code == AND) {
- printf("AND ");
- }
- else if (tk->code == OR) {
- printf("OR ");
- }
- else if (tk->code == DIV) {
- printf("DIV ");
- }
- else if (tk->code == NOT) {
- printf("NOT ");
- }
- else if (tk->code == NOTEQ) {
- printf("NOTEQ ");
- }
- else if (tk->code == ASSIGN) {
- printf("ASSIGN ");
- }
- else if (tk->code == EQUAL) {
- printf("EQUAL ");
- }
- else if (tk->code == GREATER) {
- printf("GREATER ");
- }
- else if (tk->code == GREATEREQ) {
- printf("GREATEREQ ");
- }
- else if (tk->code == LESS) {
- printf("LESS ");
- }
- else if (tk->code == LESSEQ) {
- printf("LESSEQ ");
- }
- else if (tk->code == ID) {
- printf("ID:%s ", tk->text);
- }
- else if (tk->code == CT_CHAR) {
- printf("CT_CHAR:%c ", tk->i + '0');
- }
- else if (tk->code == CT_STRING) {
- printf("CT_STRING:%s ", tk->text);
- }
- else if (tk->code == CT_INT) {
- printf("CT_INT:%d ", tk->i);
- }
- else if (tk->code == CT_REAL) {
- printf("CT_REAL:%f ", tk->r);
- }
- else if (tk->code == BREAK) {
- printf("BREAK ");
- }
- else if (tk->code == CHAR) {
- printf("CHAR ");
- }
- else if (tk->code == DOUBLE) {
- printf("DOUBLE ");
- }
- else if (tk->code == ELSE) {
- printf("ELSE ");
- }
- else if (tk->code == FOR) {
- printf("FOR ");
- }
- else if (tk->code == IF) {
- printf("IF ");
- }
- else if (tk->code == INT) {
- printf("INT ");
- }
- else if (tk->code == RETURN) {
- printf("RETURN ");
- }
- else if (tk->code == STRUCT) {
- printf("STRUCT ");
- }
- else if (tk->code == VOID) {
- printf("VOID ");
- }
- else if (tk->code == WHILE) {
- printf("WHILE ");
- }
- }
- void Atoms_afis() {
- Token *tk1 = tokens;
- printf("Result:\n");
- while (tk1 != NULL) {
- printAtom(tk1);
- tk1 = tk1->next;
- }
- printf("\n");
- }
- int main() {
- FILE *f;
- int noCh;
- if ((f = fopen("a.txt", "r")) == NULL) {
- printf("Eroare la deschiderea fisierului\n");
- exit(-1);
- }
- if ((noCh = fread(buff, 1, 50000, f)) <= 0) {
- printf("Eroare la citirea din fisier\n");
- exit(-1);
- }
- buff[noCh] = '\0';
- afisare();
- while ((*pCrtCh) != '\0') {
- getNextToken();
- }
- addTk(END);
- Atoms_afis();
- fclose(f);
- return 0;
- }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement