/* Lexical analyzer: reads a.txt, splits it into a linked list of tokens and prints them. */

#include <stdio.h>
#include <stdlib.h>
#include <stdarg.h>
#include <ctype.h>
#include <string.h>


/* One lexical token (atom); tokens are chained in a singly linked list. */
typedef struct _Token Token;

struct _Token {
	int code;            // token code (its name, see enum atomi)
	union {
		char *text;      // used for ID and CT_STRING (dynamically allocated)
		long int i;      // used for CT_INT and CT_CHAR
		double r;        // used for CT_REAL
	};
	int line;            // line in the input file
	struct _Token *next; // link to the next token in the list
};

/*
 * Reports a fatal error and terminates the program.
 *
 * Writes "error: ", the printf-style message built from fmt and the variadic
 * arguments, and a newline to stderr, then exits with status -1. Never returns.
 */
void err(const char *fmt, ...) {
	va_list args;

	fputs("error: ", stderr);

	va_start(args, fmt);
	vfprintf(stderr, fmt, args);
	va_end(args);

	putc('\n', stderr);
	exit(-1);
}


/*
 * Allocates one object of type Type and stores its address in var; calls
 * err() (which terminates the program) when malloc fails.
 * Wrapped in do { } while (0) so that "SAFEALLOC(p, T);" is exactly one
 * statement and stays safe inside an unbraced if/else.
 */
#define SAFEALLOC(var,Type) do { if (((var) = (Type*)malloc(sizeof(Type))) == NULL) err("not enough memory"); } while (0)

/* Token codes produced by the lexer. The numeric values follow declaration order. */
enum atomi {
	/* identifier and end-of-input marker */
	ID,
	END,
	/* constants */
	CT_INT,
	CT_REAL,
	CT_CHAR,
	CT_STRING,
	/* delimiters: , ; ( ) [ ] { } */
	COMA,
	SEMICOLON,
	LPAR,
	RPAR,
	LBRACKET,
	RBRACKET,
	LACC,
	RACC,
	/* operators: + - * / . && || ! = == != < <= > >= */
	ADD,
	SUB,
	MUL,
	DIV,
	DOT,
	AND,
	OR,
	NOT,
	ASSIGN,
	EQUAL,
	NOTEQ,
	LESS,
	LESSEQ,
	GREATER,
	GREATEREQ,
	/* keywords */
	BREAK,
	CHAR,
	DOUBLE,
	ELSE,
	FOR,
	IF,
	INT,
	RETURN,
	STRUCT,
	VOID,
	WHILE
};

/* Lexer state shared by the functions in this file. */
char buff[50001];          // input text; main() reads at most 50000 bytes and NUL-terminates it
Token *lastToken = NULL;   // tail of the token list (where addTk appends)
Token *tokens = NULL;      // head of the token list
char *pCrtCh = buff;       // current scanning position inside buff
int tkline = 1;            // current input line, 1-based so diagnostics match what editors show
char *pStartCh = NULL;     // start of the token currently being scanned

/*
 * Allocates a token with the given code, stamps it with the current line
 * (tkline) and appends it to the global token list.
 *
 * The value union (text / i / r) is left unset; the caller fills it in.
 * Returns the new token. Allocation failure is fatal (handled by SAFEALLOC).
 */
Token *addTk(int code)
{
	Token *node;

	SAFEALLOC(node, Token);
	node->next = NULL;
	node->line = tkline;
	node->code = code;

	/* First token becomes the list head; later ones hang off the tail. */
	if (lastToken == NULL)
		tokens = node;
	else
		lastToken->next = node;
	lastToken = node;

	return node;
}

/*
 * Reports a fatal error located at token tk and terminates the program.
 *
 * Writes "error in line N: " (N taken from tk->line), the printf-style
 * message built from fmt and the variadic arguments, and a newline to stderr,
 * then exits with status -1. Never returns. tk must not be NULL.
 */
void tkerr(const Token *tk, const char *fmt, ...)
{
	va_list args;
	int where = tk->line;

	fprintf(stderr, "error in line %d: ", where);

	va_start(args, fmt);
	vfprintf(stderr, fmt, args);
	va_end(args);

	putc('\n', stderr);
	exit(-1);
}


/*
 * Builds a newly allocated, NUL-terminated copy of the characters in the
 * inclusive range [st, end], decoding backslash escape sequences on the way.
 *
 * st   - first character to copy
 * end  - last character to copy (inclusive); end == st - 1 denotes an empty
 *        range and yields an empty string
 *
 * Escapes: \a \b \f \n \r \t \v \0 are translated to the character they name.
 * For any other escaped character (\\ \' \" \? included) the backslash is
 * dropped and the character itself is kept. A lone backslash as the very last
 * character of the range is dropped. Note that a decoded \0 ends the result
 * early when it is later read as a C string.
 *
 * Returns the new string; the caller owns it and must free() it. On allocation
 * failure prints "error: not enough memory" to stderr and exits with status -1.
 */
char *createString(const char *st, char *end) {
	/* Decoding never lengthens the text, so the raw length is an upper bound. */
	size_t maxLen = (end >= st) ? (size_t)(end - st) + 1 : 0;
	char *string = malloc(maxLen + 1); /* +1 for the terminating NUL */
	size_t cnt = 0;

	if (string == NULL) {
		fprintf(stderr, "error: not enough memory\n");
		exit(-1);
	}

	while (st <= end) {
		char c = *st++;

		if (c == '\\') {
			if (st > end) {
				break; /* lone trailing backslash: nothing left to escape */
			}
			/* Consume the escaped character together with its backslash so
			   that "\\\\" cannot be re-read as the start of another escape. */
			c = *st++;
			switch (c) {
			case 'a': c = '\a'; break;
			case 'b': c = '\b'; break;
			case 'f': c = '\f'; break;
			case 'n': c = '\n'; break;
			case 'r': c = '\r'; break;
			case 't': c = '\t'; break;
			case 'v': c = '\v'; break;
			case '0': c = '\0'; break;
			default: break; /* \\ \' \" \? and unknown escapes keep the character */
			}
		}
		string[cnt++] = c;
	}
	string[cnt] = '\0';
	return string;
}
int getNextToken() {
	int state = 0;
	int nCh;
	char ch;

	Token *tk;

	while (1) {
		ch = *pCrtCh;
		switch (state) {
		case 0:
			if (ch == ',') {
				pCrtCh++;
				state = 33;
			}
			else if (ch == ';') {
				pCrtCh++;
				state = 34;
			}
			else if (ch == '(') {
				pCrtCh++;
				state = 35;
			}
			else if (ch == ')') {
				pCrtCh++;
				state = 36;
			}
			else if (ch == '[') {
				pCrtCh++;
				state = 38;
			}
			else if (ch == ']') {
				pCrtCh++;
				state = 37;
			}
			else if (ch == '{') {
				pCrtCh++;
				state = 39;
			}
			else if (ch == '}') {
				pCrtCh++;
				state = 40;
			}
			else if (ch == '+') {
				pCrtCh++;
				state = 48;
			}
			else if (ch == '-') {
				pCrtCh++;
				state = 49;
			}
			else if (ch == '*') {
				pCrtCh++;
				state = 50;
			}
			else if (ch == '.') {
				pCrtCh++;
				state = 47;
			}
			else if (ch == '&') {
				pCrtCh++;
				state = 51;
			}
			else if (ch == '|') {
				pCrtCh++;
				state = 53;
			}
			else if (ch == '/') {
				pCrtCh++;
				state = 24;
			}
			else if (ch == '=') {
				pCrtCh++;
				state = 55;
			}
			else if (ch == '!') {
				pCrtCh++;
				state = 58;
			}
			else if (ch == '>') {
				pCrtCh++;
				state = 41;
			}
			else if (ch == '<') {
				pCrtCh++;
				state = 44;
			}
			else if (ch == ' ' || ch == '\r' || ch == '\t') {
				pCrtCh++;
			}
			else if (ch == '\n') {
				pCrtCh++;
				tkline++;
			}
			else if (isalpha(ch) || ch == '_') {
				pStartCh = pCrtCh;
				pCrtCh++;
				state = 31;
			}
			else if (ch == '\'') {
				pCrtCh++;
				state = 14;
			}
			else if (ch == '"') {
				pStartCh = pCrtCh;
				pCrtCh++;
				state = 19;
			}
			else if (ch == '0') {
				pStartCh = pCrtCh;
				pCrtCh++;
				state = 3;
			}
			else if (isdigit(ch) && ch - '0' != 0) {
				pStartCh = pCrtCh;
				pCrtCh++;
				state = 1;
			}
			else tkerr(addTk(END), "caracter invalid");
			break;
		case 31:
			if (isalpha(ch) || isdigit(ch) || ch == '_') {
				pCrtCh++;
			}
			else {
				state = 32;
			}
			break;
		case 32:
			nCh = pCrtCh - pStartCh;
			if (nCh == 5 && !memcmp(pStartCh, "break", 5))
				tk = addTk(BREAK);
			else if (nCh == 4 && !memcmp(pStartCh, "char", 4))
				tk = addTk(CHAR);
			else if (nCh == 5 && !memcmp(pStartCh, "double", 5))
				tk = addTk(DOUBLE);
			else if (nCh == 4 && !memcmp(pStartCh, "else", 4))
				tk = addTk(ELSE);
			else if (nCh == 3 && !memcmp(pStartCh, "for", 3))
				tk = addTk(FOR);
			else if (nCh == 2 && !memcmp(pStartCh, "if", 2))
				tk = addTk(IF);
			else if (nCh == 3 && !memcmp(pStartCh, "int", 3))
				tk = addTk(INT);
			else if (nCh == 6 && !memcmp(pStartCh, "return", 6))
				tk = addTk(RETURN);
			else if (nCh == 6 && !memcmp(pStartCh, "struct", 6))
				tk = addTk(STRUCT);
			else if (nCh == 4 && !memcmp(pStartCh, "void", 4))
				tk = addTk(VOID);
			else if (nCh == 5 && !memcmp(pStartCh, "while", 5))
				tk = addTk(WHILE);
			else {
				tk = addTk(ID);
				tk->text = createString(pStartCh, pCrtCh - 1);
			}
			return tk->code;
		case 1:
			if (isdigit(ch)) {
				pCrtCh++;
			}
			else if (ch == '.') {
				pCrtCh++;
				state = 8;
			}
			else if (ch == 'e' || ch == 'E') {
				pCrtCh++;
				state = 10;
			}
			else {
				state = 2;
			}
			break;
		case 2:
			tk = addTk(CT_INT);
			if ((*(pStartCh + 1)) == 'x') {
				tk->i = strtol(createString(pStartCh, pCrtCh - 1), NULL, 16);
			}
			else if ((*pStartCh) == '0') {
				tk->i = strtol(createString(pStartCh, pCrtCh - 1), NULL, 8);
			}
			else
				tk->i = tk->i = strtol(createString(pStartCh, pCrtCh - 1), NULL, 10);
			return CT_INT;
		case 3:
			if (ch == 'x') {
				pCrtCh++;
				state = 5;
			}
			else if (ch == '9' || ch == '8') {
				pCrtCh++;
				state = 7;
			}
			else {
				pCrtCh++;
				state = 4;
			}
			break;
		case 4:
			if (ch - '0' <= 7 && ch - '0' >= 0) {
				pCrtCh++;
			}
			else if (ch == '.')
			{
				pCrtCh++;
				state = 9;
			}
			else if (ch == 'e' || ch == 'E')
			{
				pCrtCh++;
				state = 10;
			}
			else if (ch == '9' || ch == '8')
			{
				pCrtCh++;
				state = 7;
			}
			else {
				state = 2;
			}
			break;
		case 5:
			if (isdigit(ch) || (tolower(ch) <= 'f' && tolower(ch) >= 'a')) {
				pCrtCh++;
				state = 6;
			}
			else tkerr(addTk(END), "Caracter invalid la starea 5.\n");
			break;
		case 6:
			if (isdigit(ch) || (tolower(ch) <= 'f' && tolower(ch) >= 'a')) {
				pCrtCh++;
			}
			else {
				state = 2;
			}
			break;
		case 7:
			if (ch == '.')
			{
				pCrtCh++;
				state = 8;
			}
			else tkerr(addTk(END), "Caracter invalid la starea 7.\n");
			break;

		case 8:
			if (isdigit(ch))
			{
				pCrtCh++;
				state = 9;
			}
			else tkerr(addTk(END), "Caracter invalid la starea 8.\n");
			break;
		case 9:
			if (isdigit(ch))
				pCrtCh++;
			else if (ch == 'e' || ch == 'E')
			{
				pCrtCh++;
				state = 10;
			}
			else
				state = 13;
			break;
		case 10:
			if (ch == '-' || ch == '+') {
				pCrtCh++;
				state = 11;
			}
			else if (isdigit(ch)) {
				pCrtCh++;
				state = 12;
			}
			else tkerr(addTk(END), "Caracter invalid la starea 10.\n");
			break;
		case 11:
			if (isdigit(ch)) {
				pCrtCh++;
				state = 12;
			}
			else tkerr(addTk(END), "Caracter invalid la starea 11.\n");
			break;
		case 12:
			if (isdigit(ch)) {
				pCrtCh++;
			}
			else state = 13;
			break;
		case 13:
			tk = addTk(CT_REAL);
			tk->r = atof(createString(pStartCh, pCrtCh));
			return CT_REAL;
		case 14:
			//pStartCh = pCrtCh;
			if (ch == '\\')
			{
				pCrtCh++;
				state = 16;
			}
			else
				state = 17;
			break;
		case 16:
			if (ch == 'a' || ch == 'b' || ch == 'f' || ch == 'r' || ch == 'n' || ch == 't' || ch == 'v' || ch == '\'' || ch == '?' || ch == '"' || ch == '0' || ch == '\\') {
				pCrtCh++;
				state = 17;
			}
			else tkerr(addTk(END), "Caracter invalid la starea 16.\n");
			break;

		case 17:
			if (ch == '\'')
			{
				pCrtCh++;
				state = 18;
			}
		case 18:
			tk = addTk(CT_CHAR);

			if (*(pStartCh + 1) == 'n')
			{
				tk->i = '\n';
				break;
			}
			else if (*(pStartCh + 1) == 't')
			{
				tk->i = '\t';
				break;
			}
			else if (*(pStartCh + 1) == '\\')
			{
				tk->i = '\\';
				break;
			}
			else
			{

				tk->i = *pCrtCh - '0';
				return CT_CHAR;
			}
		case 19:
			if (ch == '\\')
			{
				pCrtCh++;
				state = 21;
			}
			else if (ch == '"')
			{
				pCrtCh++;
				state = 23;
			}
			else
				pCrtCh++;
			break;
		case 21:
			if (ch == 'a' || ch == 'b' || ch == 'f' || ch == 'r' || ch == 'n' || ch == 't' || ch == 'v' || ch == '\'' || ch == '?' || ch == '"' || ch == '0' || ch == '\\') {
				pCrtCh++;
				state = 22;
			}
			else tkerr(addTk(END), "Caracter invalid la starea 21.\n");
			break;
		case 22:
			if (ch == '"')
			{
				pCrtCh++;
				state = 23;
			}
			else
				state = 19;
			break;
		case 23:

			tk = addTk(CT_STRING);
			tk->text = createString(pStartCh + 1, pCrtCh - 2);
			return CT_STRING;
		case 24:
			if (ch == '*')
			{
				pCrtCh++;
				state = 26;
			}
			else if (ch == '/')
			{
				pCrtCh++;
				state = 61;
			}
			else
				state = 25;
			break;
		case 25:
			addTk(DIV);
			return DIV;
		case 26:
			if (ch == '*')
			{
				pCrtCh++;
				state = 27;
			}
			else
				pCrtCh++;
			break;
		case 27:
			if (ch == '*')
				pCrtCh++;
			if (ch == '/')
				state = 0;
			else
				state = 26;
			break;
		case 33:
			addTk(COMA);
			return COMA;
		case 34:
			addTk(SEMICOLON);
			return SEMICOLON;
		case 35:
			addTk(LPAR);
			return LPAR;
		case 36:
			addTk(RPAR);
			return RPAR;
		case 38:
			addTk(LBRACKET);
			return LBRACKET;
		case 37:
			addTk(RBRACKET);
			return RBRACKET;
		case 39:
			addTk(LACC);
			return LACC;
		case 40:
			addTk(RACC);
			return RACC;
		case 41:
			if (ch == '=')
			{
				pCrtCh++;
				state = 43;
			}
			else state = 42;
			break;
		case 42:
			addTk(GREATER);
			return GREATER;
		case 43:
			addTk(GREATEREQ);
			return GREATEREQ;
		case 44:
			if (ch == '=')
			{
				pCrtCh++;
				state = 46;
			}
			else
				state = 45;
			break;
		case 45:
			addTk(LESS);
			return LESS;
		case 46:
			addTk(LESSEQ);
			return LESSEQ;
		case 47:
			addTk(DOT);
			return DOT;
		case 48:
			addTk(ADD);
			return ADD;
		case 49:
			addTk(SUB);
			return SUB;
		case 50:
			addTk(MUL);
			return MUL;
		case 51:
			if (ch == '&')
			{
				pCrtCh++;
				state = 52;
				break;
			}
			else
				tkerr(addTk(END), "Caracter invalid la starea 51.\n");
			break;
		case 52:
			addTk(AND);
			return AND;
		case 53:
			if (ch == '|')
			{

				pCrtCh++;
				state = 54;
			}
			else tkerr(addTk(END), "Caracter invalid la starea 53.\n");
			break;
		case 54:
			addTk(OR);
			return OR;
		case 55:
			if (ch == '=')
			{
				pCrtCh++;
				state = 57;
			}
			else
				state = 56;
			break;
		case 56:
			addTk(ASSIGN);
			return ASSIGN;
		case 57:
			addTk(EQUAL);
			return EQUAL;
		case 58:
			if (ch == '=')
			{
				pCrtCh++;
				state = 59;
			}
			else
				state = 60;
			break;
		case 59:
			addTk(NOTEQ);
			return NOTEQ;
		case 60:
			addTk(NOT);
			return NOT;
		case 61:
			if (ch != '\n' || ch != '\t' || ch != '\r')
				pCrtCh++;
			else
				state = 0;
			break;


		}
	}
}

void afisare() {
	char *it = pCrtCh;
	while ((*it) != '\0') {
		printf("%c", *it);
		it++;
	}
	printf("\n");
}
void printAtom(Token *tk) {
	if (tk->code == END) {
		printf("END\n");
	}
	else if (tk->code == COMA) {
		printf("COMMA ");
	}
	else if (tk->code == SEMICOLON) {
		printf("SEMICOLON ");
	}
	else if (tk->code == LPAR) {
		printf("LPAR ");
	}
	else if (tk->code == RPAR) {
		printf("RPAR ");
	}
	else if (tk->code == LBRACKET) {
		printf("LBRACKET ");
	}
	else if (tk->code == RBRACKET) {
		printf("RBRACKET ");
	}
	else if (tk->code == LACC) {
		printf("LACC ");
	}
	else if (tk->code == RACC) {
		printf("RACC ");
	}
	else if (tk->code == ADD) {
		printf("ADD ");
	}
	else if (tk->code == SUB) {
		printf("SUB ");
	}
	else if (tk->code == MUL) {
		printf("MUL ");
	}
	else if (tk->code == DOT) {
		printf("DOT ");
	}
	else if (tk->code == AND) {
		printf("AND ");
	}
	else if (tk->code == OR) {
		printf("OR ");
	}
	else if (tk->code == DIV) {
		printf("DIV ");
	}
	else if (tk->code == NOT) {
		printf("NOT ");
	}
	else if (tk->code == NOTEQ) {
		printf("NOTEQ ");
	}
	else if (tk->code == ASSIGN) {
		printf("ASSIGN ");
	}
	else if (tk->code == EQUAL) {
		printf("EQUAL ");
	}
	else if (tk->code == GREATER) {
		printf("GREATER ");
	}
	else if (tk->code == GREATEREQ) {
		printf("GREATEREQ ");
	}
	else if (tk->code == LESS) {
		printf("LESS ");
	}
	else if (tk->code == LESSEQ) {
		printf("LESSEQ ");
	}
	else if (tk->code == ID) {
		printf("ID:%s ", tk->text);
	}
	else if (tk->code == CT_CHAR) {
		printf("CT_CHAR:%c ", tk->i + '0');
	}
	else if (tk->code == CT_STRING) {
		printf("CT_STRING:%s ", tk->text);
	}
	else if (tk->code == CT_INT) {
		printf("CT_INT:%d ", tk->i);
	}
	else if (tk->code == CT_REAL) {
		printf("CT_REAL:%f ", tk->r);
	}
	else if (tk->code == BREAK) {
		printf("BREAK ");
	}
	else if (tk->code == CHAR) {
		printf("CHAR ");
	}
	else if (tk->code == DOUBLE) {
		printf("DOUBLE ");
	}
	else if (tk->code == ELSE) {
		printf("ELSE ");
	}
	else if (tk->code == FOR) {
		printf("FOR ");
	}
	else if (tk->code == IF) {
		printf("IF ");
	}
	else if (tk->code == INT) {
		printf("INT ");
	}
	else if (tk->code == RETURN) {
		printf("RETURN ");
	}
	else if (tk->code == STRUCT) {
		printf("STRUCT ");
	}
	else if (tk->code == VOID) {
		printf("VOID ");
	}
	else if (tk->code == WHILE) {
		printf("WHILE ");
	}
}

/* Prints a "Result:" header, every token of the global list in order, then a newline. */
void Atoms_afis() {
	Token *cur;

	printf("Result:\n");
	for (cur = tokens; cur != NULL; cur = cur->next) {
		printAtom(cur);
	}
	printf("\n");
}

/*
 * Reads a.txt (at most 50000 bytes) into buff, echoes it, tokenizes it and
 * prints the resulting token list.
 *
 * Exits with status -1, after a message on stderr, when the file cannot be
 * opened, cannot be read, or is empty. Returns 0 on success.
 */
int main() {
	FILE *f;
	size_t noCh;

	if ((f = fopen("a.txt", "r")) == NULL) {
		fprintf(stderr, "Eroare la deschiderea fisierului\n");
		exit(-1);
	}

	/* Leave room for the terminating NUL the lexer relies on. */
	noCh = fread(buff, 1, sizeof(buff) - 1, f);
	if (noCh == 0 || ferror(f)) {
		fclose(f);
		fprintf(stderr, "Eroare la citirea din fisier\n");
		exit(-1);
	}
	/* The whole input is in memory now; the file is no longer needed. */
	fclose(f);
	buff[noCh] = '\0';
	afisare();

	while ((*pCrtCh) != '\0') {
		getNextToken();
	}
	addTk(END);
	Atoms_afis();
	return 0;
}