/*
 * Lexical analyzer for a small C-like language.
 *
 * The input is a NUL-terminated buffer pointed to by pCrtCh. getTokens()
 * turns it into a singly linked list of Token (head: tokens, tail:
 * lastToken) that always ends with exactly one END token.
 */
#include <ctype.h>
#include <stdarg.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define SIZE 2046

/* Allocate one object of Type into var; terminates through err() when out
 * of memory. Wrapped in do/while(0) so it behaves like a single statement. */
#define SAFEALLOC(var,Type) \
    do { \
        if (((var) = (Type *)malloc(sizeof(Type))) == NULL) \
            err("not enough memory"); \
    } while (0)

/* Token codes. printTokens() prints the numeric value, so the order matters. */
enum{
    ID, END,
    //constants
    CT_INT, CT_REAL, CT_CHAR, CT_STRING,
    //keywords
    BREAK, CHAR, DOUBLE, ELSE, FOR, IF, INT, RETURN, STRUCT, VOID, WHILE,
    //delimiters
    COMMA, SEMICOLON, LPAR, RPAR, LBRACKET, RBRACKET, LACC, RACC,
    //operators
    ADD, SUB, MUL, DIV, DOT, AND, OR, NOT, ASSIGN, EQUAL, NOTEQ,
    LESS, LESSEQ, GREATER, GREATEREQ,
    SPACE, LINECOMMENT, COMMENT
};

/* One lexical token. Which union member is valid depends on code:
 * text for ID and CT_STRING (heap allocated), i for CT_INT and CT_CHAR,
 * r for CT_REAL. */
typedef struct _Token
{
    int code;
    union{
        char *text;
        long int i;
        double r;
    };
    int line;               /* value of the global line when the token was added */
    struct _Token *next;
}Token;

Token *tokens;              /* head of the token list */
Token *lastToken = NULL;    /* tail of the token list */
const char *pCrtCh;         /* cursor into the NUL-terminated input */
int line = 1;               /* current input line, 1-based */

/* Print a formatted error message to stderr and terminate with exit(-1). */
void err(const char *fmt,...)
{
    va_list va;
    va_start(va,fmt);
    fprintf(stderr,"error: ");
    vfprintf(stderr,fmt,va);
    fputc('\n',stderr);
    va_end(va);
    exit(-1);
}

/* Like err(), but prefixes the message with the line stored in tk. */
void tkerr(const Token *tk,const char *fmt,...)
{
    va_list va;
    va_start(va,fmt);
    fprintf(stderr,"error in line %d: ",tk->line);
    vfprintf(stderr,fmt,va);
    fputc('\n',stderr);
    va_end(va);
    exit(-1);
}

/* Append a new token with the given code and the current line to the list
 * and return it. The union payload is left for the caller to fill in. */
Token *addTk(int code)
{
    Token *tk;
    SAFEALLOC(tk,Token);
    tk->code=code;
    tk->line=line;
    tk->next=NULL;
    if(lastToken)
    {
        lastToken->next=tk;
    }
    else
    {
        tokens=tk;
    }
    lastToken=tk;
    return tk;
}

/* Return a newly allocated NUL-terminated copy of [pStartCh, pCrtCh).
 * The caller owns the result. Terminates through err() when out of memory. */
char* createString(const char *pStartCh,const char *pCrtCh)
{
    size_t len=(size_t)(pCrtCh-pStartCh);
    char *str=malloc(len+1);
    if(str==NULL)
    {
        err("not enough memory");
    }
    /* memcpy copies exactly the token; it does not scan the rest of the input. */
    memcpy(str,pStartCh,len);
    str[len]='\0';
    return str;
}

/* Map the character that follows a backslash to the character it denotes
 * (e.g. 'n' -> '\n'). Characters without a special meaning map to themselves. */
char escCode(char ch)
{
    switch(ch)
    {
        case 'a': return '\a';
        case 'b': return '\b';
        case 'f': return '\f';
        case 'n': return '\n';
        case 'r': return '\r';
        case 't': return '\t';
        case 'v': return '\v';
        case '0': return '\0';
        default:  return ch;    /* ' ? " \ denote themselves */
    }
}

/* Print every token code, followed by ":value" for tokens that carry one,
 * separated by spaces and terminated by a newline. */
void printTokens(void)
{
    Token *current;
    for(current=tokens;current!=NULL;current=current->next)
    {
        printf("%i", current->code);
        switch(current->code)
        {
            case ID:
            case CT_STRING:
                printf(":%s",current->text);
                break;
            case CT_CHAR:
                printf(":%c",(int)current->i);
                break;
            case CT_INT:
                printf(":%li",current->i);
                break;
            case CT_REAL:
                printf(":%lf",current->r);
                break;
            default:
                break;
        }
        printf(" ");
    }
    printf("\n");
}

/*
 * Consume the next token from pCrtCh, append it to the token list and
 * return its code. Whitespace and comments are skipped. At the end of the
 * input an END token is appended and END is returned; the cursor is never
 * moved past the terminating NUL. Lexical errors terminate through tkerr().
 *
 * Implemented as a DFA: state 0 is the start state, the other numbers are
 * intermediate or accepting states.
 */
int getNextToken(void)
{
    /* Characters allowed after a backslash in char and string constants. */
    static const char escapable[]="abfnrtv'?\"\\0";
    static const struct { const char *text; int code; } keywords[]={
        {"break",BREAK}, {"char",CHAR}, {"double",DOUBLE}, {"else",ELSE},
        {"for",FOR}, {"if",IF}, {"int",INT}, {"return",RETURN},
        {"struct",STRUCT}, {"void",VOID}, {"while",WHILE}
    };

    int state=0;
    const char *pStartCh=pCrtCh;    /* first character of the current token */
    int ct_char=0;                  /* decoded value of a char constant */
    Token *tk;

    for(;;)
    {
        const char ch=*pCrtCh;
        const unsigned char uch=(unsigned char)ch;

        switch(state)
        {
            case 0: /* start state */
                if(ch>='1'&&ch<='9') { pStartCh=pCrtCh; pCrtCh++; state=1; }
                else if(ch=='0') { pStartCh=pCrtCh; pCrtCh++; state=2; }
                else if(ch=='/') { pCrtCh++; state=14; }
                else if(ch==' '||ch=='\r'||ch=='\t') { pCrtCh++; }
                else if(ch=='\n') { line++; pCrtCh++; }
                else if(ch=='\'') { pStartCh=pCrtCh; pCrtCh++; state=18; }
                else if(ch=='\"') { pStartCh=pCrtCh; pCrtCh++; state=22; }
                else if(ch==',') { pCrtCh++; state=26; }
                else if(ch==';') { pCrtCh++; state=27; }
                else if(ch=='(') { pCrtCh++; state=28; }
                else if(ch==')') { pCrtCh++; state=29; }
                else if(ch=='[') { pCrtCh++; state=30; }
                else if(ch==']') { pCrtCh++; state=31; }
                else if(ch=='{') { pCrtCh++; state=32; }
                else if(ch=='}') { pCrtCh++; state=33; }
                else if(ch=='+') { pCrtCh++; state=34; }
                else if(ch=='-') { pCrtCh++; state=35; }
                else if(ch=='*') { pCrtCh++; state=36; }
                else if(ch=='.') { pCrtCh++; state=38; }
                else if(ch=='&') { pCrtCh++; state=39; }
                else if(ch=='|') { pCrtCh++; state=41; }
                else if(ch=='!') { pCrtCh++; state=43; }
                else if(ch=='=') { pCrtCh++; state=46; }
                else if(ch=='<') { pCrtCh++; state=49; }
                else if(ch=='>') { pCrtCh++; state=52; }
                else if(ch=='_' || (ch>='a'&&ch<='z') || (ch>='A'&&ch<='Z'))
                {
                    pStartCh=pCrtCh; pCrtCh++; state=55;
                }
                else if(ch=='\0') /* end of the input string */
                {
                    printf("At END.\n");
                    addTk(END);
                    return END;
                }
                else
                {
                    printf("invalid character ch=%c\n",ch);
                    tkerr(addTk(END),"invalid character");
                }
                break;

            /* ---- numeric constants ---- */
            case 1: /* decimal digits */
                if(isdigit(uch)) { pCrtCh++; }
                else if(ch=='.') { pCrtCh++; state=8; }
                else if(ch=='e'||ch=='E') { pCrtCh++; state=10; }
                else { state=6; }
                break;
            case 2: /* a leading '0': hex prefix or octal */
                if(ch=='x'||ch=='X') { pCrtCh++; state=4; }
                else { state=3; }
                break;
            case 3: /* octal digits */
                if(ch>='0'&&ch<='7') { pCrtCh++; }
                else if(ch=='.') { pCrtCh++; state=8; }
                else if(ch=='e'||ch=='E') { pCrtCh++; state=10; }
                else { state=6; }
                break;
            case 4: /* at least one hex digit must follow "0x" */
                if(isxdigit(uch)) { pCrtCh++; state=5; }
                else { tkerr(addTk(END),"not a valid int"); }
                break;
            case 5: /* further hex digits */
                if(isxdigit(uch)) { pCrtCh++; }
                else { state=6; }
                break;
            case 6: /* CT_INT; base 0 lets strtol handle the 0 / 0x prefixes */
                tk=addTk(CT_INT);
                tk->i=strtol(pStartCh,NULL,0);
                return CT_INT;
            case 8: /* a digit is required after the decimal point */
                if(isdigit(uch)) { pCrtCh++; state=9; }
                else { tkerr(addTk(END),"after a point should be a digit"); }
                break;
            case 9: /* fraction digits */
                if(ch=='e'||ch=='E') { pCrtCh++; state=10; }
                else if(isdigit(uch)) { pCrtCh++; }
                else { state=13; }
                break;
            case 10: /* optional exponent sign */
                if(ch=='+'||ch=='-') { pCrtCh++; }
                state=11;
                break;
            case 11: /* a digit is required in the exponent */
                if(isdigit(uch)) { pCrtCh++; state=12; }
                else { tkerr(addTk(END),"after + or - should come a digit"); }
                break;
            case 12: /* further exponent digits */
                if(isdigit(uch)) { pCrtCh++; }
                else { state=13; }
                break;
            case 13: /* CT_REAL */
                tk=addTk(CT_REAL);
                tk->r=strtod(pStartCh,NULL);
                return CT_REAL;

            /* ---- '/', comments ---- */
            case 14:
                if(ch=='*') { pCrtCh++; state=15; }
                else if(ch=='/') { pCrtCh++; state=17; }
                else { state=37; }
                break;
            case 15: /* inside a block comment */
                if(ch=='\0')
                {
                    /* never step over the terminator */
                    tkerr(addTk(END),"unterminated comment");
                }
                else
                {
                    if(ch=='\n') { line++; }    /* keep line numbers accurate */
                    if(ch=='*') { state=16; }
                    pCrtCh++;
                }
                break;
            case 16: /* inside a block comment, just after '*' */
                if(ch=='*') { pCrtCh++; }
                else if(ch=='/') { pCrtCh++; state=0; }
                else { state=15; }  /* not consumed: state 15 handles '\n' and '\0' */
                break;
            case 17: /* inside a line comment */
                if(ch=='\0') { state=0; }   /* not consumed: state 0 emits END */
                else if(ch=='\n') { line++; pCrtCh++; state=0; }
                else if(ch=='\r') { pCrtCh++; state=0; }
                else { pCrtCh++; }
                break;

            /* ---- character constants ---- */
            case 18:
                if(ch=='\\') { pCrtCh++; state=19; }
                else if(ch=='\0') { tkerr(addTk(END),"unterminated character constant"); }
                else if(ch!='\'') { ct_char=ch; pCrtCh++; state=20; }
                else { tkerr(addTk(END)," empty character constant"); }
                break;
            case 19: /* escape sequence */
                /* strchr() also matches the terminator of the set, so the
                 * end of the input must be excluded explicitly. */
                if(ch!='\0'&&strchr(escapable,ch))
                {
                    ct_char=escCode(ch);
                    pCrtCh++;
                    state=20;
                }
                else { tkerr(addTk(END),"should come a character to be escaped"); }
                break;
            case 20: /* closing quote */
                if(ch=='\'') { pCrtCh++; state=21; }
                else { tkerr(addTk(END),"not a quote"); }
                break;
            case 21: /* CT_CHAR */
                tk=addTk(CT_CHAR);
                tk->i=ct_char;
                return CT_CHAR;

            /* ---- string constants ---- */
            case 22: /* string body */
                if(ch=='\0') { tkerr(addTk(END),"unterminated string constant"); }
                else if(ch=='\\') { pCrtCh++; state=23; }
                else if(ch=='\"') { pCrtCh++; state=25; }
                else { pCrtCh++; }
                break;
            case 23: /* escape sequence inside a string */
                if(ch!='\0'&&strchr(escapable,ch)) { pCrtCh++; state=22; }
                else { tkerr(addTk(END),"should come a character to be escaped"); }
                break;
            case 25: /* CT_STRING: copy the text between the quotes and decode escapes */
            {
                char *text=createString(pStartCh+1,pCrtCh-1);
                const char *src;
                char *dst=text;
                /* Single left-to-right pass: a decoded backslash is never
                 * looked at again, so "\\n" stays backslash + 'n'. State 23
                 * guarantees a character follows every backslash. */
                for(src=text;*src!='\0';src++)
                {
                    if(*src=='\\')
                    {
                        src++;
                        *dst++=escCode(*src);
                    }
                    else
                    {
                        *dst++=*src;
                    }
                }
                *dst='\0';
                tk=addTk(CT_STRING);
                tk->text=text;
                return CT_STRING;
            }

            /* ---- delimiters and single-character operators ---- */
            case 26: addTk(COMMA); return COMMA;
            case 27: addTk(SEMICOLON); return SEMICOLON;
            case 28: addTk(LPAR); return LPAR;
            case 29: addTk(RPAR); return RPAR;
            case 30: addTk(LBRACKET); return LBRACKET;
            case 31: addTk(RBRACKET); return RBRACKET;
            case 32: addTk(LACC); return LACC;
            case 33: addTk(RACC); return RACC;
            case 34: addTk(ADD); return ADD;
            case 35: addTk(SUB); return SUB;
            case 36: addTk(MUL); return MUL;
            case 37: addTk(DIV); return DIV;
            case 38: addTk(DOT); return DOT;

            /* ---- one- or two-character operators ---- */
            case 39:
                if(ch=='&') { pCrtCh++; state=40; }
                else { tkerr(addTk(END),"should come a &"); }
                break;
            case 40: addTk(AND); return AND;
            case 41:
                if(ch=='|') { pCrtCh++; state=42; }
                else { tkerr(addTk(END),"should come a |"); }
                break;
            case 42: addTk(OR); return OR;
            case 43:
                if(ch=='=') { pCrtCh++; state=45; }
                else { state=44; }
                break;
            case 44: addTk(NOT); return NOT;
            case 45: addTk(NOTEQ); return NOTEQ;
            case 46:
                if(ch=='=') { pCrtCh++; state=48; }
                else { state=47; }
                break;
            case 47: addTk(ASSIGN); return ASSIGN;
            case 48: addTk(EQUAL); return EQUAL;
            case 49:
                if(ch=='=') { pCrtCh++; state=51; }
                else { state=50; }
                break;
            case 50: addTk(LESS); return LESS;
            case 51: addTk(LESSEQ); return LESSEQ;
            case 52:
                if(ch=='=') { pCrtCh++; state=54; }
                else { state=53; }
                break;
            case 53: addTk(GREATER); return GREATER;
            case 54: addTk(GREATEREQ); return GREATEREQ;

            /* ---- identifiers and keywords ---- */
            case 55: /* letters, digits and '_' continue an identifier */
                if(ch=='_' || (ch>='a'&&ch<='z') || (ch>='A'&&ch<='Z') || isdigit(uch))
                {
                    pCrtCh++;
                }
                else { state=56; }
                break;
            case 56:
            {
                const size_t nCh=(size_t)(pCrtCh-pStartCh);
                size_t k;
                for(k=0;k<sizeof keywords/sizeof keywords[0];k++)
                {
                    if(strlen(keywords[k].text)==nCh&&!memcmp(pStartCh,keywords[k].text,nCh))
                    {
                        return addTk(keywords[k].code)->code;
                    }
                }
                /* not a keyword, so it is an ID */
                tk=addTk(ID);
                tk->text=createString(pStartCh,pCrtCh);
                return ID;
            }

            default:
                err("internal lexer error: unknown state %d",state);
                break;
        }
    }
}

/* Tokenize the whole input. Stops after the END token has been appended,
 * so the list is END-terminated whether or not the input ends in whitespace. */
void getTokens(void)
{
    while(getNextToken()!=END)
    {
    }
}

/* Define LEXER_NO_MAIN to link the lexer into another program or a test
 * harness without this driver. */
#ifndef LEXER_NO_MAIN
/* Read "8.c" into memory, tokenize it and print the token list. */
int main(void)
{
    FILE *file=fopen("8.c","rb");   /* read-only; binary keeps ftell() a byte count */
    char *input;
    long size;
    size_t nRead;

    if(file==NULL)
    {
        printf("The file could not be opened.\n");
        exit(1);
    }

    if(fseek(file,0,SEEK_END)!=0 || (size=ftell(file))<0 || fseek(file,0,SEEK_SET)!=0)
    {
        fclose(file);
        err("cannot determine the size of the input file");
    }

    input=malloc((size_t)size+1);
    if(input==NULL)
    {
        fclose(file);
        err("not enough memory");
    }

    /* Terminate after what was actually read, which may be less than size. */
    nRead=fread(input,sizeof(char),(size_t)size,file);
    input[nRead]='\0';
    fclose(file);

    pCrtCh=input;
    getTokens();
    printTokens();

    free(input);
    return 0;
}
#endif /* LEXER_NO_MAIN */