Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- #include<stdio.h>
- #include <stdlib.h>
- #include <stdarg.h>
- #include <ctype.h>
- #include <string.h>
- #include <unistd.h>
/*
 * Allocate one object of type `Type` with malloc and store its address in
 * `var`; on allocation failure call err("not enough memory").
 *
 * The expansion is a braced block, so an `else` that follows a use of the
 * macro can no longer attach to the macro's internal `if`. It may be written
 * with or without a trailing semicolon at the call site.
 */
#define SAFEALLOC(var,Type) { if(((var)=(Type*)malloc(sizeof(Type)))==NULL) err("not enough memory"); }
/* Text being tokenized; main() fills it from the input file and NUL-terminates it. */
char input[50001];

/* Current line number in `input`, copied into every token by addTk().
 * Starts at 1 so that diagnostics refer to the first line as line 1. */
int line = 1;

/* Read cursor of the lexer: points at the next unconsumed character of `input`. */
char *pCtrCh;

/* Token codes. The order must stay in sync with the names in `word`. */
enum {
    ID,
    /* keywords */
    BREAK, CHAR, DOUBLE, ELSE, FOR, IF, INT, RETURN, STRUCT, VOID, WHILE,
    /* constants */
    CT_INT, CT_REAL, CT_CHAR, CT_STRING,
    /* delimiters */
    COMMA, SEMICOLON, LPAR, RPAR, LBRACKET, RBRACKET, LACC, RACC,
    /* operators */
    ADD, SUB, MUL, DIV, DOT, AND, OR, NOT, ASSIGN, EQUAL, NOTEQ,
    LESS, LESSEQ, GREATER, GREATEREQ,
    /* listed in the enum, but never produced by the lexer in this file */
    SPACE, LINECOMMENT, COMMENT,
    /* end of input; last code */
    END
};

/* Printable name of each token code, indexed by the code itself. */
const char *word[] = {
    "ID",
    "BREAK", "CHAR", "DOUBLE", "ELSE", "FOR", "IF", "INT", "RETURN", "STRUCT", "VOID", "WHILE",
    "CT_INT", "CT_REAL", "CT_CHAR", "CT_STRING",
    "COMMA", "SEMICOLON", "LPAR", "RPAR", "LBRACKET", "RBRACKET", "LACC", "RACC",
    "ADD", "SUB", "MUL", "DIV", "DOT", "AND", "OR", "NOT", "ASSIGN", "EQUAL", "NOTEQ",
    "LESS", "LESSEQ", "GREATER", "GREATEREQ",
    "SPACE", "LINECOMMENT", "COMMENT",
    "END"
};

/* Catch a name table that has drifted out of step with the enum. */
_Static_assert(sizeof word / sizeof word[0] == END + 1,
               "word[] must have exactly one name per token code");
/* One lexical token, stored as a node of a singly linked list. */
typedef struct _Token {
    int code;               /* token kind: one of the enum codes (ID ... END) */
    union {                 /* payload; which member is valid depends on `code` */
        char *text;         /* lexeme text, set for ID and CT_STRING */
        long i;             /* value, set for CT_INT and CT_CHAR */
        double r;           /* value, set for CT_REAL */
    };
    int line;               /* value of the global `line` when the token was created */
    struct _Token *next;    /* following token, or NULL for the last one */
} Token;

/* Head of the token list (first token added by addTk). */
Token *tokens;
/* Tail of the token list (most recent token added by addTk). */
Token *lastToken;
/*
 * Report a fatal error and terminate the program.
 *
 * Writes "error: ", the printf-style message built from `fmt` and the
 * variadic arguments, and a newline to stderr, then exits with status -1.
 * Does not return.
 */
void err(const char *fmt, ...)
{
    va_list args;

    fputs("error: ", stderr);

    va_start(args, fmt);
    vfprintf(stderr, fmt, args);
    va_end(args);

    putc('\n', stderr);
    exit(-1);
}
- Token *addTk(int code) {
- Token *tk;
- SAFEALLOC(tk,Token)
- tk->code=code;
- tk->line=line;
- tk->next=NULL;
- if(lastToken){
- lastToken->next=tk;
- }else{
- tokens=tk;
- }
- lastToken=tk;
- return tk;
- }
- void tkerr(const Token *tk,const char *fmt,...) {
- va_list va;
- va_start(va,fmt);
- fprintf(stderr,"error in line %d: ",tk->line);
- vfprintf(stderr,fmt,va);
- fputc('\n',stderr);
- va_end(va);
- exit(-1);
- }
/*
 * Return a newly allocated, NUL-terminated copy of the characters in the
 * half-open range [pStartCh, pCtrCh).
 *
 * pStartCh - first character to copy
 * pCtrCh   - one past the last character to copy; a value that is not
 *            greater than pStartCh yields an empty string
 *
 * The caller owns the result and releases it with free(). If memory cannot
 * be allocated, an error is printed to stderr and the program exits with
 * status -1.
 */
char *createString(const char *pStartCh, char *pCtrCh)
{
    size_t len = (pCtrCh > pStartCh) ? (size_t)(pCtrCh - pStartCh) : 0;
    char *c = malloc(len + 1);

    if (c == NULL) {
        fprintf(stderr, "error: not enough memory\n");
        exit(-1);
    }

    /* Copy exactly `len` bytes. snprintf(c, len, ...) would keep only
     * len - 1 of them, because its size argument counts the terminator. */
    memcpy(c, pStartCh, len);
    c[len] = '\0';
    return c;
}
- int getNextToken() {
- int state=0, nCh;
- char ch;
- const char *pStartCh;
- Token *tk;
- while(1){
- ch=*pCtrCh;
- switch(state){
- case 0:
- if (ch>='1' && ch<='9'){
- pStartCh=pCtrCh;
- pCtrCh++;
- state=1;
- }
- else if (ch=='0') {
- pStartCh=pCtrCh;
- pCtrCh++;
- state=2;
- }
- else if (isalpha(ch) || ch=='_'){
- pStartCh=pCtrCh;
- pCtrCh++;
- state=29;
- }
- else if (ch=='"'){
- pStartCh=pCtrCh;
- pCtrCh++;
- state=18;
- }
- else if (ch=='\''){
- pStartCh=pCtrCh;
- pCtrCh++;
- state=13;
- }
- else if (ch=='/'){
- pCtrCh++;
- state=23;
- }
- else if (ch==','){
- pCtrCh++;
- state=32;
- }
- else if (ch==';'){
- pCtrCh++;
- state=33;
- }
- else if (ch=='('){
- pCtrCh++;
- state=34;
- }
- else if (ch==')'){
- pCtrCh++;
- state=35;
- }
- else if (ch=='{'){
- pCtrCh++;
- state=36;
- }
- else if (ch=='}'){
- pCtrCh++;
- state=37;
- }
- else if (ch=='['){
- pCtrCh++;
- state=38;
- }
- else if (ch==']'){
- pCtrCh++;
- state=39;
- }
- else if (ch=='+'){
- pCtrCh++;
- state=40;
- }
- else if (ch=='-'){
- pCtrCh++;
- state=41;
- }
- else if (ch=='*'){
- pCtrCh++;
- state=42;
- }
- else if (ch=='.'){
- pCtrCh++;
- state=43;
- }
- else if (ch=='&'){
- pCtrCh++;
- state=44;
- }
- else if (ch=='|'){
- pCtrCh++;
- state=46;
- }
- else if (ch=='='){
- pCtrCh++;
- state=48;
- }
- else if (ch=='!'){
- pCtrCh++;
- state=49;
- }
- else if (ch=='<'){
- pCtrCh++;
- state=50;
- }
- else if (ch=='>'){
- pCtrCh++;
- state=51;
- }
- else if (ch==' '||ch=='\r'||ch=='\t'){
- pCtrCh++;
- }
- else if (ch=='\n'){
- line++;
- pCtrCh++;
- }
- else if (ch==0){
- addTk(END);
- return END;
- }
- else
- tkerr(addTk(END),"caracter invalid11");
- break;
- case 1:
- if (ch=='.'){
- pCtrCh++;
- state=7;
- }
- else if (ch=='e'||ch=='E'){
- pCtrCh++;
- state=9;
- }
- else if (ch>='0' && ch<='9'){
- pCtrCh++;
- }
- else
- state=6;
- break;
- case 2:
- if (ch=='x'){
- pCtrCh++;
- state=4;
- }
- else{
- state=3;
- }
- break;
- case 3:
- if (ch=='.'){
- pCtrCh++;
- state=7;
- }
- else if (ch=='e'|| ch=='E'){
- pCtrCh++;
- state=9;
- }
- else if (ch>='0' && ch<='7'){
- pCtrCh++;
- }
- else
- state=6;
- break;
- case 4:
- if (isalnum(ch)){
- pCtrCh++;
- state=5;
- }
- else
- tkerr(addTk(END),"caracter invalid");
- break;
- case 5:
- if (isalnum(ch)){
- pCtrCh++;
- }
- else
- state=6;
- break;
- case 6:
- tk=addTk(CT_INT);
- tk->i=atoi(createString(pStartCh,pCtrCh));
- return tk->code;
- // addTk(CT_INT);
- // return CT_INT;
- case 7:
- if (ch>='0' && ch<='9'){
- pCtrCh++;
- state=8;
- }
- else
- tkerr(addTk(END),"caracter invalid");
- break;
- case 8:
- if (ch>='0' && ch<='9'){
- pCtrCh++;
- }
- else if (ch =='e' || ch =='E'){
- pCtrCh++;
- state=9;
- }
- else
- state = 12;
- break;
- case 9:
- if (ch =='+' || ch =='-'){
- pCtrCh++;
- state=10;
- }
- else
- state=10;
- break;
- case 10:
- if (ch>='0' && ch<='9'){
- pCtrCh++;
- state=11;
- }
- else
- tkerr(addTk(END),"caracter invalid");
- break;
- case 11:
- if (ch>='0' && ch<='9'){
- pCtrCh++;
- }
- else
- state=12;
- break;
- case 12:
- tk=addTk(CT_REAL);
- tk->r=strtod(createString(pStartCh,pCtrCh),NULL);
- return tk->code;
- // addTk(CT_REAL);
- // return CT_REAL;
- case 13:
- if (ch =='\\'){
- pCtrCh++;
- state=15;
- }
- else if (ch !='\'' && ch !='\\'){
- pCtrCh++;
- state=16;
- }
- else
- tkerr(addTk(END),"caracter invalid");
- break;
- case 15:
- if (ch =='a' || ch =='b' || ch =='f' || ch =='n' || ch =='r' || ch =='t' || ch =='v' || ch =='\'' || ch =='?' || ch =='"' || ch =='\0' || ch == '\\'){
- pCtrCh++;
- state=16;
- }
- else
- tkerr(addTk(END),"caracter invalid");
- break;
- case 16:
- if (ch =='\''){
- pCtrCh++;
- state=17;
- }
- else
- tkerr(addTk(END),"caracter invalid");
- break;
- case 17:
- tk=addTk(CT_CHAR);
- tk->i=createString(pStartCh,pCtrCh)[1];
- return tk->code;
- //addTk(CT_CHAR);
- //return CT_CHAR;
- case 18:
- if (ch =='\\'){
- pCtrCh++;
- state=20;
- }
- else if (ch !='"' && ch !='\\'){
- pCtrCh++;
- state=21;
- }
- else if (ch == '"'){
- pCtrCh++;
- state=22;
- }
- else
- tkerr(addTk(END),"caracter invalid2");
- break;
- case 20:
- if (ch =='a' || ch =='b' || ch =='f' || ch =='n' || ch =='r' || ch =='t' || ch =='v' || ch =='\'' || ch =='?' || ch =='"' || ch =='\0'){
- pCtrCh++;
- state=21;
- }
- else
- tkerr(addTk(END),"caracter invalid3");
- break;
- case 21:
- if (ch =='"'){
- pCtrCh++;
- state=22;
- }
- else {
- // pCtrCh++;
- state=18;
- }
- break;
- case 22:
- tk=addTk(CT_STRING);
- tk->text=createString(pStartCh,pCtrCh);
- return tk->code;
- // addTk(CT_STRING);
- // return CT_STRING;
- case 23:
- if (ch =='/'){
- pCtrCh++;
- state=27;
- }
- else if (ch =='*'){
- pCtrCh++;
- state=24;
- }
- else
- state=31;
- break;
- case 24:
- if (ch =='*'){
- pCtrCh++;
- state=25;
- }
- else if (ch !='*'){
- pCtrCh++;
- }
- else
- tkerr(addTk(END),"caracter1111 invalid");
- break;
- case 25:
- if (ch =='/'){
- pCtrCh++;
- state=0;
- }
- else if (ch =='*'){
- pCtrCh++;
- }
- else if (ch !='*' && ch!='/' ){
- pCtrCh++;
- state = 24;
- }
- else
- tkerr(addTk(END),"caracter2222 invalid");
- break;
- case 27:
- if (ch !='\n' && ch !='\r' && ch !='\0'){
- pCtrCh++;
- }
- else
- state = 0;
- break;
- case 29:
- if (isalnum(ch) || ch =='_'){
- pCtrCh++;
- }
- else
- state=30;
- break;
- case 30:
- nCh=pCtrCh-pStartCh;//lungimea cuvantului gasit
- //teste cuvinte cheie
- if(nCh==5 && !memcmp(pStartCh,"break",5))
- tk=addTk(BREAK);
- else if (nCh==4 && !memcmp(pStartCh,"char",4))
- tk=addTk(CHAR);
- else if (nCh==6 && !memcmp(pStartCh,"double",6))
- tk=addTk(DOUBLE);
- else if (nCh==4 && !memcmp(pStartCh,"else",4))
- tk=addTk(ELSE);
- else if (nCh==3 && !memcmp(pStartCh,"for",3))
- tk=addTk(FOR);
- else if (nCh==2 && !memcmp(pStartCh,"if",2))
- tk=addTk(IF);
- else if (nCh==3 && !memcmp(pStartCh,"int",3))
- tk=addTk(INT);
- else if (nCh==6 && !memcmp(pStartCh,"return",6))
- tk=addTk(RETURN);
- else if (nCh==6 && !memcmp(pStartCh,"struct",6))
- tk=addTk(STRUCT);
- else if (nCh==4 && !memcmp(pStartCh,"void",4))
- tk=addTk(VOID);
- else if (nCh==5 && !memcmp(pStartCh,"while",5))
- tk=addTk(WHILE);
- //... toate cuvintele cheie ...
- else { // daca nu este un cuvant cheie, atunci e un ID
- tk=addTk(ID);
- tk->text=createString(pStartCh,pCtrCh);
- }
- return tk->code;
- case 31:
- addTk(DIV);
- return DIV;
- case 32:
- addTk(COMMA);
- return COMMA;
- case 33:
- addTk(SEMICOLON);
- return SEMICOLON;
- case 34:
- tk = addTk(LPAR);
- tk->text = 0;
- return tk->code;
- case 35:
- addTk(RPAR);
- return RPAR;
- case 36:
- addTk(LACC);
- return LACC;
- case 37:
- addTk(RACC);
- return RACC;
- case 38:
- addTk(LBRACKET);
- return LBRACKET;
- case 39:
- addTk(RBRACKET);
- return RBRACKET;
- case 40:
- addTk(ADD);
- return ADD;
- case 41:
- addTk(SUB);
- return SUB;
- case 42:
- addTk(MUL);
- return MUL;
- case 43:
- addTk(DOT);
- return DOT;
- case 44:
- if (ch =='&'){
- pCtrCh++;
- state=45;
- }
- else
- tkerr(addTk(END),"caracter invalid");
- break;
- case 45:
- addTk(AND);
- return AND;
- case 46:
- if (ch =='|'){
- pCtrCh++;
- state=47;
- }
- else
- tkerr(addTk(END),"caracter invalid");
- break;
- case 47:
- addTk(OR);
- return OR;
- case 48:
- if (ch =='='){
- pCtrCh++;
- state=52;
- }
- else {
- state=53;
- }
- break;
- case 49:
- if (ch =='='){
- pCtrCh++;
- state=54;
- }
- else {
- state=55;
- }
- break;
- case 50:
- if (ch =='='){
- pCtrCh++;
- state=57;
- }
- else {
- state=56;
- }
- break;
- case 51:
- if (ch =='='){
- pCtrCh++;
- state=58;
- }
- else {
- state=59;
- }
- break;
- case 52:
- addTk(EQUAL);
- return EQUAL;
- case 53:
- addTk(ASSIGN);
- return ASSIGN;
- case 54:
- addTk(NOTEQ);
- return NOTEQ;
- case 55:
- addTk(NOT);
- return NOT;
- case 56:
- addTk(LESS);
- return LESS;
- case 57:
- addTk(LESSEQ);
- return LESSEQ;
- case 58:
- addTk(GREATEREQ);
- return GREATEREQ;
- case 59:
- addTk(GREATER);
- return GREATER;
- }
- }
- }
- // Analizator sintactic
- int main(){
- FILE *fis;
- if ((fis=fopen("8.c","r"))==NULL){
- perror("citire fisier");
- exit(1);
- }
- else {
- int nc=fread(input,1,50000,fis);
- input[nc]='\0';
- pCtrCh=input;
- fclose(fis);
- while(getNextToken()!=END){}
- Token *tk;
- for(tk=tokens; tk->code != 42;tk=tk->next) {
- if (tk->code == ID)
- printf("%s : %s ", word[tk->code], tk->text);
- else if (tk->code == CT_STRING)
- printf("%s : %s ", word[tk->code], tk->text);
- else if (tk->code == CT_CHAR)
- printf("%s : %c", word[tk->code], (int)tk->i);
- else if (tk->code == CT_INT)
- printf("%s : %ld ", word[tk->code], tk->i);
- else if (tk->code == CT_REAL)
- printf("%s : %f ", word[tk->code], tk->r);
- else
- printf("%s ", word[tk->code]);
- }
- printf("%s \n",word[tk->code]);
- }
- printf("...");
- char *c = (char*)malloc(sizeof(char)*(6));
- snprintf(c, 8, "%s", "111\t\t222");
- printf("%s",c);
- printf("...");
- }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement