// scanner.c

/*Group 2 Program 2
Paul Maclean- mac7537@calu.edu
Mike Gorse- gor9632@calu.edu
Robert Breckenridge- bre6896@calu.edu
Chase Smith- smi8808@calu.edu

CSC 460
Language Translations
*/
#include "scanner.h"
#include "file_util.h" // needed only for the `logical` type; pulling in the whole header for that seems ill-advised


// Text of the source line currently being read; getUpperChar() appends to it
// and writes it to the listing file when the line ends.
char lineBuff[LINE_BUFF_SIZE] = { '\0' };
// Lexeme of the token being assembled / most recently returned by getToken().
char tokenBuff[TOKEN_BUFF_SIZE] = { '\0' };
// Not referenced anywhere in the visible part of this file.
char tokenType[TOKEN_TYPE_SIZE] = { '\0' };

// Write index into tokenBuff while a token is being gathered.
int buffpos = 0;

// Line number printed in the listing file; starts at 1 and is bumped each
// time getUpperChar() flushes a line.
int lineCount = 1;
// Index of the next free slot in lineBuff.
int linePos = 0;

// Stream that putBackChar() pushes characters back onto; set by scanner().
FILE* SrcFile;


//*** Scanner Support Functions ***//

/*
 * Reports whether currchar ends a token: a space, a tab, a newline, or the
 * EOF marker as it appears after being narrowed to char.
 */
logical isDelimiter(char currchar) {
    const int isBlank = (currchar == ' ' || currchar == '\t');
    const int isLineEnd = (currchar == '\n');
    const int isFileEnd = (currchar == EOF);

    return (isBlank || isLineEnd || isFileEnd);
}

char getUpperChar() {
    char filechar = fgetc(InpFile);
    if (filechar == '\n' || (filechar == EOF && linePos > 0)) {
        char* errorBuffer[OUTFILE_BUFF_SIZE] = { '\0' };
        char* lisfileBuffer[OUTFILE_BUFF_SIZE] = { '\0' };
        char currErr;
        sprintf(lisfileBuffer, "%2d   %s\n", lineCount, lineBuff);
        fputs(lisfileBuffer, LisFile);

		int i = 0;
      for(i;i<Lex_Err_Index;i++)
      {
           currErr = Lex_Err_Buff[i];
           sprintf(errorBuffer, "Lexical Error on line %2d-   %c not recognized\n", lineCount, currErr);
           fputs(errorBuffer, LisFile);
      }
       Lex_Err_Index = 0;
       memset(Lex_Err_Buff, '\0', LINE_BUFF_SIZE);
        //clear line buffer. if we end up putting a character back after this we're fucked
        memset(lineBuff, '\0', LINE_BUFF_SIZE);
        linePos = 0;
        lineCount++;
    }
    else {
        lineBuff[linePos] = filechar;
        linePos++;
    }

    return toupper(filechar);
}

void putBackChar(char currchar) {
    lineBuff[linePos] = '\0';

    //only decrement if able to
    if (linePos > 0)
        linePos--;


    if (currchar != '\n') {
        ungetc(currchar, SrcFile);
    }
}

/*
 * Appends currchar and any decimal digits that follow it to tokenBuff,
 * starting at buffpos. The first character is taken as given (the caller
 * passes either a digit or a leading '-'). The character that ends the run is
 * pushed back for the next token.
 *
 * At most TOKEN_BUFF_SIZE - 1 characters are stored so the lexeme is always
 * NUL-terminated; a longer digit run is left in the input for the next call.
 *
 * Returns a true value; the visible callers ignore it.
 * NOTE(review): the original had no return statement -- confirm the intended
 * meaning of the result.
 */
logical gatherDigit(char currchar) {
    do {
        if (buffpos < TOKEN_BUFF_SIZE - 1) {
            tokenBuff[buffpos] = currchar;
            buffpos++;
        }
        currchar = getUpperChar();
        // A digit is never a delimiter, so isdigit() alone decides the loop.
    } while (isdigit((unsigned char)currchar) && buffpos < TOKEN_BUFF_SIZE - 1);

    if (buffpos >= 0 && buffpos < TOKEN_BUFF_SIZE) {
        tokenBuff[buffpos] = '\0';
    }

    putBackChar(currchar);
    return 1;
}


/*
 * Fills the first bufferSize bytes of buffer with '\0'.
 * A zero or negative bufferSize leaves the buffer untouched.
 */
void clearBuffer(char* buffer, int bufferSize)
{
    if (bufferSize <= 0) {
        return;
    }

    memset(buffer, '\0', (size_t)bufferSize);
}


/*
 * Maps a token id to its printable name from TOKEN_TYPES.
 *
 * A negative id (getToken() starts from -1 as its "no token yet" value)
 * yields "UNKNOWN" instead of indexing before the start of the table.
 * NOTE(review): the table's length is not visible here, so ids past its end
 * are not checked.
 */
char* decode(TokenId typeid)
{
    if ((int)typeid < 0) {
        return "UNKNOWN";
    }

    return TOKEN_TYPES[typeid];
}

/*
 * Classifies the lexeme in tokenBuff: returns the index of the matching
 * entry among the first 13 names in TOKEN_TYPES, or ID when none matches.
 */
TokenId getReservedId() {
    // 13 is the number of leading TOKEN_TYPES entries that can be typed as
    // reserved words.
    enum { RESERVED_WORD_LIMIT = 13 };
    int idx;

    // Walk from the top down: the first hit found this way is the highest
    // matching index, which is what a full forward scan would end up with.
    for (idx = RESERVED_WORD_LIMIT - 1; idx >= 0; idx--) {
        if (strcmp(tokenBuff, TOKEN_TYPES[idx]) == 0) {
            return idx;
        }
    }

    return ID;
}


TokenId getToken()
{
    TokenId thistoken = -1;

    buffpos = 0;
    char currchar;

    do {
        currchar = getUpperChar();


        if (isspace(currchar)) {
            //skip whitespace
        }
        else if (isalpha(currchar)) { //compose ID

            do {
                tokenBuff[buffpos] = currchar;
                currchar = getUpperChar();
                buffpos++;
            } while (isalnum(currchar) && !isDelimiter(currchar) && buffpos < TOKEN_BUFF_SIZE);

            putBackChar(currchar);

            thistoken = getReservedId();
        }

        else if (isdigit(currchar)) { //compose digit
            gatherDigit(currchar);
            thistoken = INTLITERAL;
        }
        else if (currchar == '-') { //check possible '-' paths
            char lookahead = getUpperChar();
            putBackChar(lookahead); //all paths require an ungetc.

            if (lookahead == '-') { //compose comment
                do {
                    currchar = getUpperChar();
                    buffpos++;
                } while (currchar != '\n' && currchar != EOF && buffpos < TOKEN_BUFF_SIZE);
            }
            else if (isdigit(lookahead)) {
                gatherDigit(currchar);
                thistoken = INTLITERAL;
            }
            else {
                strcpy_s(tokenBuff, TOKEN_BUFF_SIZE, "-");
                thistoken = MINUSOP;
            }

        }
        else if (currchar == '(')
        {
            strcpy_s(tokenBuff, TOKEN_BUFF_SIZE, "(");
            thistoken = LPAREN;
        }
        else if (currchar == ')')
        {
            strcpy_s(tokenBuff, TOKEN_BUFF_SIZE, ")");
            thistoken = RPAREN;
        }
        else if (currchar == ';')
        {
            strcpy_s(tokenBuff, TOKEN_BUFF_SIZE, ";");
            thistoken = SEMICOLON;
        }
        else if (currchar == ',')
        {
            strcpy_s(tokenBuff, TOKEN_BUFF_SIZE, ",");
            thistoken = COMMA;
        }
        else if (currchar == ':')
        {
            char lookahead = getUpperChar();
            if (lookahead == '=')
            {
                strcpy_s(tokenBuff, TOKEN_BUFF_SIZE, ":=");
                thistoken = ASSIGNOP;
            }
            else
            {
                putBackChar(lookahead);
                strcpy_s(tokenBuff, TOKEN_BUFF_SIZE, ":");
                Lex_Err_Buff[Lex_Err_Index] = currchar;
                Lex_Err_Index++;
                Lex_Err_Total++;
                thistoken = LEXERR;
            }
        }
        else if (currchar == '+')
        {
            strcpy_s(tokenBuff, TOKEN_BUFF_SIZE, "+");
            thistoken = PLUSOP;
        }
        else if (currchar == '*')
        {
            strcpy_s(tokenBuff, TOKEN_BUFF_SIZE, "*");
            thistoken = MULTOP;
        }
        else if (currchar == '/')
        {
            strcpy_s(tokenBuff, TOKEN_BUFF_SIZE, "/");
            thistoken = DIVOP;
        }
        else if (currchar == '!')
        {
            strcpy_s(tokenBuff, TOKEN_BUFF_SIZE, "!");
            thistoken = NOTOP;
        }
        else if (currchar == '<')
        {
            char lookahead = getUpperChar();
            if (lookahead == '=')
            {
                strcpy_s(tokenBuff, TOKEN_BUFF_SIZE, "<=");
                thistoken = LESSEQUALOP;
            }
            else if (lookahead == '>')
            {
                strcpy_s(tokenBuff, TOKEN_BUFF_SIZE, "<>");
                thistoken = NOTEQUALOP;
            }
            else
            {
                putBackChar(lookahead);
                strcpy_s(tokenBuff, TOKEN_BUFF_SIZE, "<");
                thistoken = LESSOP;
            }
        }
        else if (currchar == '>')
        {
            char lookahead = getUpperChar();
            if (lookahead == '=')
            {
                strcpy_s(tokenBuff, TOKEN_BUFF_SIZE, ">=");
                thistoken = GREATEREQUALOP;
            }
            else
            {
                putBackChar(lookahead);
                strcpy_s(tokenBuff, TOKEN_BUFF_SIZE, ">");
                thistoken = GREATEROP;
            }
        }
        else if (currchar == '=')
        {
            strcpy_s(tokenBuff, TOKEN_BUFF_SIZE, "=");
            thistoken = EQUALOP;
        }
        else if (currchar == EOF)
        {
            strcpy_s(tokenBuff, TOKEN_BUFF_SIZE, "EOF");
            thistoken = SCANEOF;
        }
        else
        {
            tokenBuff[buffpos] = currchar;
            thistoken = LEXERR;
            Lex_Err_Buff[Lex_Err_Index] = currchar;
            Lex_Err_Index++;
            Lex_Err_Total++;
        }
    } while (thistoken == -1 && currchar != EOF);
    //may need a flag for comments


    return thistoken;
}


//*** Primary Scanner Function ***//

/*
 * Drives the scan of a whole source file.
 *
 * InpFile is recorded in SrcFile (the stream putBackChar() pushes back onto).
 * Tokens are then pulled with getToken() until SCANEOF; for each one a line
 * with its number, type name and lexeme is written to OutFile. Finally the
 * total lexical-error count is written to LisFile.
 *
 * NOTE(review): getUpperChar() reads from a file-scope InpFile, which this
 * parameter shadows; the caller is expected to pass that same stream.
 */
void scanner(FILE* InpFile)
{
    TokenId thistoken;

    SrcFile = InpFile;

    do {
        thistoken = getToken();

        // Format straight into the output file; no fixed-size staging buffer
        // is needed, so nothing can overflow on a long lexeme.
        fprintf(OutFile, "Token Number: %d\t\t\tToken Type: %s\t\t\tActual Token: %s\n", (int)thistoken, decode(thistoken), tokenBuff);

        // Wipe the lexeme so the next token starts from an all-NUL buffer.
        clearBuffer(tokenBuff, TOKEN_BUFF_SIZE);
    } while (thistoken != SCANEOF);

    fprintf(LisFile, "Number of total errors: %d\n", Lex_Err_Total);
}