Advertisement
chasesmith20

scanner.c

Feb 26th, 2020
510
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
C 9.39 KB | None | 0 0
  1. /*Group 2 Program 2
  2. Paul Maclean- mac7537@calu.edu
  3. Mike Gorse- gor9632@calu.edu
  4. Robert Breckenridge- bre6896@calu.edu
  5. Chase Smith- smi8808@calu.edu
  6.  
  7. CSC 460
  8. Language Translations
  9. */
  10.  
  11. #include "file_util.h"
  12. #include "scanner.h"
  13.  
  14.  
  15. #define LINE_BUFF_SIZE 1024
  16. #define TOKEN_BUFF_SIZE 1024
  17. #define OUTFILE_BUFF_SIZE 1024
  18. #define TOKEN_TYPE_SIZE 20
  19.  
  20. fpos_t old_pos;                                   //global variable for the old position
  21. fpos_t cur_pos;                                   //global variable for the current position
  22.  
  23. char lineBuff[LINE_BUFF_SIZE] = { '\0' };
  24. char tokenBuff[TOKEN_BUFF_SIZE] = { '\0' };
  25. char tokenType[TOKEN_TYPE_SIZE] = { '\0' };
  26.  
  27. char errorBuffer[OUTFILE_BUFF_SIZE] = { '\0' };    //for formatting lexical errors in listing file
  28. char lisfileBuffer[OUTFILE_BUFF_SIZE] = { '\0' };  //for formatting numbered lines in listing file
  29.  
  30. char Lex_Err_Buff[LINE_BUFF_SIZE] = { '\0' };
  31. int Lex_Err_Total = 0;
  32. int Lex_Err_Index = 0;
  33.  
  34. int buffpos = 0;
  35.  
  36. int lineCount = 1;
  37. int linePos = 0;
  38.  
  39. TokenId thistoken;
  40.  
  41.  
  42.  
  43.  
  44.  
  45. //*** Scanner Support Functions ***//
  46.  
  47. logical isDelimiter(char currchar) {
  48.     return (currchar == ' ' || currchar == '\n' || currchar == '\t' || currchar == EOF);
  49. }
  50.  
  51.  
  52. //Non-destructive character lookahead. Returns capital, for easier use.
  53. char peekChar() {
  54.     char filechar = fgetc(InpFile);
  55.     ungetc(filechar, InpFile);
  56.     return toupper(filechar);
  57. }
  58.  
  59.  
  60. //Consumes a character from the InpFile's instream. Also prints lines & lex errors to LisFile
  61. char consumeChar(logical permanent) {
  62.     char filechar = fgetc(InpFile);
  63.  
  64.     if (permanent) {
  65.         //Check if a list needs to be printed to the lisfile
  66.         if (filechar == '\n' || (filechar == EOF && linePos > 0)) {
  67.             int i = 0;
  68.  
  69.             //Format the collected line. No need to clear buffer.
  70.             sprintf(lisfileBuffer, "%2d   %s\n", lineCount, lineBuff);
  71.             fputs(lisfileBuffer, LisFile);
  72.  
  73.             //Print each lex error
  74.             for (i = 0; i < Lex_Err_Index; i++)
  75.             {
  76.                 char currErr = Lex_Err_Buff[i];
  77.                 sprintf(errorBuffer, "Lexical Error on line %2d-   %c not recognized\n", lineCount, currErr);
  78.                 fputs(errorBuffer, LisFile);
  79.             }
  80.             Lex_Err_Index = 0; //Reset errors for next line
  81.  
  82.             //Clear error and line buffers
  83.             memset(Lex_Err_Buff, '\0', LINE_BUFF_SIZE);
  84.             memset(lineBuff, '\0', LINE_BUFF_SIZE);
  85.  
  86.             //Reset line index
  87.             linePos = 0;
  88.             lineCount++;
  89.         }
  90.         else {
  91.             //Append character to lineBuffer.
  92.             lineBuff[linePos] = filechar;
  93.             linePos++;
  94.         }
  95.     }
  96.  
  97.     return toupper(filechar);
  98. }
  99.  
  100.  
  101. //Consumes a digit. Also consumes the character given to the function (guaranteed to be a "-" or a digit)
  102. void gatherDigit(char currchar, logical permanent) {
  103.     logical stillDigit = ltrue;
  104.  
  105.     do {
  106.         tokenBuff[buffpos] = currchar;
  107.         buffpos++;
  108.         currchar = peekChar();
  109.  
  110.         if (isdigit(currchar) && !isDelimiter(currchar) && buffpos < TOKEN_BUFF_SIZE) {
  111.             consumeChar(permanent);
  112.         }
  113.         else
  114.             stillDigit = lfalse;
  115.  
  116.     } while (stillDigit);
  117. }
  118.  
  119. //Setts thistoken to the given token and places a tokenval into the tokenBuff. Ex: (GREATEREQUALOP, ">=")
  120. void setToken(TokenId token, char* tokenvalue) {
  121.     thistoken = token;
  122.  
  123.     //if NULL is provided, do nothing
  124.     if (tokenvalue != NULL)
  125.         strcpy_s(tokenBuff, TOKEN_BUFF_SIZE, tokenvalue);
  126. }
  127.  
  //Fills the first bufferSize bytes of buffer with '\0'.
  //A NULL buffer or a non-positive bufferSize is a no-op.
  void clearBuffer(char* buffer, int bufferSize)
  {
      if (buffer == NULL || bufferSize <= 0)
          return;

      memset(buffer, '\0', (size_t)bufferSize);
  }
  136.  
  137. char* decode(TokenId typeid)
  138. {
  139.     return (char*)TOKEN_TYPES[typeid];
  140. }
  141.  
  142. TokenId getReservedId() {
  143.     TokenId foundtoken = ID; //assume it's an ID, because
  144.     int i = 0;
  145.     //only go up to 13 because that's the last enterable reserved word
  146.     for (i = 0; i < 13; i++) {
  147.         if (strcmp(tokenBuff, TOKEN_TYPES[i]) == 0) {
  148.             foundtoken = i;
  149.         }
  150.     }
  151.  
  152.     return foundtoken;
  153. }
  154.  
  155.  
  156.  
  157. TokenId getToken(logical permanent)
  158. {
  159.     //utilizing fgetpos() and fsetpos()
  160.     int fgetpos(FILE * stream, fpos_t * pos);
  161.     FILE* InpFile;
  162.     fpos_t old_pos;
  163.  
  164.     fgetpos(InpFile, &old_pos);
  165.  
  166.     fsetpos(InpFile, &old_pos);
  167.    
  168.     //if not permanent, set old pos here
  169.     if (!permanent) {
  170.         old_pos = 1;
  171.     }
  172.  
  173.     thistoken = -1; //Claim no token found
  174.     buffpos = 0;
  175.     char currchar;
  176.  
  177.     do {
  178.         currchar = consumeChar(permanent);
  179.  
  180.         if (isspace(currchar)) {
  181.             //do nothing
  182.         }
  183.         else if (isalpha(currchar)) { //Compose ID/Reserved Word
  184.             logical stillId = ltrue;
  185.  
  186.             do {
  187.                 tokenBuff[buffpos] = currchar;
  188.                 buffpos++;
  189.                 currchar = peekChar();
  190.                 if (isalnum(currchar) && !isDelimiter(currchar) && buffpos < TOKEN_BUFF_SIZE)
  191.                     consumeChar(permanent);
  192.                 else
  193.                     stillId = lfalse;
  194.  
  195.             } while (stillId);
  196.  
  197.             setToken(getReservedId(), NULL);
  198.         }
  199.         else if (isdigit(currchar)) { //Compose INTLITERAL
  200.             gatherDigit(currchar, permanent);
  201.             setToken(INTLITERAL, NULL);
  202.         }
  203.         else if (currchar == '-') { //check possible - paths (--COMMENT, -INTLITERAL, -)
  204.             char lookahead = peekChar();
  205.  
  206.             if (lookahead == '-') { //compose a comment
  207.                 do {
  208.                     currchar = consumeChar(permanent); //consume chars until newline. consume the newline, too.
  209.                 } while (currchar != '\n' && currchar != EOF && buffpos < TOKEN_BUFF_SIZE);
  210.  
  211.             }
  212.             else if (isdigit(lookahead)) {
  213.                 gatherDigit(currchar, permanent);
  214.                 setToken(INTLITERAL, NULL);
  215.             }
  216.             else {
  217.                 setToken(MINUSOP, "-");
  218.             }
  219.  
  220.         }
  221.         else if (currchar == ':') { //check possible : paths (:=, lex error)
  222.             char lookahead = peekChar();
  223.  
  224.             if (lookahead == '=') {
  225.                 consumeChar(permanent);
  226.                 setToken(ASSIGNOP, ":=");
  227.             }
  228.             else {
  229.                 setToken(LEXERR, NULL);
  230.  
  231.                 tokenBuff[buffpos] = currchar;
  232.                 Lex_Err_Buff[Lex_Err_Index] = currchar;
  233.                 Lex_Err_Index++;
  234.                 Lex_Err_Total++;
  235.             }
  236.         }
  237.         else if (currchar == '<') { //check possible : paths (<=, <>, <)
  238.             char lookahead = peekChar();
  239.  
  240.             if (lookahead == '=') {
  241.                 consumeChar(permanent);
  242.                 setToken(LESSEQUALOP, "<=");
  243.             }
  244.             else if (lookahead == '>') {
  245.                 consumeChar(permanent);
  246.                 setToken(NOTEQUALOP, "<>");
  247.             }
  248.             else {
  249.                 setToken(LESSOP, "<");
  250.             }
  251.         }
  252.         else if (currchar == '>') { //check possible : paths (>=, >)
  253.             char lookahead = peekChar();
  254.  
  255.             if (lookahead == '=') {
  256.                 consumeChar(permanent); //eat the '='
  257.                 setToken(GREATEREQUALOP, ">=");
  258.             }
  259.             else {
  260.                 setToken(GREATEROP, ">");
  261.             }
  262.         }
  263.         else if (currchar == '(') {
  264.             setToken(LPAREN, "(");
  265.         }
  266.         else if (currchar == ')') {
  267.             setToken(RPAREN, ")");
  268.         }
  269.         else if (currchar == ';') {
  270.             setToken(SEMICOLON, ";");
  271.         }
  272.         else if (currchar == ',') {
  273.             setToken(COMMA, ",");
  274.         }
  275.         else if (currchar == '+') {
  276.             setToken(PLUSOP, "+");
  277.         }
  278.         else if (currchar == '*') {
  279.             setToken(MULTOP, "*");
  280.         }
  281.         else if (currchar == '/') {
  282.             setToken(DIVOP, "/");
  283.         }
  284.         else if (currchar == '!') {
  285.             setToken(NOTOP, "!");
  286.         }
  287.         else if (currchar == '=') {
  288.             setToken(EQUALOP, "=");
  289.         }
  290.         else if (currchar == EOF) {
  291.             setToken(SCANEOF, "EOF");
  292.         }
  293.         else {
  294.             tokenBuff[buffpos] = currchar;
  295.             thistoken = LEXERR;
  296.  
  297.             Lex_Err_Buff[Lex_Err_Index] = currchar;
  298.             Lex_Err_Index++;
  299.             Lex_Err_Total++;
  300.         }
  301.     } while (thistoken == -1 && currchar != EOF);
  302.  
  303.  
  304.  
  305.     //if not permanent, set cur pos to old pos
  306.     if (!permanent) {
  307.         cur_pos = old_pos;
  308.     }
  309.  
  310.     return thistoken;
  311. }
  312.  
  313.  
  314.  
  315. //*** Primary Scanner Function ***//
  316.  
  317. void scanner(FILE* InpFile)
  318. {
  319.  
  320.     char printBuffer[OUTFILE_BUFF_SIZE] = { '\0' };
  321.  
  322.     TokenId thistoken;
  323.  
  324.     do {
  325.         thistoken = getToken(lfalse); //CURRENTLY ALWAYS PEEKING NEXT TOKEN. BEWARE.
  326.  
  327.         //parsley goes here
  328.  
  329.         char* tokentypeptr = decode(thistoken);
  330.  
  331.         clearBuffer(printBuffer, OUTFILE_BUFF_SIZE);
  332.         sprintf(printBuffer, "Token Number: %-12dToken Type: %-15sActual Token: %-15s\n", thistoken, tokentypeptr, tokenBuff);
  333.  
  334.         printf("%s", printBuffer);
  335.         //fputs(printBuffer, OutFile);
  336.  
  337.         clearBuffer(tokenBuff, TOKEN_BUFF_SIZE);
  338.     } while (thistoken != SCANEOF);
  339.  
  340.  
  341.     clearBuffer(printBuffer, OUTFILE_BUFF_SIZE);
  342.     sprintf(printBuffer, "Number of total errors: %d\n", Lex_Err_Total);
  343.     fputs(printBuffer, LisFile);
  344.  
  345. }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement