Advertisement
chasesmith20

scanner.c

Feb 26th, 2020
197
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
C 9.57 KB | None | 0 0
  1. /*Group 2 Program 2
  2. Paul Maclean- mac7537@calu.edu
  3. Mike Gorse- gor9632@calu.edu
  4. Robert Breckenridge- bre6896@calu.edu
  5. Chase Smith- smi8808@calu.edu
  6.  
  7. CSC 460
  8. Language Translations
  9. */
  10.  
  11. #include "file_util.h"
  12. #include "scanner.h"
  13.  
  14.  
  15. #define LINE_BUFF_SIZE 1024
  16. #define TOKEN_BUFF_SIZE 1024
  17. #define OUTFILE_BUFF_SIZE 1024
  18. #define TOKEN_TYPE_SIZE 20
  19.  
  20. fpos_t old_pos;                                   //global variable for the old position
  21. fpos_t cur_pos;                                   //global variable for the current position
  22.  
  23. char lineBuff[LINE_BUFF_SIZE] = { '\0' };
  24. char tokenBuff[TOKEN_BUFF_SIZE] = { '\0' };
  25. char tokenType[TOKEN_TYPE_SIZE] = { '\0' };
  26.  
  27. char errorBuffer[OUTFILE_BUFF_SIZE] = { '\0' };    //for formatting lexical errors in listing file
  28. char lisfileBuffer[OUTFILE_BUFF_SIZE] = { '\0' };  //for formatting numbered lines in listing file
  29.  
  30. char Lex_Err_Buff[LINE_BUFF_SIZE] = { '\0' };
  31. int Lex_Err_Total = 0;
  32. int Lex_Err_Index = 0;
  33.  
  34. int buffpos = 0;
  35.  
  36. int lineCount = 1;
  37. int linePos = 0;
  38.  
  39. TokenId thistoken;
  40.  
  41.  
  42.  
  43.  
  44.  
  45. //*** Scanner Support Functions ***//
  46.  
  47. logical isDelimiter(char currchar) {
  48.     return (currchar == ' ' || currchar == '\n' || currchar == '\t' || currchar == EOF);
  49. }
  50.  
  51.  
  52. //Non-destructive character lookahead. Returns capital, for easier use.
  53. char peekChar() {
  54.     char filechar = fgetc(InpFile);
  55.     ungetc(filechar, InpFile);
  56.     return toupper(filechar);
  57. }
  58.  
  59.  
  60. //Consumes a character from the InpFile's instream. Also prints lines & lex errors to LisFile
  61. char consumeChar(logical permanent) {
  62.     char filechar = fgetc(InpFile);
  63.  
  64.     if (permanent) {
  65.         //Check if a list needs to be printed to the lisfile
  66.         if (filechar == '\n' || (filechar == EOF && linePos > 0)) {
  67.             int i = 0;
  68.  
  69.             //Format the collected line. No need to clear buffer.
  70.             sprintf(lisfileBuffer, "%2d   %s\n", lineCount, lineBuff);
  71.             fputs(lisfileBuffer, LisFile);
  72.  
  73.             //Print each lex error
  74.             for (i = 0; i < Lex_Err_Index; i++)
  75.             {
  76.                 char currErr = Lex_Err_Buff[i];
  77.                 sprintf(errorBuffer, "Lexical Error on line %2d-   %c not recognized\n", lineCount, currErr);
  78.                 fputs(errorBuffer, LisFile);
  79.             }
  80.             Lex_Err_Index = 0; //Reset errors for next line
  81.  
  82.             //Clear error and line buffers
  83.             memset(Lex_Err_Buff, '\0', LINE_BUFF_SIZE);
  84.             memset(lineBuff, '\0', LINE_BUFF_SIZE);
  85.  
  86.             //Reset line index
  87.             linePos = 0;
  88.             lineCount++;
  89.         }
  90.         else {
  91.             //Append character to lineBuffer.
  92.             lineBuff[linePos] = filechar;
  93.             linePos++;
  94.         }
  95.     }
  96.  
  97.     return toupper(filechar);
  98. }
  99.  
  100.  
  101. //Consumes a digit. Also consumes the character given to the function (guaranteed to be a "-" or a digit)
  102. void gatherDigit(char currchar, logical permanent) {
  103.     logical stillDigit = ltrue;
  104.  
  105.     do {
  106.         tokenBuff[buffpos] = currchar;
  107.         buffpos++;
  108.         currchar = peekChar();
  109.  
  110.         if (isdigit(currchar) && !isDelimiter(currchar) && buffpos < TOKEN_BUFF_SIZE) {
  111.             consumeChar(permanent);
  112.         }
  113.         else
  114.             stillDigit = lfalse;
  115.  
  116.     } while (stillDigit);
  117. }
  118.  
  119. //Setts thistoken to the given token and places a tokenval into the tokenBuff. Ex: (GREATEREQUALOP, ">=")
  120. void setToken(TokenId token, char* tokenvalue) {
  121.     thistoken = token;
  122.  
  123.     //if NULL is provided, do nothing
  124.     if (tokenvalue != NULL)
  125.         strcpy_s(tokenBuff, TOKEN_BUFF_SIZE, tokenvalue);
  126. }
  127.  
  128. void clearBuffer(char* buffer, int bufferSize)
  129. {
  130.     int i = 0;
  131.     for (i; i < bufferSize; i++)
  132.     {
  133.         buffer[i] = '\0';
  134.     }
  135. }
  136.  
  137. char* decode(TokenId typeid)
  138. {
  139.     return (char*)TOKEN_TYPES[typeid];
  140. }
  141.  
  142. TokenId getReservedId() {
  143.     TokenId foundtoken = ID; //assume it's an ID, because
  144.     int i = 0;
  145.     //only go up to 13 because that's the last enterable reserved word
  146.     for (i = 0; i < 13; i++) {
  147.         if (strcmp(tokenBuff, TOKEN_TYPES[i]) == 0) {
  148.             foundtoken = i;
  149.         }
  150.     }
  151.  
  152.     return foundtoken;
  153. }
  154.  
  155.  
  156.  
  157. TokenId getToken(logical permanent)
  158. {
  159.     //if not permanent, set old pos here
  160.     if (!permanent) {
  161.         old_pos = 1;
  162.     }
  163.  
  164.     thistoken = -1; //Claim no token found
  165.     buffpos = 0;
  166.     char currchar;
  167.  
  168.     do {
  169.         currchar = consumeChar(permanent);
  170.  
  171.         if (isspace(currchar)) {
  172.             //do nothing
  173.         }
  174.         else if (isalpha(currchar)) { //Compose ID/Reserved Word
  175.             logical stillId = ltrue;
  176.  
  177.             do {
  178.                 tokenBuff[buffpos] = currchar;
  179.                 buffpos++;
  180.                 currchar = peekChar();
  181.                 if (isalnum(currchar) && !isDelimiter(currchar) && buffpos < TOKEN_BUFF_SIZE)
  182.                     consumeChar(permanent);
  183.                 else
  184.                     stillId = lfalse;
  185.  
  186.             } while (stillId);
  187.  
  188.             setToken(getReservedId(), NULL);
  189.         }
  190.         else if (isdigit(currchar)) { //Compose INTLITERAL
  191.             gatherDigit(currchar, permanent);
  192.             setToken(INTLITERAL, NULL);
  193.         }
  194.         else if (currchar == '-') { //check possible - paths (--COMMENT, -INTLITERAL, -)
  195.             char lookahead = peekChar();
  196.  
  197.             if (lookahead == '-') { //compose a comment
  198.                 do {
  199.                     currchar = consumeChar(permanent); //consume chars until newline. consume the newline, too.
  200.                 } while (currchar != '\n' && currchar != EOF && buffpos < TOKEN_BUFF_SIZE);
  201.  
  202.             }
  203.             else if (isdigit(lookahead)) {
  204.                 gatherDigit(currchar, permanent);
  205.                 setToken(INTLITERAL, NULL);
  206.             }
  207.             else {
  208.                 setToken(MINUSOP, "-");
  209.             }
  210.  
  211.         }
  212.         else if (currchar == ':') { //check possible : paths (:=, lex error)
  213.             char lookahead = peekChar();
  214.  
  215.             if (lookahead == '=') {
  216.                 consumeChar(permanent);
  217.                 setToken(ASSIGNOP, ":=");
  218.             }
  219.             else {
  220.                 setToken(LEXERR, NULL);
  221.  
  222.                 tokenBuff[buffpos] = currchar;
  223.                 Lex_Err_Buff[Lex_Err_Index] = currchar;
  224.                 Lex_Err_Index++;
  225.                 Lex_Err_Total++;
  226.             }
  227.         }
  228.         else if (currchar == '<') { //check possible : paths (<=, <>, <)
  229.             char lookahead = peekChar();
  230.  
  231.             if (lookahead == '=') {
  232.                 consumeChar(permanent);
  233.                 setToken(LESSEQUALOP, "<=");
  234.             }
  235.             else if (lookahead == '>') {
  236.                 consumeChar(permanent);
  237.                 setToken(NOTEQUALOP, "<>");
  238.             }
  239.             else {
  240.                 setToken(LESSOP, "<");
  241.             }
  242.         }
  243.         else if (currchar == '>') { //check possible : paths (>=, >)
  244.             char lookahead = peekChar();
  245.  
  246.             if (lookahead == '=') {
  247.                 consumeChar(permanent); //eat the '='
  248.                 setToken(GREATEREQUALOP, ">=");
  249.             }
  250.             else {
  251.                 setToken(GREATEROP, ">");
  252.             }
  253.         }
  254.         else if (currchar == '(') {
  255.             setToken(LPAREN, "(");
  256.         }
  257.         else if (currchar == ')') {
  258.             setToken(RPAREN, ")");
  259.         }
  260.         else if (currchar == ';') {
  261.             setToken(SEMICOLON, ";");
  262.         }
  263.         else if (currchar == ',') {
  264.             setToken(COMMA, ",");
  265.         }
  266.         else if (currchar == '+') {
  267.             setToken(PLUSOP, "+");
  268.         }
  269.         else if (currchar == '*') {
  270.             setToken(MULTOP, "*");
  271.         }
  272.         else if (currchar == '/') {
  273.             setToken(DIVOP, "/");
  274.         }
  275.         else if (currchar == '!') {
  276.             setToken(NOTOP, "!");
  277.         }
  278.         else if (currchar == '=') {
  279.             setToken(EQUALOP, "=");
  280.         }
  281.         else if (currchar == EOF) {
  282.             setToken(SCANEOF, "EOF");
  283.         }
  284.         else {
  285.             tokenBuff[buffpos] = currchar;
  286.             thistoken = LEXERR;
  287.  
  288.             Lex_Err_Buff[Lex_Err_Index] = currchar;
  289.             Lex_Err_Index++;
  290.             Lex_Err_Total++;
  291.         }
  292.     } while (thistoken == -1 && currchar != EOF);
  293.  
  294.  
  295.  
  296.     //if not permanent, set cur pos to old pos
  297.     if (!permanent) {
  298.         cur_pos = old_pos;
  299.     }
  300.  
  301.     return thistoken;
  302. }
  303.  
  304.  
  305.  
  306. //*** Primary Scanner Function ***//
  307.  
  308. void scanner(FILE* InpFile)
  309. {
  310.  
  311.     char printBuffer[OUTFILE_BUFF_SIZE] = { '\0' };
  312.  
  313.     TokenId thistoken;
  314.  
  315.     do {
  316.         thistoken = getToken(lfalse); //CURRENTLY ALWAYS PEEKING NEXT TOKEN. BEWARE.
  317.  
  318.         //parsley goes here
  319.  
  320.         char* tokentypeptr = decode(thistoken);
  321.  
  322.         clearBuffer(printBuffer, OUTFILE_BUFF_SIZE);
  323.         sprintf(printBuffer, "Token Number: %-12dToken Type: %-15sActual Token: %-15s\n", thistoken, tokentypeptr, tokenBuff);
  324.  
  325.         printf("%s", printBuffer);
  326.         //fputs(printBuffer, OutFile);
  327.  
  328.         clearBuffer(tokenBuff, TOKEN_BUFF_SIZE);
  329.     } while (thistoken != SCANEOF);
  330.  
  331.  
  332.     clearBuffer(printBuffer, OUTFILE_BUFF_SIZE);
  333.     sprintf(printBuffer, "Number of total errors: %d\n", Lex_Err_Total);
  334.     fputs(printBuffer, LisFile);
  335.  
  336. }
  337.  
  338. //utilizing the fgetpos() and fsetpos() functions. Does this go here?
  339. int fgetpos(FILE* stream, fpos_t* pos){
  340.     FILE* InpFile;
  341.     fpos_t old_pos;
  342.  
  343.     InpFile = fopen("file.txt", "w+");
  344.     fgetpos(InpFile, &old_pos);
  345.     fputs("position: %d\n", InpFile);
  346.  
  347.     fsetpos(InpFile, &old_pos);
  348.     fputs("Position overwritten", InpFile);
  349.     fclose(InpFile);
  350.  
  351.     return 0;
  352. }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement