Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- #include <iostream>
- #include <vector>
- #include <string>
- #include <set>
- #include <map>
- #include <stack>
- using namespace std;
- char startP; // starting variable of the productions
- map<set<string>, int> refMap; // refMap, used to check if a set of kernels already exists
- map<int, map<char, int>> refShiftMap; // used to keep track of transitions out of each state.
- /*-----------------------------------------------------------------------------------------------------------------*/
- map<char, string> mapGrammar(vector <string> grammar) // returns a map from input strings. Variables = keys, Productions = values
- {
- map<char, string> Map; // creates a temporary map to be returned
- //startP = grammar[0][0];
- for (int i = 0; i < grammar.size(); i++) // parse through the vector of strings
- {
- string tmp = ""; // creates an initial string for the production
- for (int j = 3; j < (grammar.at(i).size()); j++) // looks at everything after "->"
- {
- tmp += grammar[i][j]; // adds production to string, character by character
- }
- //cout << "map @ " << i << ": " << grammar[i][0] << " -> " << tmp << endl;
- Map.insert({ grammar[i][0], tmp }); // inserts the variable and production to the map
- }
- return Map; // returns completed map
- }
- map<char, set<char>> mapSet(vector <string> grammar) // returs a map from input strings. Variables = keys. Sets = productions
- {
- map<char, set<char>> Map; // creates a temporary map to be returned
- for (int i = 0; i < grammar.size(); i++) // parse through the vector of strings
- {
- Map.insert({ grammar[i][0], {} }); // creates empty sets for each variable
- }
- return Map; // returns completed map
- }
- vector<string> getSubProd(string Prod) // returns a vector of all the individual sub-productions of a variable. ( D->Aa|d|# returns 0:Aa, 1:d, 2:# )
- {
- vector<string> temp; // create a temp vector to be returned
- string str; // creates a tmp string that takes in each individual sub-production
- for (int i = 0; i < Prod.size(); i++) // parse through the entire production
- {
- if (Prod.at(i) == '|') // if OR statement is reached
- {
- temp.push_back(str); // add the string before the OR statement to the vector list.
- str = ""; // resets the temp string
- }
- else
- {
- str += Prod.at(i); // add the current character to the sub-Production string
- }
- }
- if (str.size() > 0) // no OR statement found
- {
- temp.push_back(str); // add the string to the vector list
- }
- return temp; // returns the vector list of strings.
- }
- vector<string> antiSubProd(vector<string> items)
- {
- map<char, string> Map;
- for (int i = 0; i < items.size(); i++) // for each productions
- {
- char lhs = items[i][0];
- string lhsP;
- lhsP.push_back(lhs);
- lhsP.push_back('-');
- lhsP.push_back('>');
- if (Map.find(lhs) == Map.end()) // if lhs doesnt exist in map yet
- {
- Map.insert(pair<char, string>(lhs, lhsP)); // add lhs to map
- }
- else
- {
- Map.find(lhs)->second.push_back('|'); // create OR seperator
- }
- for (int j = 3; j < items[i].size(); j++) // for each letter past ->
- {
- Map.find(lhs)->second.push_back(items[i][j]);
- }
- }
- vector<string> newGrammar;
- for (auto it = Map.begin(); it != Map.end(); it++)
- {
- newGrammar.push_back(it->second);
- }
- return newGrammar;
- }
- set<char> getUnion(const set<char>& a, const set<char>& b) // union two sets
- {
- set<char> result = a;
- result.insert(b.begin(), b.end());
- return result;
- }
- void printSet(set<char> set) // print the contents of a set
- {
- cout << "{ ";
- for (auto it = set.begin(); it != set.end(); it++)
- {
- cout << *it << ", ";
- }
- cout << "}";
- }
- set<char> First(map<char, string> &Map, char S) // returns the First() set of a Variable. Takes in the map to be used as reference, and a character you want to return the First() set of.
- {
- set<char> finalSet; // set that will be returned to the function
- string production; // entire string value that is associated with each variable key. Will be broken down later by getSubProd()
- auto it = Map.find(S); // Look for the key associated with input character
- if (it != Map.end()) // if foudn
- {
- production = it->second; // assign production to the string value mapped to Variable 'S'
- }
- else // 'S' not found in map keys
- {
- finalSet.insert(S); // insert unidentified Terminal into finalSet
- return finalSet;
- }
- vector<string> initSet;
- initSet = getSubProd(production); // a vector of the sub-Productions of char 'S'
- for (int i = 0; i < initSet.size(); i++) // check each sub-Production. ( Aa | d | # )
- {
- int j = 0;
- char firstChar = initSet[i][j]; // firstChar = first character of each sub-production ( A | d | # )
- if (Map.find(firstChar) == Map.end()) // if first character is a Terminal
- {
- finalSet.insert(firstChar); // add character to finalSet
- }
- else if (firstChar != S) // first character is a Non-Terminal and not equal to the left side of the production, i.e First(S) = First(S);
- {
- finalSet = getUnion(finalSet, First(Map, firstChar)); // unions finalSet with finalSet of the Non-Terminal
- while (finalSet.find('#') != finalSet.end() && (j + 1) < initSet.at(i).size()) // while epsilon is returned from First() and it's not the last Variable in the production
- {
- finalSet.erase('#'); // erase epsilon
- char nextChar = initSet[i][++j]; // look at the next character in the production
- finalSet = getUnion(finalSet, First(Map, nextChar)); // find First() of the next character
- }
- }
- }
- return finalSet;
- }
- set<char> Follow(map<char, string> &Map, char S)
- {
- set<char> finalSet; // set that will be returned to the function
- string production;
- if (S == startP) // If the character is the starting production
- {
- finalSet.insert('$');
- }
- auto itr = Map.begin(); // itr points to the beginning of the map
- for (itr = Map.begin(); itr != Map.end(); itr++) // for each key in the map
- {
- production = itr->second; // production = production string in map
- vector<string> initSet;
- initSet = getSubProd(production); // create a vector of sub-Productions givin the production
- for (int j = 0; j < initSet.size(); j++) // for each sub-Production in the vector
- {
- for (int k = 0; k < initSet.at(j).size(); k++) // for each character in the sub-Production
- {
- if (initSet[j][k] == S) // found Variable in sub-Production
- {
- int l = k + 1; // points to next character in the sub-Production
- if (l >= initSet.at(j).size()) // if Variable is last character of sub-Production
- {
- if (initSet[j][k] != itr->first) // if Follow() of Variable is not equal to Follow() of variable on the left side of the production.
- {
- finalSet = getUnion(finalSet, Follow(Map, itr->first)); // Follow() of Variable = Follow() of Variable on left side of the production
- }
- }
- else
- {
- char nextChar = initSet[j][l]; // next character in the sub-Production
- finalSet = getUnion(finalSet, First(Map, nextChar)); // Follow() = First(nextChar)
- while (finalSet.find('#') != finalSet.end() && (l + 1) < initSet.at(j).size()) // next character is a Non-Terminal that contains '#' && Not at end of sub-Production
- {
- finalSet.erase('#');
- finalSet = getUnion(finalSet, First(Map, initSet[j][++l])); // Moves onto the next character in the sub-Production
- }
- if (finalSet.find('#') != finalSet.end()) // if the last character returned an epsilon
- {
- finalSet.erase('#');
- if (initSet[j][l] != itr->first) // if Follow() of Variable is not equal to Follow() of variable on the left side of the production.
- {
- finalSet = getUnion(finalSet, Follow(Map, itr->first)); // Follow() of Variable = Follow() of Variable on left side of the production
- }
- }
- }
- }
- }
- }
- }
- return finalSet;
- }
- map<char, set<char>> getFollowMap(vector<string> input)
- {
- vector<string> initFollow = antiSubProd(input);
- map<char, string> gramMap = mapGrammar(initFollow);
- map<char, set<char>> followMap;
- followMap = mapSet(initFollow); // creates a map for the follow set
- for (int i = 0; i < initFollow.size(); i++) // assigns Follow() to each Variable in the grammar
- {
- set<char> temp = Follow(gramMap, initFollow[i][0]);
- followMap.find(initFollow[i][0])->second = temp;
- cout << "Follow(" << initFollow[i][0] << ") = ";
- printSet(temp);
- cout << endl;
- }
- return followMap;
- }
- /*---------------------------------------------------------------------------------------------------------------------------------------*/
- void printLR(vector<vector<string>> v) // goes through the entire vector of vector of strings
- {
- for (int i = 0; i < v.size(); i++) // for each table
- {
- cout << "Table: " << i << endl;
- for (int j = 0; j < v[i].size(); j++) // for each production
- {
- cout << v[i][j] << endl;
- }
- cout << endl;
- }
- }
- void printRefMap(map<set<string>, int> refMap) // prints the reference map.
- {
- for (auto it = refMap.begin(); it != refMap.end(); it++) // for each set in the map
- {
- cout << "kernel: " << it->second << endl; // it->second = table the set appears in
- for (auto setItr = it->first.begin(); setItr != it->first.end(); setItr++) // for each item in the set
- {
- cout << *setItr << endl;
- }
- cout << endl;
- }
- }
- set<char> getGrammarSymbols(vector<string> grammar) // creates a set of every symbol in the grammar. Used for GOTO(I,X)
- {
- set<char> gramSymbs;
- for (int i = 0; i < grammar.size(); i++) // for each production
- {
- for (int j = 0; j < grammar[i].size(); j++) // for each char of the production
- {
- char currentChar = grammar[i][j];
- if (currentChar != '-' && currentChar != '>' && gramSymbs.find(currentChar) == gramSymbs.end()) // if the symbol hasnt been seen, and it's not a "->"
- {
- gramSymbs.insert(currentChar);
- }
- }
- }
- return gramSymbs;
- }
- set<char> getVars(vector<string> grammar)
- {
- set<char> variables;
- for (int i = 0; i < grammar.size(); i++) // creates a set of all the variables
- {
- variables.insert(grammar[i][0]); // first character of every production is a variable
- }
- return variables;
- }
- set<char> getTerms(vector<string> grammar, set<char> vars)
- {
- set<char> terms;
- for (int i = 0; i < grammar.size(); i++) // for each production
- {
- for (int j = 0; j < grammar[i].size(); j++) // for each char of the production
- {
- char currentChar = grammar[i][j];
- if (currentChar != '-' && currentChar != '>' &&
- vars.find(currentChar) == vars.end()) // if the terminal hasnt been seen, and it's not a "->"
- {
- terms.insert(currentChar);
- }
- }
- }
- return terms;
- }
- vector<char> getAllSymbols(set<char> terms, set<char> vars)
- {
- vector<char> symbols;
- for (auto itr = terms.begin(); itr != terms.end(); itr++)
- {
- symbols.push_back(*itr);
- }
- symbols.push_back('$');
- for (auto itr = vars.begin(); itr != vars.end(); itr++)
- {
- symbols.push_back(*itr);
- }
- return symbols;
- }
- vector<string> initialDot(vector<string> v) // creates a string of productions that start with "."
- {
- vector<string> finalString;
- string temp;
- temp = v[0][0];
- temp += "'->.";
- temp += v[0][0];
- finalString.push_back(temp); // augmented production is added to the grammar (E'->.E)
- for (int i = 0; i < v.size(); i++) // adds "." to the beginning of every production
- {
- temp = v[i];
- if (temp.size() == 4 && temp[3] == '#')
- {
- temp.pop_back();
- }
- temp.insert(3, ".");
- finalString.push_back(temp);
- }
- return finalString;
- }
- char charAfterDot(string production) // returns the character after a '.'
- {
- int dotLoc = production.find('.');
- char nextChar;
- if (dotLoc != production.size() - 1) // if the dot isnt at the end of the production
- {
- nextChar = production[dotLoc + 1]; // char after the dot
- }
- else // if dot is at the end
- {
- nextChar = NULL;
- }
- return nextChar;
- }
- string moveDot(string prod) // move the dot forward in the production
- {
- int loc = prod.find(".", 3); // locate the dot
- if (loc != prod.size() - 1) // dot isn't at the end of the production
- {
- swap(prod.at(loc), prod.at(loc + 1)); // swaps the dot with the character after it
- }
- return prod; // return a new production with the dot moved forward one spot
- }
- vector<string> GOTO(vector<string> itemSet, char c)
- {
- vector<string> kernels; // vector we'll be returning
- for (int i = 0; i < itemSet.size(); i++) // for each item in the set
- {
- char nextChar = charAfterDot(itemSet[i]); // get the character after the '.'
- if (nextChar == c) // if nextChar = to character we're building a set for
- {
- string nextProd = moveDot(itemSet[i]); // move the dot forward in the item
- kernels.push_back(nextProd); // add the new item to the kernel vector
- }
- }
- return kernels;
- }
- set<string> setOf(vector<string> kernels) // creates a set given a vector<string>
- {
- set<string> kernelSet;
- for (int i = 0; i < kernels.size(); i++) // for each item in the vector
- {
- kernelSet.insert(kernels[i]);
- }
- return kernelSet;
- }
- vector<string> Closure(vector<string> dotVector, vector<string> kernels) // creats the closure of a given set of productions. Uses the origin dotVector table as reference.
- {
- vector<string> closure; // a vector of the productions in closure
- closure = kernels; // sets closure equal to the current set of productions
- set<char> variables;
- for (int i = 0; i < dotVector.size(); i++) // creates a set of all the variables
- {
- variables.insert(dotVector[i][0]); // first character of every production is a variable
- }
- set<char> seenVars; // a set of all the variables that have already been seen
- for (int i = 0; i < closure.size(); i++) // for each production in the closure vector
- {
- int dotLoc = closure[i].find(".", 3); // find the dot in the production
- char nextChar = closure[i][dotLoc + 1]; // character after the dot
- if (variables.find(nextChar) != variables.end() && seenVars.find(nextChar) == seenVars.end()) // if next character is a variable and hasn't already been seen.
- {
- for (int j = 1; j < dotVector.size(); j++) // for each production in the dotVector. Dont check the augmented production
- {
- if (dotVector[j][0] == nextChar) // if production starting with nextChar is found
- {
- closure.push_back(dotVector[j]); // add that production to the closure vector
- }
- }
- seenVars.insert(nextChar); // adds variable to the list of seen Variables (prevents infinite loops)
- }
- }
- return closure;
- }
- void outputShifts(map<char, int> Map)
- {
- for (auto itr = Map.begin(); itr != Map.end(); itr++)
- {
- cout << itr->first << " -> " << itr->second << endl;
- }
- }
- void outputRefShiftMap(map<int, map<char, int>> shiftMap)
- {
- for (auto itr = shiftMap.begin(); itr != shiftMap.end(); itr++)
- {
- cout << "table: " << itr->first << endl;
- outputShifts(itr->second);
- cout << endl;
- }
- }
- vector<vector<string>> LRset(vector<string> v) // creates the LR(0) sets of a grammar
- {
- vector<vector<string>> LR0; // The LR(0) set.
- set<char> gramSymbs = getGrammarSymbols(v); // set of every symbol that appears in the grammar. Used for GOTO(I,X)
- vector<string> dotProductions = initialDot(v); // Augmented grammar with '.' in front of each production
- vector<string> set0 = { dotProductions[0] }; // Put augmented production (E'->E) in first set
- set0 = Closure(dotProductions, set0); // Do closure of the first set
- LR0.push_back(set0); // pushes first set onto LR(0) sets
- refMap.insert(pair<set<string>, int>({ set0[0] }, 0));
- map<char, int> shifts;
- for (int i = 0; i < LR0.size(); i++) // for each set
- {
- shifts.clear();
- for (int j = 0; j < LR0[i].size(); j++) // for each production in the set
- {
- for (auto itr = gramSymbs.begin(); itr != gramSymbs.end(); itr++) // for each grammar symbol
- {
- vector<string> kernels = GOTO(LR0[i], *itr); // create vector of Kernels from a grammar symbol
- set<string> kernelSet = setOf(kernels); // assign kernels to set, used for checking with refMap
- if (kernels.size() != 0 && refMap.find(kernelSet) == refMap.end()) // if kernelSet exist and isn't in refMap
- {
- vector<string> closure = Closure(dotProductions, kernels); // do closure of kernel vector
- LR0.push_back(closure); // add Closure(kernel) to LR(0) sets
- refMap.insert(pair<set<string>, int>(kernelSet, LR0.size() - 1)); // insert kernelSet in refMap for future referencing
- }
- if (kernels.size() != 0)
- {
- shifts.insert(pair<char, int>(*itr, refMap.find(kernelSet)->second));
- }
- }
- }
- refShiftMap.insert(pair<int, map<char, int>>(i, shifts));
- }
- cout << endl;
- printLR(LR0);
- cout << endl << "kernel reference Map" << endl;
- printRefMap(refMap);
- cout << endl << "shift reference Map" << endl;
- outputRefShiftMap(refShiftMap);
- return LR0;
- }
- void printGrammar(vector<string> grammar)
- {
- cout << "Grammar" << endl;
- for (int i = 0; i < grammar.size(); i++)
- {
- cout << grammar[i] << endl;
- }
- cout << endl;
- }
- string finalItem(set<string> kernels)
- {
- for (auto itr = kernels.begin(); itr != kernels.end(); itr++)
- {
- string prod = *itr;
- int dotLoc = prod.find(".",0);
- if (dotLoc == prod.size() - 1)
- {
- return prod;
- }
- }
- return "";
- }
- set<string> refMapFind(int i)
- {
- for (auto itr = refMap.begin(); itr != refMap.end(); itr++)
- {
- if (itr->second == i)
- {
- return itr->first;
- }
- }
- }
- /*-------------------------------------------------------------------------------------------------------------*/
- int main()
- {
- vector<string> input;
- /*
- input.push_back("E->TD");
- input.push_back("D->+TD");
- input.push_back("D->#");
- input.push_back("T->FU");
- input.push_back("U->*FU");
- input.push_back("U->#");
- input.push_back("F->(E)");
- input.push_back("F->I");
- input.push_back("I->x");
- input.push_back("I->y");
- input.push_back("I->z");
- /*/
- /*
- input.push_back("S->SS");
- input.push_back("S->00");
- input.push_back("S->11");
- /*/
- //*
- input.push_back("S->0S0");
- input.push_back("S->1S1");
- input.push_back("S->1");
- input.push_back("S->0");
- /*/
- /*
- input.push_back("S->A");
- input.push_back("S->B");
- input.push_back("S->C0A");
- input.push_back("A->C0C");
- input.push_back("C->0C1");
- input.push_back("C->CC");
- input.push_back("C->#");
- input.push_back("B->D1B");
- input.push_back("B->D1D");
- input.push_back("D->0D1");
- input.push_back("D->1D0");
- input.push_back("D->DD");
- input.push_back("D->#");
- /*/
- /*
- input.push_back("E->T");
- input.push_back("E->T+E");
- input.push_back("T->(E)");
- input.push_back("T->i*T");
- input.push_back("T->i");
- /*/
- /*
- input.push_back("S->AA");
- input.push_back("A->aA");
- input.push_back("A->b");
- /*/
- /*
- input.push_back("E->E+T");
- input.push_back("E->T");
- input.push_back("T->T*F");
- input.push_back("T->F");
- input.push_back("F->(E)");
- input.push_back("F->i");
- /*/
- /*
- input.push_back("S->ABCD");
- input.push_back("A->a");
- input.push_back("A->#");
- input.push_back("B->CD");
- input.push_back("B->b");
- input.push_back("C->c");
- input.push_back("C->#");
- input.push_back("D->Aa");
- input.push_back("D->d");
- input.push_back("D->#");
- /*/
- /*
- input.push_back("S->E");
- input.push_back("E->E+T");
- input.push_back("E->T");
- input.push_back("T->T*F");
- input.push_back("T->F");
- input.push_back("F->i");
- input.push_back("F->(E)");
- */
- printGrammar(input);
- startP = input[0][0];
- set<char> variables = getVars(input);
- set<char> terminals = getTerms(input, variables);
- vector<char> symbols = getAllSymbols(terminals, variables);
- map<char, set<char>> followMap = getFollowMap(input);
- vector<vector<string>> LR = LRset(input);
- //map<set<string>, int> refMap; // refMap, used to check if a set of kernels already exists
- //map<int, map<char, int>> refShiftMap; // used to keep track of transitions out of each state.
- const int setSize = LR.size();
- stack<int> states;
- vector<vector<string>> AGTable;
- AGTable.resize(setSize);
- cout << "ACTION & GOTO Table" << endl;
- cout << "s: ";
- for (int i = 0; i < symbols.size(); i++)
- {
- cout << symbols[i] << " ";
- }
- cout << endl;
- for (int i = 0; i < LR.size(); i++)
- {
- AGTable[i].resize(symbols.size());
- auto state = refShiftMap.find(i);
- if (i < 10)
- {
- cout << i << ": ";
- }
- else
- cout << i << ":";
- set<string> kernels = refMapFind(i);
- string reduce = finalItem(kernels);
- for (int j = 0; j < symbols.size(); j++)
- {
- char currSym = symbols[j];
- auto transTable = state->second;
- auto transition = transTable.find(currSym);
- if (transition != transTable.end())
- {
- if (j < terminals.size()+1)
- {
- AGTable[i][j] = "s" + to_string(transition->second);
- }
- else
- {
- AGTable[i][j] = "g" + to_string(transition->second);
- }
- }
- else if (reduce.size() > 0)
- {
- char lhs = reduce[0];
- set<char> followSet = followMap.find(lhs)->second;
- if (followSet.find(currSym) != followSet.end())
- {
- if (currSym == '$' && reduce[1] == '\'')
- {
- AGTable[i][j] = "A!";
- }
- else
- {
- AGTable[i][j] = "r*";
- }
- }
- else
- {
- AGTable[i][j] = "--";
- }
- }
- else
- {
- AGTable[i][j] = "--";
- }
- cout << AGTable[i][j] << " ";
- }
- cout << endl;
- }
- system("pause");
- return 0;
- }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement