Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- #include<bits/stdc++.h>
- using namespace std;
/// One symbol-table entry: a lexeme (symbol name) together with its token category (symbol type).
class SymbolInfo{
    std::string symbol_name, symbol_type;
public:
    /// Builds an entry from a name and a type.
    /// Both arguments are taken by value and moved into the members, so callers
    /// passing temporaries pay for no extra copy.
    SymbolInfo(std::string s_n, std::string s_t)
        : symbol_name(std::move(s_n)), symbol_type(std::move(s_t))
    {
    }
    /// Writes the entry to std::cout as "<name, type> " (trailing space, no newline).
    /// Const-qualified so it can be called on const objects and through const references.
    void printSymbol() const
    {
        std::cout << "<" << symbol_name << ", " << symbol_type << "> ";
    }
    /// Returns true when s_n is exactly equal (case-sensitive) to the stored symbol name.
    bool isSame(const std::string &s_n) const
    {
        return s_n == symbol_name;
    }
};
- class SymbolTable{
- ///Table is vector pointer initialized in constructor
- vector<SymbolInfo> *Table;
- int mod, hash_type; /// member variables
- /// these functions are private because we don't need to call it from outside of this class.
- /// it returns the index of the SymbolInfo stored in the Table based on the mod and hash type.
- int get_index(string s_n)
- {
- int len = s_n.size();
- int index;
- if(hash_type == 1) /// hash_type = 1 denotes technique-1
- {
- if(len %2 == 0) /// for even
- {
- if(len > 1) index = ((int)s_n[len - 1]) + ((int)s_n[len - 2]) * 28; ///if size is at least 2
- else index = ((int)s_n[len - 1]) * 28; /// if size is 1
- index %= mod;
- }
- else
- {
- if(len > 1) index = ((int)s_n[0]) + ((int)s_n[1]) * 28;
- else index = ((int)s_n[0]) * 28;
- index %= mod;
- }
- }
- else if(hash_type == 2) /// hash_type = 2 denotes technique-2
- {
- index = 0;
- for (int i = 0; i < len; i+=2) /// Iterate through all the even position characters
- {
- int x = ((int)s_n[i]) << 8; /// left shift 8 times
- index += x % mod; /// mod the value of x and add it
- index %= mod; /// mod again, because i used ((a+b) mod) = ((a mod + b mod) mod)
- }
- }
- return index;
- }
- /// it returns the position of the index where the symbol name exists or returns -1 if it doesn't exist
- int search(string s_n)
- {
- int len = s_n.size();
- int index = get_index(s_n); /// find the index of given string name
- for (int i = 0; i < Table[index].size(); i++) /// iterate though all elements of that index
- if (Table[index][i].isSame(s_n)) /// if any element matches with given string, return it's position
- return i;
- return -1; /// if no matching is found, return -1
- }
- public:
- SymbolTable(int mod, int h_type = 1) /// by default hash_type is set 1
- {
- this->mod = mod;
- if(h_type == 2) this->hash_type = h_type; /// hash_type = 2 for technique-2
- else hash_type = 1; /// otherwise hash_type = 1 for technique-1
- Table = new vector<SymbolInfo>[mod]; /// allocate the memory of the table dynamically
- }
- void Insert(string s_n, string s_t)
- {
- int len = s_n.size();
- int index = get_index(s_n);
- int pos = search(s_n); /// search if it is already exists or not
- if(pos == -1) /// if not exists, search() function will return -1
- {
- pos = Table[index].size(); /// pos will be right after the last element which is also the size of the vector of that index.
- Table[index].push_back(SymbolInfo(s_n, s_t)); /// push the symbolInfo
- Table[index][pos].printSymbol();
- cout << "Inserted at position (" << index << ", " << pos << ")\n\n";
- }
- else /// if pos != -1, that means it already exists!
- {
- Table[index][pos].printSymbol();
- cout << "already exists\n\n";
- }
- }
- void LookUp(string s_n) /// this function print the index and position of SymbolInfo
- {
- int len = s_n.size();
- int index = get_index(s_n), pos = search(s_n);
- if(pos != -1) cout << "Found at (" << index << ", " << pos << ")\n\n";
- else cout << s_n << " not found!\n\n";
- }
- void Delete(string s_n)
- {
- int len = s_n.size();
- int index = get_index(s_n), pos = search(s_n);
- if(pos == -1)
- {
- cout << s_n << " not found!\n\n";
- return;
- }
- for (int i = pos + 1; i < Table[index].size(); i++)
- Table[index][i - 1] = Table[index][i]; ///left shift every element starting from pos+1
- Table[index].pop_back(); /// pop_back() removes the last element
- cout << "Deleted from (" << index << ", " << pos << ")\n\n";
- }
- void Print()
- {
- for (int i = 0; i < mod; i++)
- {
- cout << i << " -> ";
- for (int j = 0; j < Table[i].size(); j++)
- Table[i][j].printSymbol();
- cout << "\n";
- }
- }
- ~SymbolTable() /// free the allocated memory here
- {
- delete[] Table;
- }
- };
/// Reports whether s is one of the reserved words this lexer recognises.
/// The comparison is exact and case-sensitive.
bool is_keyword(std::string s)
{
    static const char *const reserved[] = {
        "if", "else", "for", "while", "break", "int", "char", "float",
        "double", "void", "return"
    };
    return std::find(std::begin(reserved), std::end(reserved), s) != std::end(reserved);
}
/// Reports whether every character of s is a decimal digit ('0'..'9').
/// An empty string contains no non-digit and therefore yields true.
bool is_number(std::string s)
{
    return std::all_of(s.begin(), s.end(),
                       [](char ch) { return ch >= '0' && ch <= '9'; });
}
/// Reports whether ch may appear inside an identifier:
/// an ASCII letter, an ASCII digit or an underscore.
bool is_valid(char ch)
{
    if((ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z')) return true;
    if(ch == '_') return true;
    if(ch >= '0' && ch <= '9') return true;
    return false;
}
/// Reports whether s is a well-formed identifier: it must start with an ASCII letter
/// and every character must satisfy is_valid(char).
/// An empty string is not an identifier.
bool is_valid(const std::string &s)
{
    if(s.empty()) return false;
    const char first = s[0];
    const bool starts_with_letter =
        (first >= 'a' && first <= 'z') || (first >= 'A' && first <= 'Z');
    if(!starts_with_letter) return false;
    /// Reject on the first invalid character; accepting on the first valid one
    /// would let any token that merely starts with a letter through.
    for(char ch : s)
        if(!is_valid(ch))
            return false;
    return true;
}
- bool check_ignore(string s)
- {
- if(is_number(s)) return true;
- vector<string> ignore_words = {" ", ";", "(", ")", "{", "}", "[", "]", ",", "&",".",":","\n"};
- for(string i : ignore_words)
- if(i == s)
- return true;
- return false;
- }
/// Reports whether ch terminates a token, i.e. whether it is a delimiter that is
/// always emitted as a one-character token of its own.
/// '!' is part of the set because "!" and "!=" are recognised operators; without it
/// input such as "a!=b" or "!x" would never be split at the '!'.
bool end_char(char ch)
{
    static const std::string delimiters = " ;(){}[]+-*/%&,<>=!.:\"\n";
    return delimiters.find(ch) != std::string::npos;
}
/// Reports whether s is one of the recognised single-character operators.
bool is_operator(std::string s)
{
    static const char *const singles[] = {"+", "-", "*", "/", "%", "<", ">", "=", "!"};
    for(const char *candidate : singles)
    {
        if(s == candidate) return true;
    }
    return false;
}
/// Reports whether the two tokens s1 and s2, joined in that order, form one of the
/// recognised two-character operators. s2 must itself be a single-character operator.
bool is_operator(std::string s1, std::string s2)
{
    if(!is_operator(s2)) return false;
    static const char *const doubles[] = {"++", "--", "==", "!=", ">=", "<="};
    const std::string joined = s1 + s2;
    return std::find(std::begin(doubles), std::end(doubles), joined) != std::end(doubles);
}
- signed main() {
- int mod = 51, hash_type = 1;
- SymbolTable Table(mod, hash_type);
- string s;
- int line_number = 0;
- ifstream inFile ("input.txt");
- ofstream outKey ("out_keyword.txt");
- ofstream outId ("out_identifier.txt");
- ofstream outFn ("out_function.txt");
- ofstream outOpt ("out_operator.txt");
- while (getline(inFile, s))
- {
- line_number++;
- if(s[0] == '#') continue;
- vector<string> all_words; ///to store every non-empty valid substring for lexeme
- int buffer_start = 0, forward = 0;
- while(buffer_start < s.size())
- {
- if(s[buffer_start] == ' ')
- {
- buffer_start++;
- continue;
- }
- string temp = "";
- forward = buffer_start;
- while (forward < s.size())
- {
- temp += s[forward];
- forward++;
- if(end_char(s[forward]) || end_char(s[buffer_start])) break;
- }
- // cout << temp << endl;
- all_words.push_back(temp);
- buffer_start = forward;
- }
- /// Iterate each word
- for (int i = 0; i < all_words.size(); i++)
- {
- if(check_ignore(all_words[i])) continue;
- /// Ignore Literals
- if(all_words[i] == "\"")
- {
- i++;
- while (i < all_words.size())
- {
- if (all_words[i] == "\"")
- break;
- i++;
- }
- continue;
- }
- string symbol_name, symbol_type;
- /// check keyword
- if(is_keyword(all_words[i]))
- {
- if(all_words[i] == "else" && i+1 < all_words[i].size())
- {
- if(i+1 < all_words.size() && all_words[i+1] == "if")
- {
- symbol_name = "else if";
- i++;
- }
- else symbol_name = all_words[i];
- }
- else symbol_name = all_words[i];
- symbol_type = "KEYWORD";
- outKey << symbol_name << " " << line_number << "\n";
- }
- /// check operator
- else if(is_operator(all_words[i]))
- {
- if (i + 1 < all_words.size())
- {
- if (is_operator(all_words[i], all_words[i+1]))
- {
- symbol_name = all_words[i] + all_words[i + 1];
- i++;
- }
- else symbol_name = all_words[i];
- }
- else symbol_name = all_words[i];
- symbol_type = "OPERATOR";
- outOpt << symbol_name << " " << line_number << "\n";
- }
- /// check function/identifier
- else if(is_valid(all_words[i]))
- {
- symbol_name = all_words[i];
- ///if next word is "(" then current word is a function, otherwise identifier
- if(i+1 < all_words.size() && all_words[i+1] == "(")
- {
- symbol_type = "FUNCTION";
- i++;
- outFn << symbol_name << " " << line_number << "\n";
- }
- else
- {
- symbol_type = "IDENTIFIER";
- outId << symbol_name << " " << line_number << "\n";
- }
- }
- /// otherwise print error
- else
- {
- cout << "Lexical Error at Line: " << line_number << "\n";
- continue;
- }
- Table.Insert(symbol_name, symbol_type);
- }
- }
- inFile.close();
- outId.close();
- outKey.close();
- outFn.close();
- cout << "\n\nSymbol Table:\n\n";
- Table.Print();
- return 0;
- }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement