Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- #include <iostream>
- #include <cstdio>
- #include <algorithm>
- #include <string>
- #include <vector>
- #include <cstring>
- #include <cctype>
- #include <fstream>
- // definitions
// Token kinds recorded by the lexer. Each value is also used as an index
// into the `tokens` name table, so both lists must keep the same order.
enum {
    COMMA = 0,
    DIV = 1,
    EQUAL = 2,
    FOR = 3,
    FLOAT = 4,
    ID = 5,
    INTEGER = 6,
    MINUS = 7,
    MULTIPLE = 8,
    PLUS = 9,
    SIMICOLON = 10,
    UNDEFINED = 11,
    WHILE = 12,
    KEYWORD = 13,
    BRACKETS = 14,
    GEQ = 15,
    SEQ = 16,
    GREATER = 17,
    SMALLER = 18,
    ASSIGN = 19
};
// Printable name of every token kind, indexed by the token-kind enum value.
std::string tokens[] = {
    /*  0 -  3 */ "COMMA", "DIV", "EQUAL", "FOR",
    /*  4 -  7 */ "FLOAT", "ID", "INTEGER", "MINUS",
    /*  8 - 11 */ "MULTIPLE", "PLUS", "SIMICOLON", "UNDEFINED",
    /* 12 - 15 */ "WHILE", "KEYWORD", "BRACKETS", "GEQ",
    /* 16 - 19 */ "SEQ", "GREATER", "SMALLER", "ASSIGN",
};
// One lexed token: (token-kind value, matched source text).
using TokenEntry = std::pair<int, std::string>;

// Every token recognised so far, in the order it was appended.
std::vector<TokenEntry> dict;
// Skips the comment that begins at code_text[start].
//
// start     - in: index of the leading '/' of the comment.
//             out: index of the LAST character that belongs to the comment
//             (the closing '/' of a block comment, or the '\n' that ends a
//             line comment), so a caller that increments the index next
//             resumes on the first character after it. If the comment is
//             not terminated, start is set to code_text.size().
// code_text - the text being scanned; it is not modified.
// method    - true for a "/* ... */" block comment, false for a "//" line
//             comment.
void jump_comment(int& start, std::string& code_text, bool method) {
    const std::size_t begin = static_cast<std::size_t>(start);
    std::size_t last = std::string::npos;

    if (method) {
        // Search behind the opening "/*" so that "/*/" is not mistaken for a
        // complete comment, and look for the two-character terminator so a
        // lone '*' inside the comment cannot stall the scan.
        const std::size_t close = code_text.find("*/", begin + 2);
        if (close != std::string::npos)
            last = close + 1;
    }
    else {
        last = code_text.find('\n', begin);
    }

    start = static_cast<int>(last == std::string::npos ? code_text.size() : last);
}
// Skips the rest of the current line (used for '#' preprocessor lines).
//
// start     - in: index somewhere on the line to skip.
//             out: index of the terminating '\n', or code_text.size() when
//             the line is the last one and has no trailing newline.
// code_text - the text being scanned; it is not modified.
void jump_head(int& start, std::string& code_text) {
    const std::size_t eol = code_text.find('\n', static_cast<std::size_t>(start));
    start = static_cast<int>(eol == std::string::npos ? code_text.size() : eol);
}
- void get_digit(int& start, std::string& code_text) {
- auto ch = code_text[start];
- int temp = start;
- while (isdigit(ch))
- ch = code_text[++temp];
- if (ch == '.') {
- do
- ch = code_text[++temp];
- while (isdigit(ch));
- dict.emplace_back(std::make_pair(
- FLOAT, code_text.substr(start, temp - start)));
- }
- else {
- dict.emplace_back(std::make_pair(
- INTEGER, code_text.substr(start, temp - start)));
- }
- start = temp;
- }
// Words that the lexer reports as KEYWORD instead of ID.
std::string keywords[] = {
    "int",
    "double",
    "void",
    "volatile",
    "const",
    "float",
    "return",
    "if",
    "else"};

// Returns true when `lts` exactly matches one of the entries in `keywords`.
// The whole table is searched, so entries added later are picked up
// without touching this function.
bool iskeyword(const std::string& lts) {
    for (const std::string& keyword : keywords) {
        if (keyword == lts)
            return true;
    }
    return false;
}
- void get_entity(int& start, std::string& code_text) {
- int temp = start;
- char ch = code_text[start];
- while (ch != ' ' && ch != '(' && ch != ')' && ch != '=' && ch != '"')
- ch = code_text[++temp];
- std::string buf = code_text.substr(start, temp - start);
- if (iskeyword(buf))
- dict.emplace_back(std::make_pair(KEYWORD, buf));
- else
- dict.emplace_back(std::make_pair(ID, buf));
- start = temp;
- }
// Reads the whole file `filename` (opened in binary mode) into a string.
//
// Returns the file contents. Returns an empty string when `filename` is
// null, the file cannot be opened, its size cannot be determined, or it is
// empty; an open failure is also reported on std::cerr.
std::string load_file(const char* filename) {
    if (filename == nullptr)
        return std::string();

    // Open positioned at the end so tellg() yields the file size.
    std::ifstream ifs(filename, std::ios::in | std::ios::binary | std::ios::ate);
    if (!ifs.is_open()) {
        std::cerr << "load_file: cannot open '" << filename << "'\n";
        return std::string();
    }

    // tellg() reports -1 on failure; never use that as a buffer size.
    const std::streamoff file_size = ifs.tellg();
    if (file_size <= 0)
        return std::string();

    ifs.seekg(0, std::ios::beg);
    std::string contents(static_cast<std::size_t>(file_size), '\0');
    ifs.read(&contents[0], static_cast<std::streamsize>(file_size));
    // Keep only what was actually read in case the read came up short.
    contents.resize(static_cast<std::size_t>(ifs.gcount()));
    return contents;
}
- void lex(std::string context) {
- for (int index = 0; index < context.length(); index++) {
- char ch = context[index];
- if (ch <= ' ')
- continue;
- if (ch <= '9' && ch >= '0') {
- get_digit(index, context);
- ch = context[index];
- }
- if ((ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z')) {
- get_entity(index, context);
- ch = context[index];
- }
- switch (ch) {
- case '+':
- dict.emplace_back(std::make_pair(PLUS, "+"));
- break;
- case ';':
- dict.emplace_back(std::make_pair(SIMICOLON, ";"));
- break;
- case ',':
- dict.emplace_back(std::make_pair(COMMA, ","));
- break;
- case '=':
- if (index + 1 < context.length() && context[index + 1] == '=') {
- dict.emplace_back(std::make_pair(EQUAL, "=="));
- index += 1;
- }
- else
- dict.emplace_back(std::make_pair(ASSIGN, "="));
- break;
- case '/':
- if (index + 1 < context.length() && context[index + 1] == '/')
- jump_comment(index, context, 0);
- else if (index + 1 < context.length() && context[index + 1] == '*')
- jump_comment(index, context, 1);
- else
- dict.emplace_back(std::make_pair(DIV, "/"));
- break;
- case '*':
- dict.emplace_back(std::make_pair(MULTIPLE, "*"));
- break;
- case '(':
- dict.emplace_back(std::make_pair(BRACKETS, "("));
- break;
- case ')':
- dict.emplace_back(std::make_pair(BRACKETS, ")"));
- break;
- case '#':
- jump_head(index, context);
- break;
- case '>':
- if (index + 1 < context.length() && context[index + 1] == '=') {
- dict.emplace_back(std::make_pair(GEQ, ">="));
- index++;
- }
- else
- dict.emplace_back(std::make_pair(GREATER, ">"));
- break;
- case '<':
- if (index + 1 < context.length() && context[index + 1] == '=') {
- dict.emplace_back(std::make_pair(SEQ, "<="));
- index++;
- }
- else
- dict.emplace_back(std::make_pair(SMALLER, "<"));
- break;
- default:
- break;
- }
- }
- }
- int main() {
- //lex("C:\\Users\\10716\\Desktop\\lex\\a.txt");
- std::string text = load_file("file path here");
- std::string digit = "123.456 123";
- int start = 0;
- //get_digit(start, digit);
- lex(text);
- for (int i = 0; i < dict.size(); i++) {
- std::cout << tokens[dict[i].first] << " <---> " << dict[i].second << std::endl;
- }
- return 0;
- }
// input sample
//   #include <iostream>
//   #include <cstdio>
//   int main() {
//   // aaaaa
//   /* qwertyui*/
//   int a = 0;
//   float b = 123.456;
//   if (a == b) a = 1;
//   return 0;
//   }
- /* output sample
- KEYWORD <---> int
- ID <---> main
- BRACKETS <---> (
- BRACKETS <---> )
- KEYWORD <---> int
- ID <---> a
- ASSIGN <---> =
- INTEGER <---> 0
- SIMICOLON <---> ;
- KEYWORD <---> float
- ID <---> b
- ASSIGN <---> =
- FLOAT <---> 123.456
- SIMICOLON <---> ;
- ID <---> if
- BRACKETS <---> (
- ID <---> a
- EQUAL <---> ==
- ID <---> b
- BRACKETS <---> )
- ID <---> a
- ASSIGN <---> =
- INTEGER <---> 1
- SIMICOLON <---> ;
- KEYWORD <---> return
- INTEGER <---> 0
- SIMICOLON <---> ;
- */
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement