// Untitled

#include <iostream>
#include <cstdio>
#include <algorithm>
#include <string>
#include <vector>
#include <cstring>
#include <cctype>
#include <fstream>

// definitions

// Token kinds. Each value is also the index of the kind's printable name in
// the `tokens` table, so the two lists must stay in the same order.
enum {
	COMMA     = 0,   // ","
	DIV       = 1,   // "/"
	EQUAL     = 2,   // "=="
	FOR       = 3,
	FLOAT     = 4,   // digits containing a '.'
	ID        = 5,   // word that is not a keyword
	INTEGER   = 6,   // digits only
	MINUS     = 7,
	MULTIPLE  = 8,   // "*"
	PLUS      = 9,   // "+"
	SIMICOLON = 10,  // ";"
	UNDEFINED = 11,
	WHILE     = 12,
	KEYWORD   = 13,  // word listed in `keywords`
	BRACKETS  = 14,  // "(" or ")"
	GEQ       = 15,  // ">="
	SEQ       = 16,  // "<="
	GREATER   = 17,  // ">"
	SMALLER   = 18,  // "<"
	ASSIGN    = 19   // "="
};

// Printable name of every token kind, indexed by the token enum value.
// The order here must mirror the enum declaration exactly.
std::string tokens[] = {
	// 0 - 4
	"COMMA", "DIV", "EQUAL", "FOR", "FLOAT",
	// 5 - 9
	"ID", "INTEGER", "MINUS", "MULTIPLE", "PLUS",
	// 10 - 14
	"SIMICOLON", "UNDEFINED", "WHILE", "KEYWORD", "BRACKETS",
	// 15 - 19
	"GEQ", "SEQ", "GREATER", "SMALLER", "ASSIGN",
};

// Token stream built by the lexer: one (token kind, lexeme) pair per token,
// stored in the order the tokens were appended.
std::vector<std::pair<int, std::string>> dict;

// Skips the comment that begins at code_text[start].
//
//   start     in: index of the comment's leading '/'.
//             out: index of the LAST character belonging to the comment
//             (the '/' of the closing "*/" for a block comment, the '\n' for a
//             line comment), so that a caller which then increments the index
//             resumes on the first character after the comment. If the comment
//             is never terminated, `start` is set to code_text.size().
//   code_text text being scanned (not modified).
//   method    true for a "/* ... */" block comment, false for a "//" line
//             comment.
void jump_comment(int& start, std::string& code_text, bool method) {
	const int length = static_cast<int>(code_text.size());

	if (method) {
		// Begin the search after the opening "/*" so that "/*/" is not taken
		// for a complete comment, while "/**/" still is.
		const std::string::size_type close =
			code_text.find("*/", static_cast<std::string::size_type>(start) + 2);
		start = (close == std::string::npos) ? length : static_cast<int>(close) + 1;
	}
	else {
		const std::string::size_type eol =
			code_text.find('\n', static_cast<std::string::size_type>(start));
		start = (eol == std::string::npos) ? length : static_cast<int>(eol);
	}
}

// Skips the rest of the line that begins at code_text[start] (used for lines
// introduced by '#').
//
// On return `start` is the index of the terminating '\n', or code_text.size()
// when the line is the last one and has no trailing newline.
void jump_head(int& start, std::string& code_text) {
	const std::string::size_type eol =
		code_text.find('\n', static_cast<std::string::size_type>(start));
	start = (eol == std::string::npos)
		? static_cast<int>(code_text.size())
		: static_cast<int>(eol);
}

// Scans the numeric literal beginning at code_text[start] and appends it to
// `dict`.
//
// A run of digits is an INTEGER. If the run is immediately followed by '.',
// the '.' and any digits after it are included and the token is a FLOAT.
//
//   start     in: index of the first character of the literal.
//             out: index of the first character after the literal.
//   code_text text being scanned (not modified).
void get_digit(int& start, std::string& code_text) {
	const int length = static_cast<int>(code_text.size());

	// std::isdigit is only defined for values representable as unsigned char,
	// so the (possibly negative) char is converted before the call.
	const auto is_digit_at = [&](int pos) {
		return pos < length &&
			std::isdigit(static_cast<unsigned char>(code_text[pos])) != 0;
	};

	int end = start;
	while (is_digit_at(end))
		++end;

	int kind = INTEGER;
	if (end < length && code_text[end] == '.') {
		kind = FLOAT;
		++end;  // consume the '.'
		while (is_digit_at(end))
			++end;
	}

	dict.emplace_back(kind, code_text.substr(start, end - start));
	start = end;
}

// Reserved words. A scanned word found in this list is reported as a keyword
// instead of an identifier.
std::string keywords[] = {
	"int",
	"double",
	"void",
	"volatile",
	"const",
	"float",
	"return",
	"if",
	"else"};

// Returns true when `lts` is exactly equal to one of the entries in
// `keywords`, false otherwise.
//
// The loop covers the whole table rather than a hard-coded count, so entries
// appended to `keywords` are picked up automatically.
bool iskeyword(const std::string& lts) {
	for (const std::string& keyword : keywords) {
		if (keyword == lts)
			return true;
	}
	return false;
}

// Scans the word beginning at code_text[start] and appends it to `dict` as a
// KEYWORD when iskeyword() accepts it, otherwise as an ID.
//
// A word is the longest run of letters, digits and underscores. Any other
// character (or the end of the text) ends it, so punctuation that directly
// follows a name, as in "a;" or "f(x,y)", is left for the caller to tokenize.
//
//   start     in: index of the first character of the word.
//             out: index of the first character after the word.
//   code_text text being scanned (not modified).
void get_entity(int& start, std::string& code_text) {
	const int length = static_cast<int>(code_text.size());

	int end = start;
	while (end < length) {
		// <cctype> classifiers require an unsigned char value.
		const unsigned char ch = static_cast<unsigned char>(code_text[end]);
		if (!std::isalnum(ch) && ch != '_')
			break;
		++end;
	}

	const std::string word = code_text.substr(start, end - start);
	const int kind = iskeyword(word) ? KEYWORD : ID;
	dict.emplace_back(kind, word);

	start = end;
}

// Reads the whole file `filename` in binary mode and returns its bytes.
//
// Returns an empty string when `filename` is null, the file cannot be opened,
// its size cannot be determined, or it is empty. If fewer bytes than expected
// can be read, only the bytes actually read are returned.
//
// The parameter is `const char*` so that string literals can be passed.
std::string load_file(const char* filename) {
	if (filename == nullptr)
		return std::string();

	// Open positioned at the end so tellg() reports the file size.
	std::ifstream ifs(filename, std::ios::in | std::ios::binary | std::ios::ate);
	if (!ifs)
		return std::string();

	const std::streamoff file_size = ifs.tellg();
	if (file_size <= 0)
		return std::string();

	std::string contents(static_cast<std::string::size_type>(file_size), '\0');
	ifs.seekg(0, std::ios::beg);
	ifs.read(&contents[0], static_cast<std::streamsize>(file_size));
	contents.resize(static_cast<std::string::size_type>(ifs.gcount()));
	return contents;
}

void lex(std::string context) {
	for (int index = 0; index < context.length(); index++) {
		char ch = context[index];

		if (ch <= ' ')
			continue;

		if (ch <= '9' && ch >= '0') {
			get_digit(index, context);
			ch = context[index];
		}
		if ((ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z')) {
			get_entity(index, context);
			ch = context[index];
		}

		switch (ch) {
		case '+':
			dict.emplace_back(std::make_pair(PLUS, "+"));
			break;
		case ';':
			dict.emplace_back(std::make_pair(SIMICOLON, ";"));
			break;
		case ',':
			dict.emplace_back(std::make_pair(COMMA, ","));
			break;
		case '=':
			if (index + 1 < context.length() && context[index + 1] == '=') {
				dict.emplace_back(std::make_pair(EQUAL, "=="));
				index += 1;
			}
			else
				dict.emplace_back(std::make_pair(ASSIGN, "="));
			break;
		case '/':
			if (index + 1 < context.length() && context[index + 1] == '/')
				jump_comment(index, context, 0);
			else if (index + 1 < context.length() && context[index + 1] == '*')
				jump_comment(index, context, 1);
			else
				dict.emplace_back(std::make_pair(DIV, "/"));
			break;
		case '*':
			dict.emplace_back(std::make_pair(MULTIPLE, "*"));
			break;
		case '(':
			dict.emplace_back(std::make_pair(BRACKETS, "("));
			break;
		case ')':
			dict.emplace_back(std::make_pair(BRACKETS, ")"));
			break;
		case '#':
			jump_head(index, context);
			break;
		case '>':
			if (index + 1 < context.length() && context[index + 1] == '=') {
				dict.emplace_back(std::make_pair(GEQ, ">="));
				index++;
			}
			else
				dict.emplace_back(std::make_pair(GREATER, ">"));
			break;
		case '<':
			if (index + 1 < context.length() && context[index + 1] == '=') {
				dict.emplace_back(std::make_pair(SEQ, "<="));
				index++;
			}
			else
				dict.emplace_back(std::make_pair(SMALLER, "<"));
			break;
		default:
			break;
		}
	}
}

// Entry point: loads a source file, tokenizes it and prints one line per
// token in the form "<KIND> <---> <lexeme>".
//
// The file to read is taken from the first command-line argument; when no
// argument is given the built-in placeholder path is used.
int main(int argc, char* argv[]) {
	// Held in a writable array because a string literal may not be converted
	// to a non-const char*.
	char default_path[] = "file path here";
	char* path = (argc > 1) ? argv[1] : default_path;

	std::string text = load_file(path);
	lex(text);

	for (const auto& entry : dict)
		std::cout << tokens[entry.first] << " <---> " << entry.second << '\n';
	return 0;
}

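// input sample
// (written with line comments because the sample itself contains a block
// comment, which would otherwise end an enclosing block comment early)
//
// #include <iostream>
// #include <cstdio>
//
// int main() {
// 	// aaaaa
// 	/* qwertyui*/
// 	int a = 0;
// 	float b = 123.456;
// 	if (a == b) a = 1;
// 	return 0;
// }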

/* output sample
KEYWORD <---> int
ID <---> main
BRACKETS <---> (
BRACKETS <---> )
KEYWORD <---> int
ID <---> a
ASSIGN <---> =
INTEGER <---> 0
SIMICOLON <---> ;
KEYWORD <---> float
ID <---> b
ASSIGN <---> =
FLOAT <---> 123.456
SIMICOLON <---> ;
ID <---> if
BRACKETS <---> (
ID <---> a
EQUAL <---> ==
ID <---> b
BRACKETS <---> )
ID <---> a
ASSIGN <---> =
INTEGER <---> 1
SIMICOLON <---> ;
KEYWORD <---> return
INTEGER <---> 0
SIMICOLON <---> ;
*/