Advertisement
Guest User

Untitled

a guest
Oct 18th, 2019
102
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 5.20 KB | None | 0 0
  1. #include <iostream>
  2. #include <cstdio>
  3. #include <algorithm>
  4. #include <string>
  5. #include <vector>
  6. #include <cstring>
  7. #include <cctype>
  8. #include <fstream>
  9.  
  10. // definitions
  11.  
  12. enum {
  13. COMMA,
  14. DIV,
  15. EQUAL,
  16. FOR,
  17. FLOAT,
  18. ID,
  19. INTEGER,
  20. MINUS,
  21. MULTIPLE,
  22. PLUS,
  23. SIMICOLON,
  24. UNDEFINED,
  25. WHILE,
  26. KEYWORD,
  27. BRACKETS,
  28. GEQ,
  29. SEQ,
  30. GREATER,
  31. SMALLER,
  32. ASSIGN};
  33.  
  34. std::string tokens[] = {
  35. "COMMA",
  36. "DIV",
  37. "EQUAL",
  38. "FOR",
  39. "FLOAT",
  40. "ID",
  41. "INTEGER",
  42. "MINUS",
  43. "MULTIPLE",
  44. "PLUS",
  45. "SIMICOLON",
  46. "UNDEFINED",
  47. "WHILE",
  48. "KEYWORD",
  49. "BRACKETS",
  50. "GEQ",
  51. "SEQ",
  52. "GREATER",
  53. "SMALLER",
  54. "ASSIGN"};
  55.  
  56. std::vector<std::pair<int, std::string>> dict;
  57.  
  58. void jump_comment(int& start, std::string& code_text, bool method) {
  59. auto ch = code_text[start];
  60. auto temp = start;
  61. if (method) {
  62. start += 2;
  63. while (true) {
  64. while (ch != '*')
  65. ch = code_text[++start];
  66. if (code_text[start + 1] == '/') {
  67. start += 2;
  68. break;
  69. }
  70. }
  71. }
  72. else {
  73. while (ch != '\n') {
  74. ch = code_text[++start];
  75. }
  76. }
  77. }
  78.  
  79. void jump_head(int& start, std::string& code_text) {
  80. auto ch = code_text[start];
  81. while (ch != '\n')
  82. ch = code_text[++start];
  83. }
  84.  
  85. void get_digit(int& start, std::string& code_text) {
  86. auto ch = code_text[start];
  87. int temp = start;
  88. while (isdigit(ch))
  89. ch = code_text[++temp];
  90.  
  91. if (ch == '.') {
  92. do
  93. ch = code_text[++temp];
  94. while (isdigit(ch));
  95. dict.emplace_back(std::make_pair(
  96. FLOAT, code_text.substr(start, temp - start)));
  97. }
  98. else {
  99. dict.emplace_back(std::make_pair(
  100. INTEGER, code_text.substr(start, temp - start)));
  101. }
  102. start = temp;
  103. }
  104.  
  105. std::string keywords[] = {
  106. "int",
  107. "double",
  108. "void",
  109. "violiate",
  110. "const",
  111. "float",
  112. "return",
  113. "if",
  114. "else"};
  115.  
  116. bool iskeyword(std::string lts) {
  117. for (int i = 0; i < 7; i++) {
  118. if (keywords[i] == lts)
  119. return true;
  120. }
  121. return false;
  122. }
  123.  
  124. void get_entity(int& start, std::string& code_text) {
  125. int temp = start;
  126. char ch = code_text[start];
  127. while (ch != ' ' && ch != '(' && ch != ')' && ch != '=' && ch != '"')
  128. ch = code_text[++temp];
  129. std::string buf = code_text.substr(start, temp - start);
  130. if (iskeyword(buf))
  131. dict.emplace_back(std::make_pair(KEYWORD, buf));
  132. else
  133. dict.emplace_back(std::make_pair(ID, buf));
  134.  
  135. start = temp;
  136. }
  137.  
  138. std::string load_file(char* filename) {
  139. std::ifstream ifs(filename, std::ios::in | std::ios::binary | std::ios::ate);
  140.  
  141. std::ifstream::pos_type fileSize = ifs.tellg();
  142. ifs.seekg(0, std::ios::beg);
  143.  
  144. std::vector<char> bytes(fileSize);
  145. ifs.read(bytes.data(), fileSize);
  146.  
  147. return std::string(bytes.data(), fileSize);
  148. }
  149.  
  150. void lex(std::string context) {
  151. for (int index = 0; index < context.length(); index++) {
  152. char ch = context[index];
  153.  
  154. if (ch <= ' ')
  155. continue;
  156.  
  157. if (ch <= '9' && ch >= '0') {
  158. get_digit(index, context);
  159. ch = context[index];
  160. }
  161. if ((ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z')) {
  162. get_entity(index, context);
  163. ch = context[index];
  164. }
  165.  
  166. switch (ch) {
  167. case '+':
  168. dict.emplace_back(std::make_pair(PLUS, "+"));
  169. break;
  170. case ';':
  171. dict.emplace_back(std::make_pair(SIMICOLON, ";"));
  172. break;
  173. case ',':
  174. dict.emplace_back(std::make_pair(COMMA, ","));
  175. break;
  176. case '=':
  177. if (index + 1 < context.length() && context[index + 1] == '=') {
  178. dict.emplace_back(std::make_pair(EQUAL, "=="));
  179. index += 1;
  180. }
  181. else
  182. dict.emplace_back(std::make_pair(ASSIGN, "="));
  183. break;
  184. case '/':
  185. if (index + 1 < context.length() && context[index + 1] == '/')
  186. jump_comment(index, context, 0);
  187. else if (index + 1 < context.length() && context[index + 1] == '*')
  188. jump_comment(index, context, 1);
  189. else
  190. dict.emplace_back(std::make_pair(DIV, "/"));
  191. break;
  192. case '*':
  193. dict.emplace_back(std::make_pair(MULTIPLE, "*"));
  194. break;
  195. case '(':
  196. dict.emplace_back(std::make_pair(BRACKETS, "("));
  197. break;
  198. case ')':
  199. dict.emplace_back(std::make_pair(BRACKETS, ")"));
  200. break;
  201. case '#':
  202. jump_head(index, context);
  203. break;
  204. case '>':
  205. if (index + 1 < context.length() && context[index + 1] == '=') {
  206. dict.emplace_back(std::make_pair(GEQ, ">="));
  207. index++;
  208. }
  209. else
  210. dict.emplace_back(std::make_pair(GREATER, ">"));
  211. break;
  212. case '<':
  213. if (index + 1 < context.length() && context[index + 1] == '=') {
  214. dict.emplace_back(std::make_pair(SEQ, "<="));
  215. index++;
  216. }
  217. else
  218. dict.emplace_back(std::make_pair(SMALLER, "<"));
  219. break;
  220. default:
  221. break;
  222. }
  223. }
  224. }
  225.  
  226. int main() {
  227. //lex("C:\\Users\\10716\\Desktop\\lex\\a.txt");
  228. std::string text = load_file("file path here");
  229. std::string digit = "123.456 123";
  230. int start = 0;
  231. //get_digit(start, digit);
  232. lex(text);
  233. for (int i = 0; i < dict.size(); i++) {
  234. std::cout << tokens[dict[i].first] << " <---> " << dict[i].second << std::endl;
  235. }
  236. return 0;
  237. }
  238.  
// input sample
//
//   #include <iostream>
//   #include <cstdio>
//
//   int main() {
//   // aaaaa
//   /* qwertyui*/
//   int a = 0;
//   float b = 123.456;
//   if (a == b) a = 1;
//   return 0;
//   }
  252.  
  253. /* output sample
  254. KEYWORD <---> int
  255. ID <---> main
  256. BRACKETS <---> (
  257. BRACKETS <---> )
  258. KEYWORD <---> int
  259. ID <---> a
  260. ASSIGN <---> =
  261. INTEGER <---> 0
  262. SIMICOLON <---> ;
  263. KEYWORD <---> float
  264. ID <---> b
  265. ASSIGN <---> =
  266. FLOAT <---> 123.456
  267. SIMICOLON <---> ;
  268. ID <---> if
  269. BRACKETS <---> (
  270. ID <---> a
  271. EQUAL <---> ==
  272. ID <---> b
  273. BRACKETS <---> )
  274. ID <---> a
  275. ASSIGN <---> =
  276. INTEGER <---> 1
  277. SIMICOLON <---> ;
  278. KEYWORD <---> return
  279. INTEGER <---> 0
  280. SIMICOLON <---> ;
  281. */
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement