Advertisement
Guest User

Untitled

a guest
Apr 1st, 2015
193
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 9.53 KB | None | 0 0
  #include <ctype.h>
  #include <cstdio>
  #include <cstdlib>
  #include <cstring>
  #include <string>
  #include <vector>
  4.  
  // Table of identifier / string-constant text.
  using string_list = std::vector<std::string>;
  // Table of integer constants.
  using int_list = std::vector<long long>;
  // Table of floating-point constants.
  using float_list = std::vector<long double>;
  8.  
  // Returns a std::string holding exactly the first `length` bytes at `value`.
  // The bytes are copied verbatim, so `value` does not have to be
  // NUL-terminated and embedded NUL bytes are preserved.
  std::string substr(const char* value, size_t length){
      return std::string(value, length);
  }
  15.  
  16. long long string_to_int(const char* value, size_t length){
  17. return atoll(substr(value, length).c_str());
  18. }
  // Parses the first `length` characters at `value` as a floating-point number.
  //
  // The range is copied into a NUL-terminated buffer first because the parser
  // reads until a terminator and `value` is only bounded by `length`.
  // strtold is used instead of atof: atof yields a double, which would discard
  // the extra precision the long double return type is meant to carry.
  // Returns 0 when no number can be parsed.
  long double string_to_float(const char* value, size_t length){
      const std::string text(value, length);
      return strtold(text.c_str(), nullptr);
  }
  22.  
  23.  
  24. void int_list_add(int_list& list, long long value){
  25. list.push_back(value);
  26. }
  27. void string_list_add(string_list& list, const char* value, size_t length){
  28. list.push_back(substr(value, length));
  29. }
  30. void float_list_add(float_list& list, long double value){
  31. list.push_back(value);
  32. }
  33. size_t int_list_last(int_list& list){
  34. return list.size();
  35. }
  36. size_t string_list_last(string_list& list){
  37. return list.size();
  38. }
  39. size_t float_list_last(float_list& list){
  40. return list.size();
  41. }
  42.  
  43.  
  44.  
  45. typedef struct{
  46. string_list identifiers;
  47. string_list constants_string;
  48. int_list constants_int;
  49. float_list constants_float;
  50. size_t id;
  51. } *state, state_value;
  52.  
  53. state tok_state_create(){
  54. state ret = new state_value;
  55. ret->id = 0;
  56. return ret;
  57. }
  58. void tok_state_destroy(state t_state){
  59. delete t_state;
  60. }
  61. const char* tok_state_read_identifier(state t_state, size_t id){
  62. return t_state->identifiers[id - 1].c_str();
  63. }
  64. const char* tok_state_read_string(state t_state, size_t id){
  65. return t_state->constants_string[id - 1].c_str();
  66. }
  67. long long tok_state_read_int(state t_state, size_t id){
  68. return t_state->constants_int[id - 1];
  69. }
  70. long double tok_state_read_float(state t_state, size_t id){
  71. return t_state->constants_float[id - 1];
  72. }
  73.  
  74.  
  75.  
  // Punctuation tokens. A token's id is its index in this table; index 0 is a
  // placeholder so that 0 can mean "no token", and a null pointer ends the table.
  const char* punct_tokens[] = {
      "Not A Token (Dummy)",
      ".",  ",",  "<",  "<<", ">",  ">>",
      ";",  "+",  "-",  "/",  "*",  "!",  "%",  "^",
      "&",  "(",  ")",  "=",  "==", "[",  "]",  "{",
      "}",  "?",  ":",  "|",  "||", "&&", "~",
      nullptr
  };
  82.  
  // Keyword tokens. A keyword's id is 100 plus its index in this table; index 0
  // is a placeholder and a null pointer ends the table.
  const char* key_tokens[] = {
      "Not A Token (Dummy)",
      "if",
      "while",
      "do",
      "then",
      "end",
      nullptr
  };
  86.  
  // Token categories whose tokens carry a payload stored in the tokenizer
  // state. The values start at 500, above the id ranges used for punctuation
  // (below 100) and keywords (100..199).
  enum tok_type{
      TOK_TYPE_INTEGER    = 500,
      TOK_TYPE_FLOAT      = 501,
      TOK_TYPE_STRING     = 502,
      TOK_TYPE_IDENTIFIER = 503,
      TOK_TYPE_NONE       = 504
  };
  94.  
  95. const char* get_token_from_id(size_t id){
  96. if (id < 100){
  97. return punct_tokens[id];
  98. }
  99. if (id < 200){
  100. return key_tokens[id - 100];
  101. }
  102. if (id >= 500){
  103. switch (id){
  104. case TOK_TYPE_INTEGER: return "Integer Constant";
  105. case TOK_TYPE_FLOAT: return "Float Constant ";
  106. case TOK_TYPE_STRING: return "String Constant ";
  107. case TOK_TYPE_IDENTIFIER: return "Identifier ";
  108. case TOK_TYPE_NONE: return "Unknown ";
  109. default:
  110. break;
  111. }
  112. }
  113. return "Not A Token (Dummy)";
  114. }
  115.  
  // Returns 1 when `c` may appear in an identifier (a letter or '_'), else 0.
  // Digits are not identifier characters in this tokenizer.
  int is_identifier_char(char c){
      // The <ctype.h> classifiers require a value representable as unsigned
      // char; passing a negative plain char is undefined behavior.
      if (c == '_' || isalpha(static_cast<unsigned char>(c))){
          return 1;
      }
      return 0;
  }
  122.  
  123. size_t read_punct_token(const char* input, size_t size){
  124. size_t max_len = 0;
  125. size_t token_id = 0;
  126. for (size_t i = 1; punct_tokens[i] != 0; ++i){
  127. size_t len = strlen(punct_tokens[i]);
  128. if (len > max_len && len <= size && strncmp(punct_tokens[i], input, len) == 0){
  129. max_len = len;
  130. if (i == 1 && size > 1 && isdigit(input[1])){
  131. return 0; //Special case for floats
  132. }
  133. token_id = i;
  134. }
  135. }
  136. return token_id;
  137. }
  138.  
  139. size_t read_key_token(const char* input, size_t size){
  140. size_t max_len = 0;
  141. size_t token_id = 0;
  142. for (size_t i = 1; key_tokens[i] != 0; ++i){
  143. size_t len = strlen(key_tokens[i]);
  144. if (len > max_len && len <= size && strncmp(key_tokens[i], input, len) == 0){
  145. max_len = len;
  146. token_id = i + 100;
  147. }
  148. }
  149. return token_id;
  150. }
  151.  
  152.  
  153. size_t is_punct_token_char(char c){
  154. for (size_t i = 1; punct_tokens[i] != 0; ++i){
  155. if (punct_tokens[i][0] == c){
  156. return 1;
  157. }
  158. }
  159. return 0;
  160. }
  161.  
  162.  
  163. void add_token(state t_state, tok_type type, const char* string, size_t length){
  164. switch (type){
  165. case TOK_TYPE_INTEGER:
  166. int_list_add(t_state->constants_int, string_to_int(string, length));
  167. t_state->id = int_list_last(t_state->constants_int);
  168. break;
  169. case TOK_TYPE_FLOAT:
  170. float_list_add(t_state->constants_float, string_to_float(string, length));
  171. t_state->id = float_list_last(t_state->constants_float);
  172. break;
  173. case TOK_TYPE_STRING:
  174. string_list_add(t_state->constants_string, string, length);
  175. t_state->id = string_list_last(t_state->constants_string);
  176. break;
  177. case TOK_TYPE_IDENTIFIER:
  178. string_list_add(t_state->identifiers, string, length);
  179. t_state->id = string_list_last(t_state->identifiers);
  180. break;
  181. default:
  182. //Do some error here
  183. break;
  184. }
  185. }
  186.  
  187. size_t get_token(state t_state, char** input, size_t *size){
  188. if (t_state->id != 0){
  189. size_t id = t_state->id;
  190. t_state->id = 0;
  191. return id;
  192. }
  193. char* base = *input;
  194. size_t padding = 0;
  195. size_t length = 0;
  196. tok_type type = TOK_TYPE_NONE;
  197. while (*size > 0){
  198. if (isspace(*base)){
  199. base++;
  200. (*size)--;
  201. }
  202. else{
  203. break;
  204. }
  205. }
  206.  
  207. size_t tok = read_punct_token(base, *size);
  208. if (tok){
  209. size_t len = +strlen(get_token_from_id(tok));
  210. *input = base + len;
  211. *size -= len;
  212. return tok;
  213. }
  214. tok = read_key_token(base, *size);
  215. if (tok){
  216. size_t len = +strlen(get_token_from_id(tok));
  217. *input = base + len;
  218. *size -= len;
  219. return tok;
  220. }
  221.  
  222. while (*size - length > 0){
  223. if (length == 0 && type == TOK_TYPE_NONE){
  224. if (is_identifier_char(*base)){
  225. type = TOK_TYPE_IDENTIFIER;
  226. length++;
  227. }
  228. else if (*base == '"'){
  229. type = TOK_TYPE_STRING;
  230. padding = 1;
  231. base++;
  232. (*size)--;
  233. }
  234. else if (*base == '.' && *size > 1 && isdigit(base[1])){
  235. type = TOK_TYPE_FLOAT;
  236. }
  237. else if (isdigit(*base)){
  238. type = TOK_TYPE_INTEGER;
  239. }
  240. else if (is_punct_token_char(*base)){
  241. tok = read_punct_token(base, *size);
  242. if (tok){
  243. size_t len = strlen(punct_tokens[tok]);
  244. *input += len;
  245. *size -= len;
  246. return tok;
  247. }
  248. else{
  249. //do error
  250. }
  251. }
  252. }
  253. else{
  254. if (!isspace(base[length]) || type == TOK_TYPE_STRING){
  255. switch (type){
  256. case TOK_TYPE_INTEGER:
  257. if (isdigit(base[length])){
  258. length++;
  259. continue;
  260. }
  261. else if (base[length] == '.' || tolower(base[length]) == 'e'){
  262. type = TOK_TYPE_FLOAT;
  263. length++;
  264. continue;
  265. }
  266. break;
  267. case TOK_TYPE_FLOAT:
  268. if (isdigit(base[length]) || base[length] == '.' || base[length] == 'e'){
  269. length++;
  270. continue;
  271. }
  272. break;
  273. case TOK_TYPE_STRING:
  274. if (base[length] != '"'){
  275. length++;
  276. continue;
  277. }
  278. break;
  279. case TOK_TYPE_IDENTIFIER:
  280. if (is_identifier_char(base[length])){
  281. length++;
  282. continue;
  283. }
  284. break;
  285. default:
  286. break;
  287. }
  288. }
  289. //We only get here if this is a space or any of the switch cases didn't continue.
  290. add_token(t_state, type, base, length);
  291. *input = base + length + padding;
  292. *size -= length + padding;
  293. return type;
  294. }
  295. }
  296. *input = base + length + padding;
  297. *size -= length + padding;
  298. return 0;
  299. }
  300.  
  301. int main(){
  302. const char* input = "if(1+1==4)then print"hi!";end";
  303. state s = tok_state_create();
  304. size_t size = strlen(input);
  305. size_t token;
  306. size_t token_prev = 0;
  307. printf("TokentMeaningnn");
  308.  
  309. while ((token = get_token(s, (char**)&input, &size)) != 0){
  310. if (token_prev < 500){
  311. if (token < 500){
  312. printf("%dt%sn", token, get_token_from_id(token));
  313. }
  314. else{
  315. printf("%dt%s #", token, get_token_from_id(token));
  316. }
  317. }
  318. else{
  319. printf("%dt", token);
  320. switch (token_prev){
  321. case TOK_TYPE_IDENTIFIER: printf("%sn", tok_state_read_identifier(s, token)); break;
  322. case TOK_TYPE_STRING: printf("%sn", tok_state_read_string(s, token)); break;
  323. case TOK_TYPE_INTEGER: printf("%dn", tok_state_read_int(s, token)); break;
  324. case TOK_TYPE_FLOAT: printf("%fn", tok_state_read_float(s, token)); break;
  325.  
  326. }
  327. }
  328. token_prev = token;
  329. }
  330.  
  331. tok_state_destroy(s);
  332. }
  333.  
  334. Token Meaning
  335.  
  336. 101 if
  337. 16 (
  338. 500 Integer Constant #1 1
  339. 8 +
  340. 500 Integer Constant #2 1
  341. 19 ==
  342. 500 Integer Constant #3 4
  343. 17 )
  344. 104 then
  345. 503 Identifier #1 print
  346. 502 String Constant #1 hi!
  347. 7 ;
  348. 105 end
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement