Advertisement
Guest User

Untitled

a guest
Mar 27th, 2017
45
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 14.44 KB | None | 0 0
  1. #include <stdio.h>
  2. #include <stdlib.h>
  3. #include <stdarg.h>
  4. #include <ctype.h>
  5. #include <string.h>
  6.  
  7.  
  /*
   * One lexical atom (token). Tokens are chained through `next` into a
   * singly linked list. Which union member is meaningful depends on `code`.
   */
  typedef struct _Token {
      int code;               /* token code (its name) */
      union {
          char *text;         /* used for ID and CT_STRING (dynamically allocated) */
          long int i;         /* used for CT_INT and CT_CHAR */
          double r;           /* used for CT_REAL */
      };
      int line;               /* line in the input file */
      struct _Token *next;    /* link to the next token */
  } Token;
  18.  
  /*
   * Report a fatal error and terminate the program.
   *
   * fmt, ... : printf-style format string and its arguments.
   *
   * Writes "error: ", the formatted message and a newline to stderr,
   * then calls exit(-1); it never returns to the caller.
   */
  void err(const char *fmt, ...) {
      va_list args;

      fputs("error: ", stderr);
      va_start(args, fmt);
      vfprintf(stderr, fmt, args);
      va_end(args);
      putc('\n', stderr);
      exit(-1);
  }
  28.  
  29.  
  /*
   * SAFEALLOC(var, Type): allocate one object of type Type with malloc and
   * store its address in var; on allocation failure call
   * err("not enough memory").
   *
   * The body is wrapped in do { ... } while (0) so the macro behaves as a
   * single statement: it can be used as the body of an if/else without
   * braces and cannot capture a following `else`. Callers terminate it
   * with a semicolon, as with any statement.
   */
  #define SAFEALLOC(var, Type) \
      do { \
          if (((var) = (Type *)malloc(sizeof(Type))) == NULL) \
              err("not enough memory"); \
      } while (0)
  31.  
  /*
   * Token codes. The declaration order fixes the numeric values
   * (ID is 0, WHILE is 39), so new codes must not be inserted in the middle.
   */
  enum atomi {
      /* identifier and end-of-input marker */
      ID,
      END,

      /* constants */
      CT_INT,
      CT_REAL,
      CT_CHAR,
      CT_STRING,

      /* delimiters */
      COMA,
      SEMICOLON,
      LPAR,
      RPAR,
      LBRACKET,
      RBRACKET,
      LACC,
      RACC,

      /* operators */
      ADD,
      SUB,
      MUL,
      DIV,
      DOT,
      AND,
      OR,
      NOT,
      ASSIGN,
      EQUAL,
      NOTEQ,
      LESS,
      LESSEQ,
      GREATER,
      GREATEREQ,

      /* keywords */
      BREAK,
      CHAR,
      DOUBLE,
      ELSE,
      FOR,
      IF,
      INT,
      RETURN,
      STRUCT,
      VOID,
      WHILE
  };
  39.  
  40. char buff[50001];
  41. Token *lastToken = NULL;
  42. Token *tokens = NULL;
  43. char *pCrtCh = buff;
  44. int tkline = 0;
  45. char *pStartCh;
  46.  
  47. Token *addTk(int code)
  48. {
  49. Token *tk;
  50. SAFEALLOC(tk, Token);
  51. tk->code = code;
  52. tk->line = tkline;
  53. tk->next = NULL;
  54. if (lastToken) {
  55. lastToken->next = tk;
  56. }
  57. else {
  58. tokens = tk;
  59. }
  60. lastToken = tk;
  61. return tk;
  62. }
  63.  
  64. void tkerr(const Token *tk, const char *fmt, ...)
  65. {
  66. va_list va;
  67. va_start(va, fmt);
  68. fprintf(stderr, "error in line %d: ", tk->line);
  69. vfprintf(stderr, fmt, va);
  70. fputc('\n', stderr);
  71. va_end(va);
  72. exit(-1);
  73. }
  74.  
  75.  
  76.  
  /*
   * Build a NUL-terminated heap copy of the characters in [st, end]
   * (both ends inclusive), decoding backslash escape sequences.
   *
   * st  : first character to copy.
   * end : last character to copy; passing end == st - 1 yields "".
   *
   * Decoded escapes: \a \b \f \n \r \t \v \0 \\ \' \" \?. For any other
   * character after a backslash the backslash is dropped and the character
   * is copied as is. A backslash that is the very last character of the
   * range is copied literally, so nothing past `end` is ever read.
   *
   * Returns a malloc'ed string owned by the caller (release with free).
   * On allocation failure an error is printed to stderr and the program
   * exits with status -1.
   */
  char *createString(const char *st, char *end) {
      /* Decoding never lengthens the text, so range length + 1 always fits. */
      size_t capacity = (end >= st) ? (size_t)(end - st) + 1 : 0;
      char *out = malloc(capacity + 1);   /* + 1 for the terminator */
      size_t len = 0;

      if (out == NULL) {
          fputs("error: not enough memory\n", stderr);
          exit(-1);
      }

      while (st <= end) {
          char c = *st++;

          if (c == '\\' && st <= end) {
              /* Consume the escape letter together with the backslash so
                 that "\\\\" is not re-read as the start of a new escape. */
              switch (*st) {
              case 'a':  c = '\a'; break;
              case 'b':  c = '\b'; break;
              case 'f':  c = '\f'; break;
              case 'n':  c = '\n'; break;
              case 'r':  c = '\r'; break;
              case 't':  c = '\t'; break;
              case 'v':  c = '\v'; break;
              case '0':  c = '\0'; break;
              default:   c = *st;  break;   /* \\ \' \" \? and unknown escapes */
              }
              st++;
          }
          out[len++] = c;
      }
      out[len] = '\0';
      return out;
  }
  109. int getNextToken() {
  110. int state = 0;
  111. int nCh;
  112. char ch;
  113.  
  114. Token *tk;
  115.  
  116. while (1) {
  117. ch = *pCrtCh;
  118. switch (state) {
  119. case 0:
  120. if (ch == ',') {
  121. pCrtCh++;
  122. state = 33;
  123. }
  124. else if (ch == ';') {
  125. pCrtCh++;
  126. state = 34;
  127. }
  128. else if (ch == '(') {
  129. pCrtCh++;
  130. state = 35;
  131. }
  132. else if (ch == ')') {
  133. pCrtCh++;
  134. state = 36;
  135. }
  136. else if (ch == '[') {
  137. pCrtCh++;
  138. state = 38;
  139. }
  140. else if (ch == ']') {
  141. pCrtCh++;
  142. state = 37;
  143. }
  144. else if (ch == '{') {
  145. pCrtCh++;
  146. state = 39;
  147. }
  148. else if (ch == '}') {
  149. pCrtCh++;
  150. state = 40;
  151. }
  152. else if (ch == '+') {
  153. pCrtCh++;
  154. state = 48;
  155. }
  156. else if (ch == '-') {
  157. pCrtCh++;
  158. state = 49;
  159. }
  160. else if (ch == '*') {
  161. pCrtCh++;
  162. state = 50;
  163. }
  164. else if (ch == '.') {
  165. pCrtCh++;
  166. state = 47;
  167. }
  168. else if (ch == '&') {
  169. pCrtCh++;
  170. state = 51;
  171. }
  172. else if (ch == '|') {
  173. pCrtCh++;
  174. state = 53;
  175. }
  176. else if (ch == '/') {
  177. pCrtCh++;
  178. state = 24;
  179. }
  180. else if (ch == '=') {
  181. pCrtCh++;
  182. state = 55;
  183. }
  184. else if (ch == '!') {
  185. pCrtCh++;
  186. state = 58;
  187. }
  188. else if (ch == '>') {
  189. pCrtCh++;
  190. state = 41;
  191. }
  192. else if (ch == '<') {
  193. pCrtCh++;
  194. state = 44;
  195. }
  196. else if (ch == ' ' || ch == '\r' || ch == '\t') {
  197. pCrtCh++;
  198. }
  199. else if (ch == '\n') {
  200. pCrtCh++;
  201. tkline++;
  202. }
  203. else if (isalpha(ch) || ch == '_') {
  204. pStartCh = pCrtCh;
  205. pCrtCh++;
  206. state = 31;
  207. }
  208. else if (ch == '\'') {
  209. pCrtCh++;
  210. state = 14;
  211. }
  212. else if (ch == '"') {
  213. pStartCh = pCrtCh;
  214. pCrtCh++;
  215. state = 19;
  216. }
  217. else if (ch == '0') {
  218. pStartCh = pCrtCh;
  219. pCrtCh++;
  220. state = 3;
  221. }
  222. else if (isdigit(ch) && ch - '0' != 0) {
  223. pStartCh = pCrtCh;
  224. pCrtCh++;
  225. state = 1;
  226. }
  227. else tkerr(addTk(END), "caracter invalid");
  228. break;
  229. case 31:
  230. if (isalpha(ch) || isdigit(ch) || ch == '_') {
  231. pCrtCh++;
  232. }
  233. else {
  234. state = 32;
  235. }
  236. break;
  237. case 32:
  238. nCh = pCrtCh - pStartCh;
  239. if (nCh == 5 && !memcmp(pStartCh, "break", 5))
  240. tk = addTk(BREAK);
  241. else if (nCh == 4 && !memcmp(pStartCh, "char", 4))
  242. tk = addTk(CHAR);
  243. else if (nCh == 5 && !memcmp(pStartCh, "double", 5))
  244. tk = addTk(DOUBLE);
  245. else if (nCh == 4 && !memcmp(pStartCh, "else", 4))
  246. tk = addTk(ELSE);
  247. else if (nCh == 3 && !memcmp(pStartCh, "for", 3))
  248. tk = addTk(FOR);
  249. else if (nCh == 2 && !memcmp(pStartCh, "if", 2))
  250. tk = addTk(IF);
  251. else if (nCh == 3 && !memcmp(pStartCh, "int", 3))
  252. tk = addTk(INT);
  253. else if (nCh == 6 && !memcmp(pStartCh, "return", 6))
  254. tk = addTk(RETURN);
  255. else if (nCh == 6 && !memcmp(pStartCh, "struct", 6))
  256. tk = addTk(STRUCT);
  257. else if (nCh == 4 && !memcmp(pStartCh, "void", 4))
  258. tk = addTk(VOID);
  259. else if (nCh == 5 && !memcmp(pStartCh, "while", 5))
  260. tk = addTk(WHILE);
  261. else {
  262. tk = addTk(ID);
  263. tk->text = createString(pStartCh, pCrtCh - 1);
  264. }
  265. return tk->code;
  266. case 1:
  267. if (isdigit(ch)) {
  268. pCrtCh++;
  269. }
  270. else if (ch == '.') {
  271. pCrtCh++;
  272. state = 8;
  273. }
  274. else if (ch == 'e' || ch == 'E') {
  275. pCrtCh++;
  276. state = 10;
  277. }
  278. else {
  279. state = 2;
  280. }
  281. break;
  282. case 2:
  283. tk = addTk(CT_INT);
  284. if ((*(pStartCh + 1)) == 'x') {
  285. tk->i = strtol(createString(pStartCh, pCrtCh - 1), NULL, 16);
  286. }
  287. else if ((*pStartCh) == '0') {
  288. tk->i = strtol(createString(pStartCh, pCrtCh - 1), NULL, 8);
  289. }
  290. else
  291. tk->i = tk->i = strtol(createString(pStartCh, pCrtCh - 1), NULL, 10);
  292. return CT_INT;
  293. case 3:
  294. if (ch == 'x') {
  295. pCrtCh++;
  296. state = 5;
  297. }
  298. else if (ch == '9' || ch == '8') {
  299. pCrtCh++;
  300. state = 7;
  301. }
  302. else {
  303. pCrtCh++;
  304. state = 4;
  305. }
  306. break;
  307. case 4:
  308. if (ch - '0' <= 7 && ch - '0' >= 0) {
  309. pCrtCh++;
  310. }
  311. else if (ch == '.')
  312. {
  313. pCrtCh++;
  314. state = 9;
  315. }
  316. else if (ch == 'e' || ch == 'E')
  317. {
  318. pCrtCh++;
  319. state = 10;
  320. }
  321. else if (ch == '9' || ch == '8')
  322. {
  323. pCrtCh++;
  324. state = 7;
  325. }
  326. else {
  327. state = 2;
  328. }
  329. break;
  330. case 5:
  331. if (isdigit(ch) || (tolower(ch) <= 'f' && tolower(ch) >= 'a')) {
  332. pCrtCh++;
  333. state = 6;
  334. }
  335. else tkerr(addTk(END), "Caracter invalid la starea 5.\n");
  336. break;
  337. case 6:
  338. if (isdigit(ch) || (tolower(ch) <= 'f' && tolower(ch) >= 'a')) {
  339. pCrtCh++;
  340. }
  341. else {
  342. state = 2;
  343. }
  344. break;
  345. case 7:
  346. if (ch == '.')
  347. {
  348. pCrtCh++;
  349. state = 8;
  350. }
  351. else tkerr(addTk(END), "Caracter invalid la starea 7.\n");
  352. break;
  353.  
  354. case 8:
  355. if (isdigit(ch))
  356. {
  357. pCrtCh++;
  358. state = 9;
  359. }
  360. else tkerr(addTk(END), "Caracter invalid la starea 8.\n");
  361. break;
  362. case 9:
  363. if (isdigit(ch))
  364. pCrtCh++;
  365. else if (ch == 'e' || ch == 'E')
  366. {
  367. pCrtCh++;
  368. state = 10;
  369. }
  370. else
  371. state = 13;
  372. break;
  373. case 10:
  374. if (ch == '-' || ch == '+') {
  375. pCrtCh++;
  376. state = 11;
  377. }
  378. else if (isdigit(ch)) {
  379. pCrtCh++;
  380. state = 12;
  381. }
  382. else tkerr(addTk(END), "Caracter invalid la starea 10.\n");
  383. break;
  384. case 11:
  385. if (isdigit(ch)) {
  386. pCrtCh++;
  387. state = 12;
  388. }
  389. else tkerr(addTk(END), "Caracter invalid la starea 11.\n");
  390. break;
  391. case 12:
  392. if (isdigit(ch)) {
  393. pCrtCh++;
  394. }
  395. else state = 13;
  396. break;
  397. case 13:
  398. tk = addTk(CT_REAL);
  399. tk->r = atof(createString(pStartCh, pCrtCh));
  400. return CT_REAL;
  401. case 14:
  402. //pStartCh = pCrtCh;
  403. if (ch == '\\')
  404. {
  405. pCrtCh++;
  406. state = 16;
  407. }
  408. else
  409. state = 17;
  410. break;
  411. case 16:
  412. if (ch == 'a' || ch == 'b' || ch == 'f' || ch == 'r' || ch == 'n' || ch == 't' || ch == 'v' || ch == '\'' || ch == '?' || ch == '"' || ch == '0' || ch == '\\') {
  413. pCrtCh++;
  414. state = 17;
  415. }
  416. else tkerr(addTk(END), "Caracter invalid la starea 16.\n");
  417. break;
  418.  
  419. case 17:
  420. if (ch == '\'')
  421. {
  422. pCrtCh++;
  423. state = 18;
  424. }
  425. case 18:
  426. tk = addTk(CT_CHAR);
  427.  
  428. if (*(pStartCh + 1) == 'n')
  429. {
  430. tk->i = '\n';
  431. break;
  432. }
  433. else if (*(pStartCh + 1) == 't')
  434. {
  435. tk->i = '\t';
  436. break;
  437. }
  438. else if (*(pStartCh + 1) == '\\')
  439. {
  440. tk->i = '\\';
  441. break;
  442. }
  443. else
  444. {
  445.  
  446. tk->i = *pCrtCh - '0';
  447. return CT_CHAR;
  448. }
  449. case 19:
  450. if (ch == '\\')
  451. {
  452. pCrtCh++;
  453. state = 21;
  454. }
  455. else if (ch == '"')
  456. {
  457. pCrtCh++;
  458. state = 23;
  459. }
  460. else
  461. pCrtCh++;
  462. break;
  463. case 21:
  464. if (ch == 'a' || ch == 'b' || ch == 'f' || ch == 'r' || ch == 'n' || ch == 't' || ch == 'v' || ch == '\'' || ch == '?' || ch == '"' || ch == '0' || ch == '\\') {
  465. pCrtCh++;
  466. state = 22;
  467. }
  468. else tkerr(addTk(END), "Caracter invalid la starea 21.\n");
  469. break;
  470. case 22:
  471. if (ch == '"')
  472. {
  473. pCrtCh++;
  474. state = 23;
  475. }
  476. else
  477. state = 19;
  478. break;
  479. case 23:
  480.  
  481. tk = addTk(CT_STRING);
  482. tk->text = createString(pStartCh + 1, pCrtCh - 2);
  483. return CT_STRING;
  484. case 24:
  485. if (ch == '*')
  486. {
  487. pCrtCh++;
  488. state = 26;
  489. }
  490. else if (ch == '/')
  491. {
  492. pCrtCh++;
  493. state = 61;
  494. }
  495. else
  496. state = 25;
  497. break;
  498. case 25:
  499. addTk(DIV);
  500. return DIV;
  501. case 26:
  502. if (ch == '*')
  503. {
  504. pCrtCh++;
  505. state = 27;
  506. }
  507. else
  508. pCrtCh++;
  509. break;
  510. case 27:
  511. if (ch == '*')
  512. pCrtCh++;
  513. if (ch == '/')
  514. state = 0;
  515. else
  516. state = 26;
  517. break;
  518. case 33:
  519. addTk(COMA);
  520. return COMA;
  521. case 34:
  522. addTk(SEMICOLON);
  523. return SEMICOLON;
  524. case 35:
  525. addTk(LPAR);
  526. return LPAR;
  527. case 36:
  528. addTk(RPAR);
  529. return RPAR;
  530. case 38:
  531. addTk(LBRACKET);
  532. return LBRACKET;
  533. case 37:
  534. addTk(RBRACKET);
  535. return RBRACKET;
  536. case 39:
  537. addTk(LACC);
  538. return LACC;
  539. case 40:
  540. addTk(RACC);
  541. return RACC;
  542. case 41:
  543. if (ch == '=')
  544. {
  545. pCrtCh++;
  546. state = 43;
  547. }
  548. else state = 42;
  549. break;
  550. case 42:
  551. addTk(GREATER);
  552. return GREATER;
  553. case 43:
  554. addTk(GREATEREQ);
  555. return GREATEREQ;
  556. case 44:
  557. if (ch == '=')
  558. {
  559. pCrtCh++;
  560. state = 46;
  561. }
  562. else
  563. state = 45;
  564. break;
  565. case 45:
  566. addTk(LESS);
  567. return LESS;
  568. case 46:
  569. addTk(LESSEQ);
  570. return LESSEQ;
  571. case 47:
  572. addTk(DOT);
  573. return DOT;
  574. case 48:
  575. addTk(ADD);
  576. return ADD;
  577. case 49:
  578. addTk(SUB);
  579. return SUB;
  580. case 50:
  581. addTk(MUL);
  582. return MUL;
  583. case 51:
  584. if (ch == '&')
  585. {
  586. pCrtCh++;
  587. state = 52;
  588. break;
  589. }
  590. else
  591. tkerr(addTk(END), "Caracter invalid la starea 51.\n");
  592. break;
  593. case 52:
  594. addTk(AND);
  595. return AND;
  596. case 53:
  597. if (ch == '|')
  598. {
  599.  
  600. pCrtCh++;
  601. state = 54;
  602. }
  603. else tkerr(addTk(END), "Caracter invalid la starea 53.\n");
  604. break;
  605. case 54:
  606. addTk(OR);
  607. return OR;
  608. case 55:
  609. if (ch == '=')
  610. {
  611. pCrtCh++;
  612. state = 57;
  613. }
  614. else
  615. state = 56;
  616. break;
  617. case 56:
  618. addTk(ASSIGN);
  619. return ASSIGN;
  620. case 57:
  621. addTk(EQUAL);
  622. return EQUAL;
  623. case 58:
  624. if (ch == '=')
  625. {
  626. pCrtCh++;
  627. state = 59;
  628. }
  629. else
  630. state = 60;
  631. break;
  632. case 59:
  633. addTk(NOTEQ);
  634. return NOTEQ;
  635. case 60:
  636. addTk(NOT);
  637. return NOT;
  638. case 61:
  639. if (ch != '\n' || ch != '\t' || ch != '\r')
  640. pCrtCh++;
  641. else
  642. state = 0;
  643. break;
  644.  
  645.  
  646.  
  647. }
  648. }
  649. }
  650.  
  651. void afisare() {
  652. char *it = pCrtCh;
  653. while ((*it) != '\0') {
  654. printf("%c", *it);
  655. it++;
  656. }
  657. printf("\n");
  658. }
  659. void printAtom(Token *tk) {
  660. if (tk->code == END) {
  661. printf("END\n");
  662. }
  663. else if (tk->code == COMA) {
  664. printf("COMMA ");
  665. }
  666. else if (tk->code == SEMICOLON) {
  667. printf("SEMICOLON ");
  668. }
  669. else if (tk->code == LPAR) {
  670. printf("LPAR ");
  671. }
  672. else if (tk->code == RPAR) {
  673. printf("RPAR ");
  674. }
  675. else if (tk->code == LBRACKET) {
  676. printf("LBRACKET ");
  677. }
  678. else if (tk->code == RBRACKET) {
  679. printf("RBRACKET ");
  680. }
  681. else if (tk->code == LACC) {
  682. printf("LACC ");
  683. }
  684. else if (tk->code == RACC) {
  685. printf("RACC ");
  686. }
  687. else if (tk->code == ADD) {
  688. printf("ADD ");
  689. }
  690. else if (tk->code == SUB) {
  691. printf("SUB ");
  692. }
  693. else if (tk->code == MUL) {
  694. printf("MUL ");
  695. }
  696. else if (tk->code == DOT) {
  697. printf("DOT ");
  698. }
  699. else if (tk->code == AND) {
  700. printf("AND ");
  701. }
  702. else if (tk->code == OR) {
  703. printf("OR ");
  704. }
  705. else if (tk->code == DIV) {
  706. printf("DIV ");
  707. }
  708. else if (tk->code == NOT) {
  709. printf("NOT ");
  710. }
  711. else if (tk->code == NOTEQ) {
  712. printf("NOTEQ ");
  713. }
  714. else if (tk->code == ASSIGN) {
  715. printf("ASSIGN ");
  716. }
  717. else if (tk->code == EQUAL) {
  718. printf("EQUAL ");
  719. }
  720. else if (tk->code == GREATER) {
  721. printf("GREATER ");
  722. }
  723. else if (tk->code == GREATEREQ) {
  724. printf("GREATEREQ ");
  725. }
  726. else if (tk->code == LESS) {
  727. printf("LESS ");
  728. }
  729. else if (tk->code == LESSEQ) {
  730. printf("LESSEQ ");
  731. }
  732. else if (tk->code == ID) {
  733. printf("ID:%s ", tk->text);
  734. }
  735. else if (tk->code == CT_CHAR) {
  736. printf("CT_CHAR:%c ", tk->i + '0');
  737. }
  738. else if (tk->code == CT_STRING) {
  739. printf("CT_STRING:%s ", tk->text);
  740. }
  741. else if (tk->code == CT_INT) {
  742. printf("CT_INT:%d ", tk->i);
  743. }
  744. else if (tk->code == CT_REAL) {
  745. printf("CT_REAL:%f ", tk->r);
  746. }
  747. else if (tk->code == BREAK) {
  748. printf("BREAK ");
  749. }
  750. else if (tk->code == CHAR) {
  751. printf("CHAR ");
  752. }
  753. else if (tk->code == DOUBLE) {
  754. printf("DOUBLE ");
  755. }
  756. else if (tk->code == ELSE) {
  757. printf("ELSE ");
  758. }
  759. else if (tk->code == FOR) {
  760. printf("FOR ");
  761. }
  762. else if (tk->code == IF) {
  763. printf("IF ");
  764. }
  765. else if (tk->code == INT) {
  766. printf("INT ");
  767. }
  768. else if (tk->code == RETURN) {
  769. printf("RETURN ");
  770. }
  771. else if (tk->code == STRUCT) {
  772. printf("STRUCT ");
  773. }
  774. else if (tk->code == VOID) {
  775. printf("VOID ");
  776. }
  777. else if (tk->code == WHILE) {
  778. printf("WHILE ");
  779. }
  780. }
  781.  
  782. void Atoms_afis() {
  783. Token *tk1 = tokens;
  784. printf("Result:\n");
  785. while (tk1 != NULL) {
  786. printAtom(tk1);
  787. tk1 = tk1->next;
  788. }
  789. printf("\n");
  790. }
  791.  
  792. int main() {
  793. FILE *f;
  794. int noCh;
  795.  
  796. if ((f = fopen("a.txt", "r")) == NULL) {
  797. printf("Eroare la deschiderea fisierului\n");
  798. exit(-1);
  799. }
  800.  
  801. if ((noCh = fread(buff, 1, 50000, f)) <= 0) {
  802. printf("Eroare la citirea din fisier\n");
  803. exit(-1);
  804. }
  805. buff[noCh] = '\0';
  806. afisare();
  807.  
  808. while ((*pCrtCh) != '\0') {
  809. getNextToken();
  810. }
  811. addTk(END);
  812. Atoms_afis();
  813. fclose(f);
  814. return 0;
  815. }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement