Advertisement
Guest User

Untitled

a guest
Sep 11th, 2019
86
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 18.40 KB | None | 0 0
  1. module OOPLexer
  2. implicit none
  3.  
  4. integer, parameter :: enum_string_token = 0
  5. integer, parameter :: enum_number_token = 1
  6. integer, parameter :: enum_ident_token = 2
  7. integer, parameter :: enum_eof_token = 3
  8. integer, parameter :: enum_unknown_token = 4
  9.  
  10. type STRING_BUILDER_TYPE
  11. character, allocatable :: str(:)
  12. integer :: len, cap
  13. end type STRING_BUILDER_TYPE
  14.  
  15. type POSITION_TYPE
  16. integer :: line, pos, index
  17. type(STRING_BUILDER_TYPE), pointer :: text
  18. end type POSITION_TYPE
  19.  
  20. type FRAGMENT_TYPE
  21. type(POSITION_TYPE) :: starting, following
  22. end type FRAGMENT_TYPE
  23.  
  24. type TOKEN_TYPE
  25. integer :: tag
  26. type(FRAGMENT_TYPE) :: coords
  27. end type TOKEN_TYPE
  28.  
  29. type IDENT_TOKEN_TYPE
  30. type(TOKEN_TYPE) :: tok
  31. type(STRING_BUILDER_TYPE), pointer :: value
  32. end type IDENT_TOKEN_TYPE
  33.  
  34. type NUMBER_TOKEN_TYPE
  35. type(TOKEN_TYPE) :: tok
  36. type(STRING_BUILDER_TYPE), pointer :: value
  37. end type NUMBER_TOKEN_TYPE
  38.  
  39. type STRING_TOKEN_TYPE
  40. type(TOKEN_TYPE) :: tok
  41. type(STRING_BUILDER_TYPE), pointer :: value
  42. end type STRING_TOKEN_TYPE
  43.  
  44. type UNKNOWN_TOKEN_TYPE
  45. type(TOKEN_TYPE) :: tok
  46. end type UNKNOWN_TOKEN_TYPE
  47.  
  48. type MSG_TYPE
  49. integer :: t
  50. type(STRING_BUILDER_TYPE) :: msg
  51. end type MSG_TYPE
  52.  
  53. type COMPILER_TYPE
  54. integer :: len, cap
  55. type(MSG_TYPE), allocatable :: msgs(:)
  56. end type COMPILER_TYPE
  57.  
  58. type SCANNER_TYPE
  59. type(STRING_BUILDER_TYPE), pointer :: text
  60. type(POSITION_TYPE), pointer :: cur_pos
  61. type(COMPILER_TYPE), pointer :: compiler
  62. end type SCANNER_TYPE
  63.  
  64. type TOKEN_RET_TYPE
  65. integer :: enum_token
  66. type(STRING_TOKEN_TYPE), pointer :: string_token
  67. type(NUMBER_TOKEN_TYPE), pointer :: number_token
  68. type(IDENT_TOKEN_TYPE), pointer :: ident_token
  69. type(UNKNOWN_TOKEN_TYPE), pointer :: unk_token
  70. end type TOKEN_RET_TYPE
  71.  
  72. contains
  73. subroutine string_builder_push_back(string_builder, ch)
  74. type(STRING_BUILDER_TYPE), pointer :: string_builder
  75. character :: ch
  76. character, allocatable :: tmp(:)
  77. integer :: i
  78. string_builder%len = string_builder%len + 1
  79. if (string_builder%len > string_builder%cap) then
  80. allocate(tmp(string_builder%cap))
  81. i = 1
  82. do while (i <= string_builder%cap)
  83. tmp(i) = string_builder%str(i)
  84. i = i + 1
  85. enddo
  86. deallocate(string_builder%str)
  87. allocate(string_builder%str(string_builder%cap * 2))
  88. i = 1
  89. do while (i <= string_builder%cap)
  90. string_builder%str(i) = tmp(i)
  91. i = i + 1
  92. enddo
  93. deallocate(tmp)
  94. string_builder%str(string_builder%len) = ch
  95. string_builder%cap = string_builder%cap * 2
  96. else
  97. string_builder%str(string_builder%len) = ch
  98. endif
  99. end subroutine string_builder_push_back
  100.  
  101. subroutine string_builder_cut_end(string_builder)
  102. type(STRING_BUILDER_TYPE), pointer :: string_builder
  103. character, allocatable :: tmp(:)
  104. integer :: i
  105. if (string_builder%len < string_builder%cap) then
  106. allocate(tmp(string_builder%len))
  107. i = 1
  108. do while (i <= string_builder%len)
  109. tmp(i) = string_builder%str(i)
  110. i = i + 1
  111. enddo
  112. deallocate(string_builder%str)
  113. string_builder%str = tmp
  114. deallocate(tmp)
  115. string_builder%cap = string_builder%len
  116. endif
  117. end subroutine string_builder_cut_end
  118.  
  119. function new_pos(len, pos, index, text_ptr)
  120. integer :: len, pos, index
  121. type(STRING_BUILDER_TYPE), pointer :: text_ptr
  122. type(POSITION_TYPE) :: new_pos
  123. new_pos = POSITION_TYPE(len, pos, index, text_ptr)
  124. end function new_pos
  125.  
  126. function is_pos_eof(pos)
  127. type(POSITION_TYPE), pointer :: pos
  128. logical :: is_pos_eof
  129. is_pos_eof = .false.
  130. if (pos%index >= pos%text%len) then
  131. is_pos_eof = .true.
  132. endif
  133. end function is_pos_eof
  134.  
  135. function pos_get_code(pos)
  136. type(POSITION_TYPE), pointer :: pos
  137. character :: pos_get_code
  138. pos_get_code = pos%text%str(pos%index)
  139. end function pos_get_code
  140.  
  141. function is_pos_newline(pos)
  142. type(POSITION_TYPE), pointer :: pos
  143. character :: code1, code2
  144. logical :: is_pos_newline
  145. is_pos_newline = .false.
  146. code1 = pos_get_code(pos)
  147. if (pos%index == (pos%text%len + 1)) then
  148. is_pos_newline = .true.
  149. return
  150. endif
  151. code2 = pos%text%str(pos%index + 1)
  152. if ((code1 == char(10)) .or. ((code1 == char(10)) .and. (code2 == char(13)))) then
  153. is_pos_newline = .true.
  154. endif
  155. end function is_pos_newline
  156.  
  157. function is_pos_whitespace(pos)
  158. type(POSITION_TYPE), pointer :: pos
  159. logical :: is_pos_whitespace
  160. character :: code
  161. code = pos_get_code(pos)
  162. is_pos_whitespace = .false.
  163. if ((.not. is_pos_eof(pos)) .and. ((code == ' ') .or. (code == char(9)) .or. (is_pos_newline(pos)))) then
  164. is_pos_whitespace = .true.
  165. endif
  166. end function is_pos_whitespace
  167.  
  168. function is_pos_decimal_digit(pos)
  169. type(POSITION_TYPE), pointer :: pos
  170. logical :: is_pos_decimal_digit
  171. character :: code
  172. code = pos_get_code(pos)
  173. is_pos_decimal_digit = .false.
  174. if ((.not. is_pos_eof(pos)) .and. (&
  175. (code == '0') &
  176. .or. (code == '1') &
  177. .or. (code == '2') &
  178. .or. (code == '3') &
  179. .or. (code == '4') &
  180. .or. (code == '5') &
  181. .or. (code == '6') &
  182. .or. (code == '7') &
  183. .or. (code == '8') &
  184. .or. (code == '9') &
  185. )) then
  186. is_pos_decimal_digit = .true.
  187. endif
  188. end function is_pos_decimal_digit
  189.  
  190. function pos_next(pos) result(p)
  191. type(POSITION_TYPE), pointer :: pos
  192. type(POSITION_TYPE), pointer :: p
  193. allocate(p)
  194. p = POSITION_TYPE(pos%line, pos%pos, pos%index, pos%text)
  195. if (.not. is_pos_eof(p)) then
  196. if (is_pos_newline(p)) then
  197. if (pos_get_code(p) == char(13)) then
  198. p%index = p%index + 1
  199. endif
  200. p%line = p%line + 1
  201. p%pos = 1
  202. else
  203. p%pos = p%pos + 1
  204. endif
  205. p%index = p%index + 1
  206. endif
  207. end function pos_next
  208.  
  209. function new_scanner(text_ptr)
  210. type(STRING_BUILDER_TYPE), pointer :: text_ptr
  211. type(POSITION_TYPE), pointer :: cur_pos
  212. type(SCANNER_TYPE), pointer :: new_scanner
  213. type(COMPILER_TYPE), pointer :: compiler
  214. allocate(cur_pos)
  215. cur_pos = new_pos(1, 1, 1, text_ptr)
  216. allocate(compiler)
  217. compiler = COMPILER_TYPE(0, 128, null())
  218. allocate(compiler%msgs(compiler%cap))
  219. allocate(new_scanner)
  220. new_scanner = SCANNER_TYPE(text_ptr, cur_pos, compiler)
  221. end function new_scanner
  222.  
  223. function read_string_token(scanner)
  224. type(SCANNER_TYPE), pointer :: scanner
  225. type(POSITION_TYPE), pointer :: p, p_tmp
  226. type(POSITION_TYPE), pointer :: pp
  227. type(STRING_BUILDER_TYPE), pointer :: value
  228. type(STRING_TOKEN_TYPE), pointer :: read_string_token
  229. character :: code1, code2
  230. type(POSITION_TYPE) :: starting, following
  231. type(FRAGMENT_TYPE) :: fragment
  232. type(TOKEN_TYPE) :: tok
  233. allocate(value)
  234. value = STRING_BUILDER_TYPE(null(), 0, 1)
  235. allocate(value%str(value%cap))
  236. p => pos_next(scanner%cur_pos)
  237. 1 if (is_pos_eof(p)) then
  238. scanner%compiler%len = scanner%compiler%len + 1
  239. scanner%compiler%msgs(scanner%compiler%len)%t = 1
  240. goto 2
  241. endif
  242. code1 = pos_get_code(p)
  243. pp => pos_next(p)
  244. code2 = pos_get_code(pp)
  245. if ((code1 == '`') .and. (code2 == '`')) then
  246. call string_builder_push_back(value, '`')
  247. p_tmp => p
  248. p => pos_next(p)
  249. deallocate(p_tmp)
  250. p_tmp => p
  251. p => pos_next(p)
  252. deallocate(p_tmp)
  253. else if ((code1 == '`') .and. (code2 /= '`')) then
  254. p_tmp => p
  255. p => pos_next(p)
  256. deallocate(p_tmp)
  257. deallocate(pp)
  258. goto 2
  259. else if (.not. is_pos_eof(p)) then
  260. call string_builder_push_back(value, code1)
  261. p_tmp => p
  262. p => pos_next(p)
  263. deallocate(p_tmp)
  264. endif
  265. deallocate(pp)
  266. goto 1
  267. 2 call string_builder_cut_end(value)
  268. allocate(read_string_token)
  269. starting = POSITION_TYPE(scanner%cur_pos%line, scanner%cur_pos%pos, scanner%cur_pos%index, scanner%cur_pos%text)
  270. following = POSITION_TYPE(p%line, p%pos, p%index, p%text)
  271. fragment = FRAGMENT_TYPE(starting, following)
  272. tok = TOKEN_TYPE(enum_string_token, fragment)
  273. deallocate(p)
  274. read_string_token = STRING_TOKEN_TYPE(tok, value)
  275. end function read_string_token
  276.  
  277. function read_number_token(scanner)
  278. type(SCANNER_TYPE), pointer :: scanner
  279. logical :: is_binary
  280. type(STRING_BUILDER_TYPE), pointer :: value
  281. type(POSITION_TYPE), pointer :: p, p_tmp
  282. type(NUMBER_TOKEN_TYPE), pointer :: read_number_token
  283. type(POSITION_TYPE) :: starting, following
  284. type(FRAGMENT_TYPE) :: fragment
  285. type(TOKEN_TYPE) :: tok
  286. character :: code
  287. allocate(value)
  288. value = STRING_BUILDER_TYPE(null(), 0, 1)
  289. allocate(value%str(value%cap))
  290. allocate(p)
  291. p = POSITION_TYPE(scanner%cur_pos%line, scanner%cur_pos%pos, scanner%cur_pos%index, scanner%cur_pos%text)
  292. is_binary = .true.
  293. 1 code = pos_get_code(p)
  294. if (is_pos_decimal_digit(p)) then
  295. if ((code /= '0') .and. (code /= '1')) then
  296. is_binary = .false.
  297. endif
  298. call string_builder_push_back(value, code)
  299. else if (code == 'b') then
  300. if (is_binary) then
  301. call string_builder_push_back(value, code)
  302. p_tmp => p
  303. p => pos_next(p)
  304. deallocate(p_tmp)
  305. goto 2
  306. else
  307. goto 2
  308. endif
  309. else
  310. goto 2
  311. endif
  312. p_tmp => p
  313. p => pos_next(p)
  314. deallocate(p_tmp)
  315. goto 1
  316. 2 call string_builder_cut_end(value)
  317. allocate(read_number_token)
  318. starting = POSITION_TYPE(scanner%cur_pos%line, scanner%cur_pos%pos, scanner%cur_pos%index, scanner%cur_pos%text)
  319. following = POSITION_TYPE(p%line, p%pos, p%index, p%text)
  320. fragment = FRAGMENT_TYPE(starting, following)
  321. tok = TOKEN_TYPE(enum_number_token, fragment)
  322. deallocate(p)
  323. read_number_token = NUMBER_TOKEN_TYPE(tok, value)
  324. end function read_number_token
  325.  
  326.  
  327. function read_ident_token(scanner)
  328. type(SCANNER_TYPE), pointer :: scanner
  329. type(STRING_BUILDER_TYPE), pointer :: value
  330. type(POSITION_TYPE), pointer :: p, p_tmp
  331. type(IDENT_TOKEN_TYPE), pointer :: read_ident_token
  332. type(POSITION_TYPE) :: starting, following
  333. type(FRAGMENT_TYPE) :: fragment
  334. type(TOKEN_TYPE) :: tok
  335. character :: code
  336. allocate(value)
  337. value = STRING_BUILDER_TYPE(null(), 0, 1)
  338. allocate(value%str(value%cap))
  339. allocate(p)
  340. p = POSITION_TYPE(scanner%cur_pos%line, scanner%cur_pos%pos, scanner%cur_pos%index, scanner%cur_pos%text)
  341. code = pos_get_code(p)
  342. call string_builder_push_back(value, code)
  343. p_tmp => p
  344. p => pos_next(p)
  345. deallocate(p_tmp)
  346. if (is_pos_eof(p)) then
  347. goto 1
  348. endif
  349. code = pos_get_code(p)
  350. do while (is_pos_decimal_digit(p) .or. (code == '?') .or. (code == '*') .or. (code == '|'))
  351. call string_builder_push_back(value, code)
  352. p_tmp => p
  353. p => pos_next(p)
  354. deallocate(p_tmp)
  355. if (is_pos_eof(p)) then
  356. goto 1
  357. endif
  358. code = pos_get_code(p)
  359. enddo
  360. 1 call string_builder_cut_end(value)
  361. allocate(read_ident_token)
  362. starting = POSITION_TYPE(scanner%cur_pos%line, scanner%cur_pos%pos, scanner%cur_pos%index, scanner%cur_pos%text)
  363. following = POSITION_TYPE(p%line, p%pos, p%index, p%text)
  364. fragment = FRAGMENT_TYPE(starting, following)
  365. tok = TOKEN_TYPE(enum_ident_token, fragment)
  366. deallocate(p)
  367. read_ident_token = IDENT_TOKEN_TYPE(tok, value)
  368. end function read_ident_token
  369.  
  370. subroutine read_unknown_token(scanner)
  371. type(SCANNER_TYPE), pointer :: scanner
  372. type(STRING_BUILDER_TYPE), target :: value
  373. type(STRING_BUILDER_TYPE), pointer :: value_ptr
  374. type(POSITION_TYPE), pointer :: p_tmp
  375. value = STRING_BUILDER_TYPE(null(), 0, 1)
  376. allocate(value%str(value%cap))
  377. value_ptr => value
  378. do while ((.not. is_pos_eof(scanner%cur_pos)) .and. (.not. is_pos_newline(scanner%cur_pos))&
  379. .and. (.not. is_pos_decimal_digit(scanner%cur_pos) .and. (pos_get_code(scanner%cur_pos) /= '|')&
  380. .and. (pos_get_code(scanner%cur_pos) /= '?') .and. (pos_get_code(scanner%cur_pos) /= '?')&
  381. .and. (pos_get_code(scanner%cur_pos) /= '`')))
  382. call string_builder_push_back(value_ptr, pos_get_code(scanner%cur_pos))
  383. p_tmp => scanner%cur_pos
  384. scanner%cur_pos => pos_next(scanner%cur_pos)
  385. deallocate(p_tmp)
  386. enddo
  387. call string_builder_cut_end(value_ptr)
  388. scanner%compiler%len = scanner%compiler%len + 1
  389. scanner%compiler%msgs(scanner%compiler%len)%msg = value
  390. scanner%compiler%msgs(scanner%compiler%len)%t = -1
  391. end subroutine read_unknown_token
  392.  
  393. function next_token(scanner) result(ret_token)
  394. type(SCANNER_TYPE), pointer :: scanner
  395. type(POSITION_TYPE) :: starting, following
  396. type(FRAGMENT_TYPE) :: fragment
  397. type(TOKEN_TYPE) :: tok
  398. type(TOKEN_RET_TYPE), pointer :: ret_token
  399. logical :: unk, ret_is_null
  400. type(POSITION_TYPE), pointer :: p_tmp
  401. allocate(ret_token)
  402. ret_token%string_token => null()
  403. ret_token%number_token => null()
  404. ret_token%ident_token => null()
  405. ret_token%unk_token => null()
  406. unk = .false.
  407. do while (.not. is_pos_eof(scanner%cur_pos))
  408. do while (is_pos_whitespace(scanner%cur_pos))
  409. p_tmp => scanner%cur_pos
  410. scanner%cur_pos => pos_next(scanner%cur_pos)
  411. deallocate(p_tmp)
  412. enddo
  413. ret_is_null = .true.
  414. select case(pos_get_code(scanner%cur_pos))
  415. case ('`')
  416. ret_token%enum_token = enum_string_token
  417. ret_token%string_token => read_string_token(scanner)
  418. ret_is_null = .false.
  419. case default
  420. if (is_pos_decimal_digit(scanner%cur_pos)) then
  421. ret_token%enum_token = enum_number_token
  422. ret_token%number_token => read_number_token(scanner)
  423. ret_is_null = .false.
  424. else if ((pos_get_code(scanner%cur_pos) == '|')&
  425. .or. (pos_get_code(scanner%cur_pos) == '?')&
  426. .or. (pos_get_code(scanner%cur_pos) == '*')) then
  427. ret_token%enum_token = enum_ident_token
  428. ret_token%ident_token => read_ident_token(scanner)
  429. ret_is_null = .false.
  430. else
  431. unk = .true.
  432. endif
  433. endselect
  434. if (ret_is_null .or. unk) then
  435. call read_unknown_token(scanner)
  436. unk = .false.
  437. else
  438. if (ret_token%enum_token == enum_string_token) then
  439. scanner%cur_pos%line = ret_token%string_token%tok%coords%following%line
  440. scanner%cur_pos%pos = ret_token%string_token%tok%coords%following%pos
  441. scanner%cur_pos%index = ret_token%string_token%tok%coords%following%index
  442. else if (ret_token%enum_token == enum_number_token) then
  443. scanner%cur_pos%line = ret_token%number_token%tok%coords%following%line
  444. scanner%cur_pos%pos = ret_token%number_token%tok%coords%following%pos
  445. scanner%cur_pos%index = ret_token%number_token%tok%coords%following%index
  446. else
  447. scanner%cur_pos%line = ret_token%ident_token%tok%coords%following%line
  448. scanner%cur_pos%pos = ret_token%ident_token%tok%coords%following%pos
  449. scanner%cur_pos%index = ret_token%ident_token%tok%coords%following%index
  450. endif
  451. goto 1
  452. endif
  453. enddo
  454. allocate(ret_token%unk_token)
  455. starting = POSITION_TYPE(0, 0, 0, null())
  456. following = POSITION_TYPE(0, 0, 0, null())
  457. fragment = FRAGMENT_TYPE(starting, following)
  458. tok = TOKEN_TYPE(enum_eof_token, fragment)
  459. ret_token%enum_token = enum_eof_token
  460. ret_token%unk_token = UNKNOWN_TOKEN_TYPE(tok)
  461. 1 return
  462. end function next_token
  463.  
  464. end module OOPLexer
  465.  
  466. program main
  467. use OOPLexer
  468. implicit none
  469.  
  470. character :: ch
  471. integer :: read_flag
  472. type(STRING_BUILDER_TYPE), pointer :: text
  473. type(SCANNER_TYPE), pointer :: scanner
  474. type(TOKEN_RET_TYPE), pointer :: token
  475. integer :: i
  476. allocate(text)
  477. text = STRING_BUILDER_TYPE(null(), 0, 1)
  478. allocate(text%str(text%cap))
  479. open (2, file = './input.txt', status='old', access='stream', form='unformatted')
  480. read_flag = 0
  481. do while(read_flag == 0)
  482. read(2, iostat=read_flag) ch
  483. if (read_flag == 0) then
  484. call string_builder_push_back(text, ch)
  485. endif
  486. enddo
  487. close(2)
  488. call string_builder_cut_end(text)
  489.  
  490. scanner => new_scanner(text)
  491. 1 token => next_token(scanner)
  492. if (token%enum_token == enum_eof_token) then
  493. goto 2
  494. endif
  495.  
  496. if (token%enum_token == enum_string_token) then
  497. write (0, '(A,I2.1,A,I2.1,A,I2.1,A,I2.1,A)')&
  498. 'STRING(', token%string_token%tok%coords%starting%line, ', &
  499. &', token%string_token%tok%coords%starting%pos, ') - (&
  500. &', token%string_token%tok%coords%following%line, ', &
  501. &', token%string_token%tok%coords%following%pos, ') : '
  502. print *, token%string_token%value%str
  503. deallocate(token%string_token%value%str)
  504. deallocate(token%string_token%value)
  505. deallocate(token%string_token)
  506. deallocate(token)
  507. else if (token%enum_token == enum_number_token) then
  508. write (0, '(A,I2.1,A,I2.1,A,I2.1,A,I2.1,A)')&
  509. 'NUMBER(', token%number_token%tok%coords%starting%line, ', &
  510. &', token%number_token%tok%coords%starting%pos, ') - (&
  511. &', token%number_token%tok%coords%following%line, ', &
  512. &', token%number_token%tok%coords%following%pos, ') : '
  513. print *, token%number_token%value%str
  514. deallocate(token%number_token%value%str)
  515. deallocate(token%number_token%value)
  516. deallocate(token%number_token)
  517. deallocate(token)
  518. else
  519. write (0, '(A,I2.1,A,I2.1,A,I2.1,A,I2.1,A)')&
  520. 'IDENT(', token%ident_token%tok%coords%starting%line, ', &
  521. &', token%ident_token%tok%coords%starting%pos, ') - (&
  522. &', token%ident_token%tok%coords%following%line, ', &
  523. &', token%ident_token%tok%coords%following%pos, ') : '
  524. print *, token%ident_token%value%str
  525. deallocate(token%ident_token%value%str)
  526. deallocate(token%ident_token%value)
  527. deallocate(token%ident_token)
  528. deallocate(token)
  529. endif
  530.  
  531. goto 1
  532.  
  533. 2 i = 1
  534. do while (i <= scanner%compiler%len)
  535. if (scanner%compiler%msgs(i)%t == -1) then
  536. write (0, '(A,I2.1,A)')&
  537. 'WARNING(', i, ') : UNKNOWN TOKEN'
  538. print *, scanner%compiler%msgs(i)%msg%str
  539. deallocate(scanner%compiler%msgs(i)%msg%str)
  540. else
  541. write (0, '(A,I2.1,A)')&
  542. 'WARNING(', i, ') : EOF WHILE READING STRING'
  543. endif
  544. i = i + 1
  545. enddo
  546. deallocate(token%unk_token)
  547. deallocate(token)
  548. deallocate(scanner%compiler%msgs)
  549. deallocate(scanner%compiler)
  550. deallocate(scanner%cur_pos)
  551. deallocate(scanner)
  552. deallocate(text%str)
  553. deallocate(text)
  554. end program main
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement