Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- module OOPLexer
- implicit none
- integer, parameter :: enum_string_token = 0
- integer, parameter :: enum_number_token = 1
- integer, parameter :: enum_ident_token = 2
- integer, parameter :: enum_eof_token = 3
- integer, parameter :: enum_unknown_token = 4
- type STRING_BUILDER_TYPE
- character, allocatable :: str(:)
- integer :: len, cap
- end type STRING_BUILDER_TYPE
- type POSITION_TYPE
- integer :: line, pos, index
- type(STRING_BUILDER_TYPE), pointer :: text
- end type POSITION_TYPE
- type FRAGMENT_TYPE
- type(POSITION_TYPE) :: starting, following
- end type FRAGMENT_TYPE
- type TOKEN_TYPE
- integer :: tag
- type(FRAGMENT_TYPE) :: coords
- end type TOKEN_TYPE
- type IDENT_TOKEN_TYPE
- type(TOKEN_TYPE) :: tok
- type(STRING_BUILDER_TYPE), pointer :: value
- end type IDENT_TOKEN_TYPE
- type NUMBER_TOKEN_TYPE
- type(TOKEN_TYPE) :: tok
- type(STRING_BUILDER_TYPE), pointer :: value
- end type NUMBER_TOKEN_TYPE
- type STRING_TOKEN_TYPE
- type(TOKEN_TYPE) :: tok
- type(STRING_BUILDER_TYPE), pointer :: value
- end type STRING_TOKEN_TYPE
- type UNKNOWN_TOKEN_TYPE
- type(TOKEN_TYPE) :: tok
- end type UNKNOWN_TOKEN_TYPE
- type MSG_TYPE
- integer :: t
- type(STRING_BUILDER_TYPE) :: msg
- end type MSG_TYPE
- type COMPILER_TYPE
- integer :: len, cap
- type(MSG_TYPE), allocatable :: msgs(:)
- end type COMPILER_TYPE
- type SCANNER_TYPE
- type(STRING_BUILDER_TYPE), pointer :: text
- type(POSITION_TYPE), pointer :: cur_pos
- type(COMPILER_TYPE), pointer :: compiler
- end type SCANNER_TYPE
- type TOKEN_RET_TYPE
- integer :: enum_token
- type(STRING_TOKEN_TYPE), pointer :: string_token
- type(NUMBER_TOKEN_TYPE), pointer :: number_token
- type(IDENT_TOKEN_TYPE), pointer :: ident_token
- type(UNKNOWN_TOKEN_TYPE), pointer :: unk_token
- end type TOKEN_RET_TYPE
- contains
- subroutine string_builder_push_back(string_builder, ch)
- type(STRING_BUILDER_TYPE), pointer :: string_builder
- character :: ch
- character, allocatable :: tmp(:)
- integer :: i
- string_builder%len = string_builder%len + 1
- if (string_builder%len > string_builder%cap) then
- allocate(tmp(string_builder%cap))
- i = 1
- do while (i <= string_builder%cap)
- tmp(i) = string_builder%str(i)
- i = i + 1
- enddo
- deallocate(string_builder%str)
- allocate(string_builder%str(string_builder%cap * 2))
- i = 1
- do while (i <= string_builder%cap)
- string_builder%str(i) = tmp(i)
- i = i + 1
- enddo
- deallocate(tmp)
- string_builder%str(string_builder%len) = ch
- string_builder%cap = string_builder%cap * 2
- else
- string_builder%str(string_builder%len) = ch
- endif
- end subroutine string_builder_push_back
- subroutine string_builder_cut_end(string_builder)
- type(STRING_BUILDER_TYPE), pointer :: string_builder
- character, allocatable :: tmp(:)
- integer :: i
- if (string_builder%len < string_builder%cap) then
- allocate(tmp(string_builder%len))
- i = 1
- do while (i <= string_builder%len)
- tmp(i) = string_builder%str(i)
- i = i + 1
- enddo
- deallocate(string_builder%str)
- string_builder%str = tmp
- deallocate(tmp)
- string_builder%cap = string_builder%len
- endif
- end subroutine string_builder_cut_end
- function new_pos(len, pos, index, text_ptr)
- integer :: len, pos, index
- type(STRING_BUILDER_TYPE), pointer :: text_ptr
- type(POSITION_TYPE) :: new_pos
- new_pos = POSITION_TYPE(len, pos, index, text_ptr)
- end function new_pos
- function is_pos_eof(pos)
- type(POSITION_TYPE), pointer :: pos
- logical :: is_pos_eof
- is_pos_eof = .false.
- if (pos%index >= pos%text%len) then
- is_pos_eof = .true.
- endif
- end function is_pos_eof
- function pos_get_code(pos)
- type(POSITION_TYPE), pointer :: pos
- character :: pos_get_code
- pos_get_code = pos%text%str(pos%index)
- end function pos_get_code
- function is_pos_newline(pos)
- type(POSITION_TYPE), pointer :: pos
- character :: code1, code2
- logical :: is_pos_newline
- is_pos_newline = .false.
- code1 = pos_get_code(pos)
- if (pos%index == (pos%text%len + 1)) then
- is_pos_newline = .true.
- return
- endif
- code2 = pos%text%str(pos%index + 1)
- if ((code1 == char(10)) .or. ((code1 == char(10)) .and. (code2 == char(13)))) then
- is_pos_newline = .true.
- endif
- end function is_pos_newline
- function is_pos_whitespace(pos)
- type(POSITION_TYPE), pointer :: pos
- logical :: is_pos_whitespace
- character :: code
- code = pos_get_code(pos)
- is_pos_whitespace = .false.
- if ((.not. is_pos_eof(pos)) .and. ((code == ' ') .or. (code == char(9)) .or. (is_pos_newline(pos)))) then
- is_pos_whitespace = .true.
- endif
- end function is_pos_whitespace
- function is_pos_decimal_digit(pos)
- type(POSITION_TYPE), pointer :: pos
- logical :: is_pos_decimal_digit
- character :: code
- code = pos_get_code(pos)
- is_pos_decimal_digit = .false.
- if ((.not. is_pos_eof(pos)) .and. (&
- (code == '0') &
- .or. (code == '1') &
- .or. (code == '2') &
- .or. (code == '3') &
- .or. (code == '4') &
- .or. (code == '5') &
- .or. (code == '6') &
- .or. (code == '7') &
- .or. (code == '8') &
- .or. (code == '9') &
- )) then
- is_pos_decimal_digit = .true.
- endif
- end function is_pos_decimal_digit
- function pos_next(pos) result(p)
- type(POSITION_TYPE), pointer :: pos
- type(POSITION_TYPE), pointer :: p
- allocate(p)
- p = POSITION_TYPE(pos%line, pos%pos, pos%index, pos%text)
- if (.not. is_pos_eof(p)) then
- if (is_pos_newline(p)) then
- if (pos_get_code(p) == char(13)) then
- p%index = p%index + 1
- endif
- p%line = p%line + 1
- p%pos = 1
- else
- p%pos = p%pos + 1
- endif
- p%index = p%index + 1
- endif
- end function pos_next
- function new_scanner(text_ptr)
- type(STRING_BUILDER_TYPE), pointer :: text_ptr
- type(POSITION_TYPE), pointer :: cur_pos
- type(SCANNER_TYPE), pointer :: new_scanner
- type(COMPILER_TYPE), pointer :: compiler
- allocate(cur_pos)
- cur_pos = new_pos(1, 1, 1, text_ptr)
- allocate(compiler)
- compiler = COMPILER_TYPE(0, 128, null())
- allocate(compiler%msgs(compiler%cap))
- allocate(new_scanner)
- new_scanner = SCANNER_TYPE(text_ptr, cur_pos, compiler)
- end function new_scanner
- function read_string_token(scanner)
- type(SCANNER_TYPE), pointer :: scanner
- type(POSITION_TYPE), pointer :: p, p_tmp
- type(POSITION_TYPE), pointer :: pp
- type(STRING_BUILDER_TYPE), pointer :: value
- type(STRING_TOKEN_TYPE), pointer :: read_string_token
- character :: code1, code2
- type(POSITION_TYPE) :: starting, following
- type(FRAGMENT_TYPE) :: fragment
- type(TOKEN_TYPE) :: tok
- allocate(value)
- value = STRING_BUILDER_TYPE(null(), 0, 1)
- allocate(value%str(value%cap))
- p => pos_next(scanner%cur_pos)
- 1 if (is_pos_eof(p)) then
- scanner%compiler%len = scanner%compiler%len + 1
- scanner%compiler%msgs(scanner%compiler%len)%t = 1
- goto 2
- endif
- code1 = pos_get_code(p)
- pp => pos_next(p)
- code2 = pos_get_code(pp)
- if ((code1 == '`') .and. (code2 == '`')) then
- call string_builder_push_back(value, '`')
- p_tmp => p
- p => pos_next(p)
- deallocate(p_tmp)
- p_tmp => p
- p => pos_next(p)
- deallocate(p_tmp)
- else if ((code1 == '`') .and. (code2 /= '`')) then
- p_tmp => p
- p => pos_next(p)
- deallocate(p_tmp)
- deallocate(pp)
- goto 2
- else if (.not. is_pos_eof(p)) then
- call string_builder_push_back(value, code1)
- p_tmp => p
- p => pos_next(p)
- deallocate(p_tmp)
- endif
- deallocate(pp)
- goto 1
- 2 call string_builder_cut_end(value)
- allocate(read_string_token)
- starting = POSITION_TYPE(scanner%cur_pos%line, scanner%cur_pos%pos, scanner%cur_pos%index, scanner%cur_pos%text)
- following = POSITION_TYPE(p%line, p%pos, p%index, p%text)
- fragment = FRAGMENT_TYPE(starting, following)
- tok = TOKEN_TYPE(enum_string_token, fragment)
- deallocate(p)
- read_string_token = STRING_TOKEN_TYPE(tok, value)
- end function read_string_token
- function read_number_token(scanner)
- type(SCANNER_TYPE), pointer :: scanner
- logical :: is_binary
- type(STRING_BUILDER_TYPE), pointer :: value
- type(POSITION_TYPE), pointer :: p, p_tmp
- type(NUMBER_TOKEN_TYPE), pointer :: read_number_token
- type(POSITION_TYPE) :: starting, following
- type(FRAGMENT_TYPE) :: fragment
- type(TOKEN_TYPE) :: tok
- character :: code
- allocate(value)
- value = STRING_BUILDER_TYPE(null(), 0, 1)
- allocate(value%str(value%cap))
- allocate(p)
- p = POSITION_TYPE(scanner%cur_pos%line, scanner%cur_pos%pos, scanner%cur_pos%index, scanner%cur_pos%text)
- is_binary = .true.
- 1 code = pos_get_code(p)
- if (is_pos_decimal_digit(p)) then
- if ((code /= '0') .and. (code /= '1')) then
- is_binary = .false.
- endif
- call string_builder_push_back(value, code)
- else if (code == 'b') then
- if (is_binary) then
- call string_builder_push_back(value, code)
- p_tmp => p
- p => pos_next(p)
- deallocate(p_tmp)
- goto 2
- else
- goto 2
- endif
- else
- goto 2
- endif
- p_tmp => p
- p => pos_next(p)
- deallocate(p_tmp)
- goto 1
- 2 call string_builder_cut_end(value)
- allocate(read_number_token)
- starting = POSITION_TYPE(scanner%cur_pos%line, scanner%cur_pos%pos, scanner%cur_pos%index, scanner%cur_pos%text)
- following = POSITION_TYPE(p%line, p%pos, p%index, p%text)
- fragment = FRAGMENT_TYPE(starting, following)
- tok = TOKEN_TYPE(enum_number_token, fragment)
- deallocate(p)
- read_number_token = NUMBER_TOKEN_TYPE(tok, value)
- end function read_number_token
- function read_ident_token(scanner)
- type(SCANNER_TYPE), pointer :: scanner
- type(STRING_BUILDER_TYPE), pointer :: value
- type(POSITION_TYPE), pointer :: p, p_tmp
- type(IDENT_TOKEN_TYPE), pointer :: read_ident_token
- type(POSITION_TYPE) :: starting, following
- type(FRAGMENT_TYPE) :: fragment
- type(TOKEN_TYPE) :: tok
- character :: code
- allocate(value)
- value = STRING_BUILDER_TYPE(null(), 0, 1)
- allocate(value%str(value%cap))
- allocate(p)
- p = POSITION_TYPE(scanner%cur_pos%line, scanner%cur_pos%pos, scanner%cur_pos%index, scanner%cur_pos%text)
- code = pos_get_code(p)
- call string_builder_push_back(value, code)
- p_tmp => p
- p => pos_next(p)
- deallocate(p_tmp)
- if (is_pos_eof(p)) then
- goto 1
- endif
- code = pos_get_code(p)
- do while (is_pos_decimal_digit(p) .or. (code == '?') .or. (code == '*') .or. (code == '|'))
- call string_builder_push_back(value, code)
- p_tmp => p
- p => pos_next(p)
- deallocate(p_tmp)
- if (is_pos_eof(p)) then
- goto 1
- endif
- code = pos_get_code(p)
- enddo
- 1 call string_builder_cut_end(value)
- allocate(read_ident_token)
- starting = POSITION_TYPE(scanner%cur_pos%line, scanner%cur_pos%pos, scanner%cur_pos%index, scanner%cur_pos%text)
- following = POSITION_TYPE(p%line, p%pos, p%index, p%text)
- fragment = FRAGMENT_TYPE(starting, following)
- tok = TOKEN_TYPE(enum_ident_token, fragment)
- deallocate(p)
- read_ident_token = IDENT_TOKEN_TYPE(tok, value)
- end function read_ident_token
- subroutine read_unknown_token(scanner)
- type(SCANNER_TYPE), pointer :: scanner
- type(STRING_BUILDER_TYPE), target :: value
- type(STRING_BUILDER_TYPE), pointer :: value_ptr
- type(POSITION_TYPE), pointer :: p_tmp
- value = STRING_BUILDER_TYPE(null(), 0, 1)
- allocate(value%str(value%cap))
- value_ptr => value
- do while ((.not. is_pos_eof(scanner%cur_pos)) .and. (.not. is_pos_newline(scanner%cur_pos))&
- .and. (.not. is_pos_decimal_digit(scanner%cur_pos) .and. (pos_get_code(scanner%cur_pos) /= '|')&
- .and. (pos_get_code(scanner%cur_pos) /= '?') .and. (pos_get_code(scanner%cur_pos) /= '?')&
- .and. (pos_get_code(scanner%cur_pos) /= '`')))
- call string_builder_push_back(value_ptr, pos_get_code(scanner%cur_pos))
- p_tmp => scanner%cur_pos
- scanner%cur_pos => pos_next(scanner%cur_pos)
- deallocate(p_tmp)
- enddo
- call string_builder_cut_end(value_ptr)
- scanner%compiler%len = scanner%compiler%len + 1
- scanner%compiler%msgs(scanner%compiler%len)%msg = value
- scanner%compiler%msgs(scanner%compiler%len)%t = -1
- end subroutine read_unknown_token
- function next_token(scanner) result(ret_token)
- type(SCANNER_TYPE), pointer :: scanner
- type(POSITION_TYPE) :: starting, following
- type(FRAGMENT_TYPE) :: fragment
- type(TOKEN_TYPE) :: tok
- type(TOKEN_RET_TYPE), pointer :: ret_token
- logical :: unk, ret_is_null
- type(POSITION_TYPE), pointer :: p_tmp
- allocate(ret_token)
- ret_token%string_token => null()
- ret_token%number_token => null()
- ret_token%ident_token => null()
- ret_token%unk_token => null()
- unk = .false.
- do while (.not. is_pos_eof(scanner%cur_pos))
- do while (is_pos_whitespace(scanner%cur_pos))
- p_tmp => scanner%cur_pos
- scanner%cur_pos => pos_next(scanner%cur_pos)
- deallocate(p_tmp)
- enddo
- ret_is_null = .true.
- select case(pos_get_code(scanner%cur_pos))
- case ('`')
- ret_token%enum_token = enum_string_token
- ret_token%string_token => read_string_token(scanner)
- ret_is_null = .false.
- case default
- if (is_pos_decimal_digit(scanner%cur_pos)) then
- ret_token%enum_token = enum_number_token
- ret_token%number_token => read_number_token(scanner)
- ret_is_null = .false.
- else if ((pos_get_code(scanner%cur_pos) == '|')&
- .or. (pos_get_code(scanner%cur_pos) == '?')&
- .or. (pos_get_code(scanner%cur_pos) == '*')) then
- ret_token%enum_token = enum_ident_token
- ret_token%ident_token => read_ident_token(scanner)
- ret_is_null = .false.
- else
- unk = .true.
- endif
- endselect
- if (ret_is_null .or. unk) then
- call read_unknown_token(scanner)
- unk = .false.
- else
- if (ret_token%enum_token == enum_string_token) then
- scanner%cur_pos%line = ret_token%string_token%tok%coords%following%line
- scanner%cur_pos%pos = ret_token%string_token%tok%coords%following%pos
- scanner%cur_pos%index = ret_token%string_token%tok%coords%following%index
- else if (ret_token%enum_token == enum_number_token) then
- scanner%cur_pos%line = ret_token%number_token%tok%coords%following%line
- scanner%cur_pos%pos = ret_token%number_token%tok%coords%following%pos
- scanner%cur_pos%index = ret_token%number_token%tok%coords%following%index
- else
- scanner%cur_pos%line = ret_token%ident_token%tok%coords%following%line
- scanner%cur_pos%pos = ret_token%ident_token%tok%coords%following%pos
- scanner%cur_pos%index = ret_token%ident_token%tok%coords%following%index
- endif
- goto 1
- endif
- enddo
- allocate(ret_token%unk_token)
- starting = POSITION_TYPE(0, 0, 0, null())
- following = POSITION_TYPE(0, 0, 0, null())
- fragment = FRAGMENT_TYPE(starting, following)
- tok = TOKEN_TYPE(enum_eof_token, fragment)
- ret_token%enum_token = enum_eof_token
- ret_token%unk_token = UNKNOWN_TOKEN_TYPE(tok)
- 1 return
- end function next_token
- end module OOPLexer
- program main
- use OOPLexer
- implicit none
- character :: ch
- integer :: read_flag
- type(STRING_BUILDER_TYPE), pointer :: text
- type(SCANNER_TYPE), pointer :: scanner
- type(TOKEN_RET_TYPE), pointer :: token
- integer :: i
- allocate(text)
- text = STRING_BUILDER_TYPE(null(), 0, 1)
- allocate(text%str(text%cap))
- open (2, file = './input.txt', status='old', access='stream', form='unformatted')
- read_flag = 0
- do while(read_flag == 0)
- read(2, iostat=read_flag) ch
- if (read_flag == 0) then
- call string_builder_push_back(text, ch)
- endif
- enddo
- close(2)
- call string_builder_cut_end(text)
- scanner => new_scanner(text)
- 1 token => next_token(scanner)
- if (token%enum_token == enum_eof_token) then
- goto 2
- endif
- if (token%enum_token == enum_string_token) then
- write (0, '(A,I2.1,A,I2.1,A,I2.1,A,I2.1,A)')&
- 'STRING(', token%string_token%tok%coords%starting%line, ', &
- &', token%string_token%tok%coords%starting%pos, ') - (&
- &', token%string_token%tok%coords%following%line, ', &
- &', token%string_token%tok%coords%following%pos, ') : '
- print *, token%string_token%value%str
- deallocate(token%string_token%value%str)
- deallocate(token%string_token%value)
- deallocate(token%string_token)
- deallocate(token)
- else if (token%enum_token == enum_number_token) then
- write (0, '(A,I2.1,A,I2.1,A,I2.1,A,I2.1,A)')&
- 'NUMBER(', token%number_token%tok%coords%starting%line, ', &
- &', token%number_token%tok%coords%starting%pos, ') - (&
- &', token%number_token%tok%coords%following%line, ', &
- &', token%number_token%tok%coords%following%pos, ') : '
- print *, token%number_token%value%str
- deallocate(token%number_token%value%str)
- deallocate(token%number_token%value)
- deallocate(token%number_token)
- deallocate(token)
- else
- write (0, '(A,I2.1,A,I2.1,A,I2.1,A,I2.1,A)')&
- 'IDENT(', token%ident_token%tok%coords%starting%line, ', &
- &', token%ident_token%tok%coords%starting%pos, ') - (&
- &', token%ident_token%tok%coords%following%line, ', &
- &', token%ident_token%tok%coords%following%pos, ') : '
- print *, token%ident_token%value%str
- deallocate(token%ident_token%value%str)
- deallocate(token%ident_token%value)
- deallocate(token%ident_token)
- deallocate(token)
- endif
- goto 1
- 2 i = 1
- do while (i <= scanner%compiler%len)
- if (scanner%compiler%msgs(i)%t == -1) then
- write (0, '(A,I2.1,A)')&
- 'WARNING(', i, ') : UNKNOWN TOKEN'
- print *, scanner%compiler%msgs(i)%msg%str
- deallocate(scanner%compiler%msgs(i)%msg%str)
- else
- write (0, '(A,I2.1,A)')&
- 'WARNING(', i, ') : EOF WHILE READING STRING'
- endif
- i = i + 1
- enddo
- deallocate(token%unk_token)
- deallocate(token)
- deallocate(scanner%compiler%msgs)
- deallocate(scanner%compiler)
- deallocate(scanner%cur_pos)
- deallocate(scanner)
- deallocate(text%str)
- deallocate(text)
- end program main
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement