!!! !!! An implementation of the Rosetta Code parser task: !!! https://rosettacode.org/wiki/Compiler/syntax_analyzer !!! !!! The implementation is based on the published pseudocode. !!! module compiler_type_kinds use, intrinsic :: iso_fortran_env, only: int32 use, intrinsic :: iso_fortran_env, only: int64 implicit none private ! Synonyms. integer, parameter, public :: size_kind = int64 integer, parameter, public :: length_kind = size_kind integer, parameter, public :: nk = size_kind ! Synonyms for character capable of storing a Unicode code point. integer, parameter, public :: unicode_char_kind = selected_char_kind ('ISO_10646') integer, parameter, public :: ck = unicode_char_kind ! Synonyms for integers capable of storing a Unicode code point. integer, parameter, public :: unicode_ichar_kind = int32 integer, parameter, public :: ick = unicode_ichar_kind end module compiler_type_kinds module string_buffers use, intrinsic :: iso_fortran_env, only: error_unit use, intrinsic :: iso_fortran_env, only: int64 use, non_intrinsic :: compiler_type_kinds, only: nk, ck, ick implicit none private public :: strbuf_t type :: strbuf_t integer(kind = nk), private :: len = 0 ! ! ‘chars’ is made public for efficient access to the individual ! characters. ! character(1, kind = ck), allocatable, public :: chars(:) contains procedure, pass, private :: ensure_storage => strbuf_t_ensure_storage procedure, pass :: to_unicode_full_string => strbuf_t_to_unicode_full_string procedure, pass :: to_unicode_substring => strbuf_t_to_unicode_substring procedure, pass :: length => strbuf_t_length procedure, pass :: set => strbuf_t_set procedure, pass :: append => strbuf_t_append generic :: to_unicode => to_unicode_full_string generic :: to_unicode => to_unicode_substring generic :: assignment(=) => set end type strbuf_t contains function strbuf_t_to_unicode_full_string (strbuf) result (s) class(strbuf_t), intent(in) :: strbuf character(:, kind = ck), allocatable :: s ! ! This does not actually ensure that the string is valid Unicode; ! any 31-bit ‘character’ is supported. ! integer(kind = nk) :: i allocate (character(len = strbuf%len, kind = ck) :: s) do i = 1, strbuf%len s(i:i) = strbuf%chars(i) end do end function strbuf_t_to_unicode_full_string function strbuf_t_to_unicode_substring (strbuf, i, j) result (s) ! ! ‘Extreme’ values of i and j are allowed, as shortcuts for ‘from ! the beginning’, ‘up to the end’, or ‘empty substring’. ! class(strbuf_t), intent(in) :: strbuf integer(kind = nk), intent(in) :: i, j character(:, kind = ck), allocatable :: s ! ! This does not actually ensure that the string is valid Unicode; ! any 31-bit ‘character’ is supported. ! integer(kind = nk) :: i1, j1 integer(kind = nk) :: n integer(kind = nk) :: k i1 = max (1_nk, i) j1 = min (strbuf%len, j) n = max (0_nk, (j1 - i1) + 1_nk) allocate (character(n, kind = ck) :: s) do k = 1, n s(k:k) = strbuf%chars(i1 + (k - 1_nk)) end do end function strbuf_t_to_unicode_substring elemental function strbuf_t_length (strbuf) result (n) class(strbuf_t), intent(in) :: strbuf integer(kind = nk) :: n n = strbuf%len end function strbuf_t_length elemental function next_power_of_two (x) result (y) integer(kind = nk), intent(in) :: x integer(kind = nk) :: y ! ! It is assumed that no more than 64 bits are used. ! ! The branch-free algorithm is that of ! https://archive.is/nKxAc#RoundUpPowerOf2 ! ! Fill in bits until one less than the desired power of two is ! reached, and then add one. ! y = x - 1 y = ior (y, ishft (y, -1)) y = ior (y, ishft (y, -2)) y = ior (y, ishft (y, -4)) y = ior (y, ishft (y, -8)) y = ior (y, ishft (y, -16)) y = ior (y, ishft (y, -32)) y = y + 1 end function next_power_of_two elemental function new_storage_size (length_needed) result (size) integer(kind = nk), intent(in) :: length_needed integer(kind = nk) :: size ! Increase storage by orders of magnitude. if (2_nk**32 < length_needed) then size = huge (1_nk) else size = next_power_of_two (length_needed) end if end function new_storage_size subroutine strbuf_t_ensure_storage (strbuf, length_needed) class(strbuf_t), intent(inout) :: strbuf integer(kind = nk), intent(in) :: length_needed integer(kind = nk) :: new_size type(strbuf_t) :: new_strbuf if (.not. allocated (strbuf%chars)) then ! Initialize a new strbuf%chars array. new_size = new_storage_size (length_needed) allocate (strbuf%chars(1:new_size)) else if (ubound (strbuf%chars, 1) < length_needed) then ! Allocate a new strbuf%chars array, larger than the current ! one, but containing the same characters. new_size = new_storage_size (length_needed) allocate (new_strbuf%chars(1:new_size)) new_strbuf%chars(1:strbuf%len) = strbuf%chars(1:strbuf%len) call move_alloc (new_strbuf%chars, strbuf%chars) end if end subroutine strbuf_t_ensure_storage subroutine strbuf_t_set (dst, src) class(strbuf_t), intent(inout) :: dst class(*), intent(in) :: src integer(kind = nk) :: n integer(kind = nk) :: i select type (src) type is (character(*, kind = ck)) n = len (src, kind = nk) call dst%ensure_storage(n) do i = 1, n dst%chars(i) = src(i:i) end do dst%len = n type is (character(*)) n = len (src, kind = nk) call dst%ensure_storage(n) do i = 1, n dst%chars(i) = src(i:i) end do dst%len = n class is (strbuf_t) n = src%len call dst%ensure_storage(n) dst%chars(1:n) = src%chars(1:n) dst%len = n class default error stop end select end subroutine strbuf_t_set subroutine strbuf_t_append (dst, src) class(strbuf_t), intent(inout) :: dst class(*), intent(in) :: src integer(kind = nk) :: n_dst, n_src, n integer(kind = nk) :: i select type (src) type is (character(*, kind = ck)) n_dst = dst%len n_src = len (src, kind = nk) n = n_dst + n_src call dst%ensure_storage(n) do i = 1, n_src dst%chars(n_dst + i) = src(i:i) end do dst%len = n type is (character(*)) n_dst = dst%len n_src = len (src, kind = nk) n = n_dst + n_src call dst%ensure_storage(n) do i = 1, n_src dst%chars(n_dst + i) = src(i:i) end do dst%len = n class is (strbuf_t) n_dst = dst%len n_src = src%len n = n_dst + n_src call dst%ensure_storage(n) dst%chars((n_dst + 1):n) = src%chars(1:n_src) dst%len = n class default error stop end select end subroutine strbuf_t_append end module string_buffers module reading_one_line_from_a_stream use, intrinsic :: iso_fortran_env, only: input_unit use, intrinsic :: iso_fortran_env, only: error_unit use, non_intrinsic :: compiler_type_kinds, only: nk, ck, ick use, non_intrinsic :: string_buffers implicit none private ! get_line_from_stream: read an entire input line from a stream into ! a strbuf_t. public :: get_line_from_stream character(1, kind = ck), parameter :: linefeed_char = char (10, kind = ck) ! The following is correct for Unix and its relatives. character(1, kind = ck), parameter :: newline_char = linefeed_char contains subroutine get_line_from_stream (unit_no, eof, no_newline, strbuf) integer, intent(in) :: unit_no logical, intent(out) :: eof ! End of file? logical, intent(out) :: no_newline ! There is a line but it has no ! newline? (Thus eof also must ! be .true.) class(strbuf_t), intent(inout) :: strbuf character(1, kind = ck) :: ch strbuf = '' call get_ch (unit_no, eof, ch) do while (.not. eof .and. ch /= newline_char) call strbuf%append (ch) call get_ch (unit_no, eof, ch) end do no_newline = eof .and. (strbuf%length() /= 0) end subroutine get_line_from_stream subroutine get_ch (unit_no, eof, ch) ! ! Read a single code point from the stream. ! ! Currently this procedure simply inputs ‘ASCII’ bytes rather than ! Unicode code points. ! integer, intent(in) :: unit_no logical, intent(out) :: eof character(1, kind = ck), intent(out) :: ch integer :: stat character(1) :: c = '*' eof = .false. if (unit_no == input_unit) then call get_input_unit_char (c, stat) else read (unit = unit_no, iostat = stat) c end if if (stat < 0) then ch = ck_'*' eof = .true. else if (0 < stat) then write (error_unit, '("Input error with status code ", I0)') stat stop 1 else ch = char (ichar (c, kind = ick), kind = ck) end if end subroutine get_ch !!! !!! If you tell gfortran you want -std=f2008 or -std=f2018, you likely !!! will need to add also -fall-intrinsics or -U__GFORTRAN__ !!! !!! The first way, you get the FGETC intrinsic. The latter way, you !!! get the C interface code that uses getchar(3). !!! #ifdef __GFORTRAN__ subroutine get_input_unit_char (c, stat) ! ! The following works if you are using gfortran. ! ! (FGETC is considered a feature for backwards compatibility with ! g77. However, I know of no way to reconfigure input_unit as a ! Fortran 2003 stream, for use with ordinary ‘read’.) ! character, intent(inout) :: c integer, intent(out) :: stat call fgetc (input_unit, c, stat) end subroutine get_input_unit_char #else subroutine get_input_unit_char (c, stat) ! ! An alternative implementation of get_input_unit_char. This ! actually reads input from the C standard input, which might not ! be the same as input_unit. ! use, intrinsic :: iso_c_binding, only: c_int character, intent(inout) :: c integer, intent(out) :: stat interface ! ! Use getchar(3) to read characters from standard input. This ! assumes there is actually such a function available, and that ! getchar(3) does not exist solely as a macro. (One could write ! one’s own getchar() if necessary, of course.) ! function getchar () result (c) bind (c, name = 'getchar') use, intrinsic :: iso_c_binding, only: c_int integer(kind = c_int) :: c end function getchar end interface integer(kind = c_int) :: i_char i_char = getchar () ! ! The C standard requires that EOF have a negative value. If the ! value returned by getchar(3) is not EOF, then it will be ! representable as an unsigned char. Therefore, to check for end ! of file, one need only test whether i_char is negative. ! if (i_char < 0) then stat = -1 else stat = 0 c = char (i_char) end if end subroutine get_input_unit_char #endif end module reading_one_line_from_a_stream module lexer_token_facts implicit none private integer, parameter, public :: tk_EOI = 0 integer, parameter, public :: tk_Mul = 1 integer, parameter, public :: tk_Div = 2 integer, parameter, public :: tk_Mod = 3 integer, parameter, public :: tk_Add = 4 integer, parameter, public :: tk_Sub = 5 integer, parameter, public :: tk_Negate = 6 integer, parameter, public :: tk_Not = 7 integer, parameter, public :: tk_Lss = 8 integer, parameter, public :: tk_Leq = 9 integer, parameter, public :: tk_Gtr = 10 integer, parameter, public :: tk_Geq = 11 integer, parameter, public :: tk_Eq = 12 integer, parameter, public :: tk_Neq = 13 integer, parameter, public :: tk_Assign = 14 integer, parameter, public :: tk_And = 15 integer, parameter, public :: tk_Or = 16 integer, parameter, public :: tk_If = 17 integer, parameter, public :: tk_Else = 18 integer, parameter, public :: tk_While = 19 integer, parameter, public :: tk_Print = 20 integer, parameter, public :: tk_Putc = 21 integer, parameter, public :: tk_Lparen = 22 integer, parameter, public :: tk_Rparen = 23 integer, parameter, public :: tk_Lbrace = 24 integer, parameter, public :: tk_Rbrace = 25 integer, parameter, public :: tk_Semi = 26 integer, parameter, public :: tk_Comma = 27 integer, parameter, public :: tk_Ident = 28 integer, parameter, public :: tk_Integer = 29 integer, parameter, public :: tk_String = 30 integer, parameter, public :: tk_Positive = 31 character(16), parameter, public :: lexer_token_string(0:31) = & (/ "EOI ", & & "* ", & & "/ ", & & "% ", & & "+ ", & & "- ", & & "- ", & & "! ", & & "< ", & & "<= ", & & "> ", & & ">= ", & & "== ", & & "!= ", & & "= ", & & "&& ", & & "|| ", & & "if ", & & "else ", & & "while ", & & "print ", & & "putc ", & & "( ", & & ") ", & & "{ ", & & "} ", & & "; ", & & ", ", & & "Ident ", & & "Integer literal ", & & "String literal ", & & "+ " /) integer, parameter, public :: lexer_token_arity(0:31) = & & (/ -1, & ! EOI & 2, 2, 2, 2, 2, & ! * / % + - & 1, 1, & ! negate ! & 2, 2, 2, 2, 2, 2, & ! < <= > >= == != & -1, & ! = & 2, 2, & ! && || & -1, -1, -1, -1, -1, & ! & -1, -1, -1, -1, -1, & ! & -1, -1, -1, -1, & ! & 1 /) ! positive integer, parameter, public :: lexer_token_precedence(0:31) = & & (/ -1, & ! EOI & 13, 13, 13, & ! * / % & 12, 12, & ! + - & 14, 14, & ! negate ! & 10, 10, 10, 10, & ! < <= > >= & 9, 9, & ! == != & -1, & ! = & 5, & ! && & 4, & ! || & -1, -1, -1, -1, -1, & ! & -1, -1, -1, -1, -1, & ! & -1, -1, -1, -1, & ! & 14 /) ! positive integer, parameter, public :: left_associative = 0 integer, parameter, public :: right_associative = 1 ! All current operators are left associative. (The values in the ! array for things that are not operators are unimportant.) integer, parameter, public :: lexer_token_associativity(0:31) = left_associative end module lexer_token_facts module reading_of_lexer_tokens use, intrinsic :: iso_fortran_env, only: error_unit use, non_intrinsic :: compiler_type_kinds, only: nk, ck, ick use, non_intrinsic :: string_buffers use, non_intrinsic :: reading_one_line_from_a_stream use, non_intrinsic :: lexer_token_facts implicit none private public :: lexer_token_t public :: get_lexer_token character(1, kind = ck), parameter :: horizontal_tab_char = char (9, kind = ck) character(1, kind = ck), parameter :: linefeed_char = char (10, kind = ck) character(1, kind = ck), parameter :: vertical_tab_char = char (11, kind = ck) character(1, kind = ck), parameter :: formfeed_char = char (12, kind = ck) character(1, kind = ck), parameter :: carriage_return_char = char (13, kind = ck) character(1, kind = ck), parameter :: space_char = ck_' ' type :: lexer_token_t integer :: token_no = -(huge (1)) character(:, kind = ck), allocatable :: val integer(nk) :: line_no = -(huge (1_nk)) integer(nk) :: column_no = -(huge (1_nk)) end type lexer_token_t contains subroutine get_lexer_token (unit_no, lex_line_no, eof, token) ! ! Lines that are empty or contain only whitespace are tolerated. ! ! Also tolerated are comment lines, whose first character is a ! '!'. It is convenient for debugging to be able to comment out ! lines. ! ! A last line be without a newline is *not* tolerated, unless it ! contains only whitespace. ! ! Letting there be some whitespace is partly for the sake of ! reading cut-and-paste from a browser display. ! integer, intent(in) :: unit_no integer(kind = nk), intent(inout) :: lex_line_no logical, intent(out) :: eof type(lexer_token_t), intent(out) :: token type(strbuf_t) :: strbuf logical :: no_newline logical :: input_found ! Let a negative setting initialize the line number. lex_line_no = max (0_nk, lex_line_no) strbuf = '' eof = .false. input_found = .false. do while (.not. eof .and. .not. input_found) call get_line_from_stream (unit_no, eof, no_newline, strbuf) if (eof) then if (no_newline) then lex_line_no = lex_line_no + 1 if (.not. strbuf_is_all_whitespace (strbuf)) then call start_error_message (lex_line_no) write (error_unit, '("lexer line ends without a newline")') stop 1 end if end if else lex_line_no = lex_line_no + 1 input_found = .true. if (strbuf_is_all_whitespace (strbuf)) then ! A blank line. input_found = .false. else if (0 < strbuf%length()) then if (strbuf%chars(1) == ck_'!') then ! A comment line. input_found = .false. end if end if end if end do token = lexer_token_t () if (.not. eof) then token = strbuf_to_token (lex_line_no, strbuf) end if end subroutine get_lexer_token function strbuf_to_token (lex_line_no, strbuf) result (token) integer(kind = nk), intent(in) :: lex_line_no class(strbuf_t), intent(in) :: strbuf type(lexer_token_t) :: token character(:, kind = ck), allocatable :: line_no character(:, kind = ck), allocatable :: column_no character(:, kind = ck), allocatable :: token_name character(:, kind = ck), allocatable :: val_string integer :: stat integer(kind = nk) :: n call split_line (lex_line_no, strbuf, line_no, column_no, token_name, val_string) read (line_no, *, iostat = stat) token%line_no if (stat /= 0) then call start_error_message (lex_line_no) write (error_unit, '("line number field is unreadable or too large")') stop 1 end if read (column_no, *, iostat = stat) token%column_no if (stat /= 0) then call start_error_message (lex_line_no) write (error_unit, '("column number field is unreadable or too large")') stop 1 end if token%token_no = token_name_to_token_no (lex_line_no, token_name) select case (token%token_no) case (tk_Ident) ! I do no checking of identifier names. allocate (token%val, source = val_string) case (tk_Integer) call check_is_all_digits (lex_line_no, val_string) allocate (token%val, source = val_string) case (tk_String) n = len (val_string, kind = nk) if (n < 2) then call string_literal_missing_or_no_good else if (val_string(1:1) /= ck_'"' .or. val_string(n:n) /= ck_'"') then call string_literal_missing_or_no_good else allocate (token%val, source = val_string) end if case default if (len (val_string, kind = nk) /= 0) then call start_error_message (lex_line_no) write (error_unit, '("token should not have a value")') stop 1 end if end select contains subroutine string_literal_missing_or_no_good call start_error_message (lex_line_no) write (error_unit, '("""String"" token requires a string literal")') stop 1 end subroutine string_literal_missing_or_no_good end function strbuf_to_token subroutine split_line (lex_line_no, strbuf, line_no, column_no, token_name, val_string) integer(kind = nk), intent(in) :: lex_line_no class(strbuf_t), intent(in) :: strbuf character(:, kind = ck), allocatable, intent(out) :: line_no character(:, kind = ck), allocatable, intent(out) :: column_no character(:, kind = ck), allocatable, intent(out) :: token_name character(:, kind = ck), allocatable, intent(out) :: val_string integer(kind = nk) :: i, j i = skip_whitespace (strbuf, 1_nk) j = skip_non_whitespace (strbuf, i) line_no = strbuf%to_unicode(i, j - 1) call check_is_all_digits (lex_line_no, line_no) i = skip_whitespace (strbuf, j) j = skip_non_whitespace (strbuf, i) column_no = strbuf%to_unicode(i, j - 1) call check_is_all_digits (lex_line_no, column_no) i = skip_whitespace (strbuf, j) j = skip_non_whitespace (strbuf, i) token_name = strbuf%to_unicode(i, j - 1) i = skip_whitespace (strbuf, j) if (strbuf%length() < i) then val_string = ck_'' else if (strbuf%chars(i) == ck_'"') then j = skip_whitespace_backwards (strbuf, strbuf%length()) if (strbuf%chars(j) == ck_'"') then val_string = strbuf%to_unicode(i, j) else call start_error_message (lex_line_no) write (error_unit, '("string literal does not end in a double quote")') stop 1 end if else j = skip_non_whitespace (strbuf, i) val_string = strbuf%to_unicode(i, j - 1) i = skip_whitespace (strbuf, j) if (i <= strbuf%length()) then call start_error_message (lex_line_no) write (error_unit, '("token line contains unexpected text")') stop 1 end if end if end subroutine split_line function token_name_to_token_no (lex_line_no, token_name) result (token_no) integer(kind = nk), intent(in) :: lex_line_no character(*, kind = ck), intent(in) :: token_name integer :: token_no !! !! This implementation is not optimized in any way, unless the !! Fortran compiler can optimize the SELECT CASE. !! select case (token_name) case (ck_"End_of_input") token_no = tk_EOI case (ck_"Op_multiply") token_no = tk_Mul case (ck_"Op_divide") token_no = tk_Div case (ck_"Op_mod") token_no = tk_Mod case (ck_"Op_add") token_no = tk_Add case (ck_"Op_subtract") token_no = tk_Sub case (ck_"Op_negate") token_no = tk_Negate case (ck_"Op_not") token_no = tk_Not case (ck_"Op_less") token_no = tk_Lss case (ck_"Op_lessequal ") token_no = tk_Leq case (ck_"Op_greater") token_no = tk_Gtr case (ck_"Op_greaterequal") token_no = tk_Geq case (ck_"Op_equal") token_no = tk_Eq case (ck_"Op_notequal") token_no = tk_Neq case (ck_"Op_assign") token_no = tk_Assign case (ck_"Op_and") token_no = tk_And case (ck_"Op_or") token_no = tk_Or case (ck_"Keyword_if") token_no = tk_If case (ck_"Keyword_else") token_no = tk_Else case (ck_"Keyword_while") token_no = tk_While case (ck_"Keyword_print") token_no = tk_Print case (ck_"Keyword_putc") token_no = tk_Putc case (ck_"LeftParen") token_no = tk_Lparen case (ck_"RightParen") token_no = tk_Rparen case (ck_"LeftBrace") token_no = tk_Lbrace case (ck_"RightBrace") token_no = tk_Rbrace case (ck_"Semicolon") token_no = tk_Semi case (ck_"Comma") token_no = tk_Comma case (ck_"Identifier") token_no = tk_Ident case (ck_"Integer") token_no = tk_Integer case (ck_"String") token_no = tk_String case default call start_error_message (lex_line_no) write (error_unit, '("unrecognized token name: ", A)') token_name stop 1 end select end function token_name_to_token_no function skip_whitespace (strbuf, i) result (j) class(strbuf_t), intent(in) :: strbuf integer(kind = nk), intent(in) :: i integer(kind = nk) :: j logical :: done j = i done = .false. do while (.not. done) if (at_end_of_line (strbuf, j)) then done = .true. else if (.not. isspace (strbuf%chars(j))) then done = .true. else j = j + 1 end if end do end function skip_whitespace function skip_non_whitespace (strbuf, i) result (j) class(strbuf_t), intent(in) :: strbuf integer(kind = nk), intent(in) :: i integer(kind = nk) :: j logical :: done j = i done = .false. do while (.not. done) if (at_end_of_line (strbuf, j)) then done = .true. else if (isspace (strbuf%chars(j))) then done = .true. else j = j + 1 end if end do end function skip_non_whitespace function skip_whitespace_backwards (strbuf, i) result (j) class(strbuf_t), intent(in) :: strbuf integer(kind = nk), intent(in) :: i integer(kind = nk) :: j logical :: done j = i done = .false. do while (.not. done) if (j == -1) then done = .true. else if (.not. isspace (strbuf%chars(j))) then done = .true. else j = j - 1 end if end do end function skip_whitespace_backwards function at_end_of_line (strbuf, i) result (bool) class(strbuf_t), intent(in) :: strbuf integer(kind = nk), intent(in) :: i logical :: bool bool = (strbuf%length() < i) end function at_end_of_line elemental function strbuf_is_all_whitespace (strbuf) result (bool) class(strbuf_t), intent(in) :: strbuf logical :: bool integer(kind = nk) :: n integer(kind = nk) :: i n = strbuf%length() if (n == 0) then bool = .true. else i = 1 bool = .true. do while (bool .and. i /= n + 1) bool = isspace (strbuf%chars(i)) i = i + 1 end do end if end function strbuf_is_all_whitespace elemental function isspace (ch) result (bool) character(1, kind = ck), intent(in) :: ch logical :: bool bool = (ch == horizontal_tab_char) .or. & & (ch == linefeed_char) .or. & & (ch == vertical_tab_char) .or. & & (ch == formfeed_char) .or. & & (ch == carriage_return_char) .or. & & (ch == space_char) end function isspace elemental function isdigit (ch) result (bool) character(1, kind = ck), intent(in) :: ch logical :: bool integer(kind = ick), parameter :: zero = ichar (ck_'0', kind = ick) integer(kind = ick), parameter :: nine = ichar (ck_'9', kind = ick) integer(kind = ick) :: i_ch i_ch = ichar (ch, kind = ick) bool = (zero <= i_ch .and. i_ch <= nine) end function isdigit subroutine check_is_all_digits (lex_line_no, str) integer(kind = nk), intent(in) :: lex_line_no character(*, kind = ck), intent(in) :: str integer(kind = nk) :: n integer(kind = nk) :: i n = len (str, kind = nk) if (n == 0_nk) then call start_error_message (lex_line_no) write (error_unit, '("a required field is missing")') stop 1 else do i = 1, n if (.not. isdigit (str(i:i))) then call start_error_message (lex_line_no) write (error_unit, '("a numeric field contains a non-digit")') stop 1 end if end do end if end subroutine check_is_all_digits subroutine start_error_message (lex_line_no) integer(kind = nk), intent(in) :: lex_line_no write (error_unit, '("Token stream error at line ", I0, ": ")', advance = 'no') & & lex_line_no end subroutine start_error_message end module reading_of_lexer_tokens module syntactic_analysis use, intrinsic :: iso_fortran_env, only: input_unit use, intrinsic :: iso_fortran_env, only: output_unit use, intrinsic :: iso_fortran_env, only: error_unit use, non_intrinsic :: compiler_type_kinds, only: nk, ck, ick use, non_intrinsic :: string_buffers use, non_intrinsic :: lexer_token_facts use, non_intrinsic :: reading_of_lexer_tokens implicit none private public :: ast_node_t public :: ast_t public :: parse_token_stream public :: output_ast_flattened integer, parameter, public :: tk_start_of_statement = -1 integer, parameter, public :: tk_primary = -2 integer, parameter :: node_Identifier = 1 integer, parameter :: node_String = 2 integer, parameter :: node_Integer = 3 integer, parameter :: node_Sequence = 4 integer, parameter :: node_If = 5 integer, parameter :: node_Prtc = 6 integer, parameter :: node_Prts = 7 integer, parameter :: node_Prti = 8 integer, parameter :: node_While = 9 integer, parameter :: node_Assign = 10 integer, parameter :: node_Negate = 11 integer, parameter :: node_Not = 12 integer, parameter :: node_Multiply = 13 integer, parameter :: node_Divide = 14 integer, parameter :: node_Mod = 15 integer, parameter :: node_Add = 16 integer, parameter :: node_Subtract = 17 integer, parameter :: node_Less = 18 integer, parameter :: node_LessEqual = 19 integer, parameter :: node_Greater = 20 integer, parameter :: node_GreaterEqual = 21 integer, parameter :: node_Equal = 22 integer, parameter :: node_NotEqual = 23 integer, parameter :: node_And = 24 integer, parameter :: node_Or = 25 character(16), parameter :: node_variety_string(1:25) = & (/ "Identifier ", & & "String ", & & "Integer ", & & "Sequence ", & & "If ", & & "Prtc ", & & "Prts ", & & "Prti ", & & "While ", & & "Assign ", & & "Negate ", & & "Not ", & & "Multiply ", & & "Divide ", & & "Mod ", & & "Add ", & & "Subtract ", & & "Less ", & & "LessEqual ", & & "Greater ", & & "GreaterEqual ", & & "Equal ", & & "NotEqual ", & & "And ", & & "Or " /) type :: ast_node_t integer :: node_variety character(:, kind = ck), allocatable :: val type(ast_node_t), pointer :: left => null () type(ast_node_t), pointer :: right => null () contains procedure, pass :: assign => ast_node_t_assign generic :: assignment(=) => assign final :: ast_node_t_finalize end type ast_node_t ! ast_t phases. integer, parameter :: building = 1 integer, parameter :: completed = 2 type :: ast_t ! ! This type is used to build the subtrees, as well as for the ! completed AST. The difference is in the setting of ‘phase’. ! type(ast_node_t), pointer :: node => null () integer, private :: phase = building contains procedure, pass :: assign => ast_t_assign generic :: assignment(=) => assign final :: ast_t_finalize end type ast_t type(ast_t), parameter :: ast_nil = ast_t (null ()) contains recursive subroutine ast_node_t_assign (node, other) class(ast_node_t), intent(out) :: node class(*), intent(in) :: other select type (other) class is (ast_node_t) node%node_variety = other%node_variety if (allocated (other%val)) allocate (node%val, source = other%val) if (associated (other%left)) allocate (node%left, source = other%left) if (associated (other%right)) allocate (node%right, source = other%right) class default ! This branch should never be reached. error stop end select end subroutine ast_node_t_assign recursive subroutine ast_node_t_finalize (node) type(ast_node_t), intent(inout) :: node if (associated (node%left)) deallocate (node%left) if (associated (node%right)) deallocate (node%right) end subroutine ast_node_t_finalize recursive subroutine ast_t_assign (ast, other) class(ast_t), intent(out) :: ast class(*), intent(in) :: other select type (other) class is (ast_t) if (associated (other%node)) allocate (ast%node, source = other%node) ! ! Whether it is better to set phase to ‘building’ or to set it ! to ‘other%phase’ is unclear to me. Probably ‘building’ is the ! better choice. Which variable controls memory recovery is ! clear and unchanging, in that case: it is the original, ! ‘other’, that does. ! ast%phase = building class default ! This should not happen. error stop end select end subroutine ast_t_assign subroutine ast_t_finalize (ast) type(ast_t), intent(inout) :: ast ! ! When we are building the tree, the tree’s nodes should not be ! deallocated when the ast_t variable temporarily holding them ! goes out of scope. ! ! However, once the AST is completed, we do want the memory ! recovered when the variable goes out of scope. ! ! (Elsewhere I have written a primitive garbage collector for ! Fortran programs, but in this case it would be a lot of overhead ! for little gain. In fact, we could reasonably just let the ! memory leak, in this program. ! ! Fortran runtimes *are* allowed by the standard to have garbage ! collectors built in. To my knowledge, at the time of this ! writing, only NAG Fortran has a garbage collector option.) ! if (ast%phase == completed) then if (associated (ast%node)) deallocate (ast%node) end if end subroutine ast_t_finalize function parse_token_stream (unit_no) result (ast) integer, intent(in) :: unit_no type(ast_t) :: ast integer(kind = nk) :: lex_line_no type(ast_t) :: statement type(lexer_token_t) :: token lex_line_no = -1_nk call get_token (unit_no, lex_line_no, token) call parse_statement (unit_no, lex_line_no, token, statement) ast = make_internal_node (node_Sequence, ast, statement) do while (token%token_no /= tk_EOI) call parse_statement (unit_no, lex_line_no, token, statement) ast = make_internal_node (node_Sequence, ast, statement) end do ast%phase = completed end function parse_token_stream recursive subroutine parse_statement (unit_no, lex_line_no, token, ast) integer, intent(in) :: unit_no integer(kind = nk), intent(inout) :: lex_line_no type(lexer_token_t), intent(inout) :: token type(ast_t), intent(out) :: ast ast = ast_nil select case (token%token_no) case (tk_If) call parse_ifelse_construct case (tk_Putc) call parse_putc case (tk_Print) call parse_print case (tk_Semi) call get_token (unit_no, lex_line_no, token) case (tk_Ident) call parse_identifier case (tk_While) call parse_while_construct case (tk_Lbrace) call parse_lbrace_construct case (tk_EOI) continue case default call syntax_error_message ("", tk_start_of_statement, token) stop 1 end select contains recursive subroutine parse_ifelse_construct type(ast_t) :: predicate type(ast_t) :: statement_for_predicate_true type(ast_t) :: statement_for_predicate_false call expect_token ("If", tk_If, token) call get_token (unit_no, lex_line_no, token) call parse_parenthesized_expression (unit_no, lex_line_no, token, predicate) call parse_statement (unit_no, lex_line_no, token, statement_for_predicate_true) if (token%token_no == tk_Else) then call get_token (unit_no, lex_line_no, token) call parse_statement (unit_no, lex_line_no, token, statement_for_predicate_false) ast = make_internal_node (node_If, statement_for_predicate_true, & & statement_for_predicate_false) else ast = make_internal_node (node_If, statement_for_predicate_true, ast_nil) end if ast = make_internal_node (node_If, predicate, ast) end subroutine parse_ifelse_construct recursive subroutine parse_putc type(ast_t) :: arguments call expect_token ("Putc", tk_Putc, token) call get_token (unit_no, lex_line_no, token) call parse_parenthesized_expression (unit_no, lex_line_no, token, arguments) ast = make_internal_node (node_Prtc, arguments, ast_nil) call expect_token ("Putc", tk_Semi, token) call get_token (unit_no, lex_line_no, token) end subroutine parse_putc recursive subroutine parse_print logical :: done type(ast_t) :: arg type(ast_t) :: printer call expect_token ("Print", tk_Print, token) call get_token (unit_no, lex_line_no, token) call expect_token ("Print", tk_Lparen, token) done = .false. do while (.not. done) call get_token (unit_no, lex_line_no, token) select case (token%token_no) case (tk_String) arg = make_leaf_node (node_String, token%val) printer = make_internal_node (node_Prts, arg, ast_nil) call get_token (unit_no, lex_line_no, token) case default call parse_expression (unit_no, 0, lex_line_no, token, arg) printer = make_internal_node (node_Prti, arg, ast_nil) end select ast = make_internal_node (node_Sequence, ast, printer) done = (token%token_no /= tk_Comma) end do call expect_token ("Print", tk_Rparen, token) call get_token (unit_no, lex_line_no, token) call expect_token ("Print", tk_Semi, token) call get_token (unit_no, lex_line_no, token) end subroutine parse_print recursive subroutine parse_identifier type(ast_t) :: left_side type(ast_t) :: right_side left_side = make_leaf_node (node_Identifier, token%val) call get_token (unit_no, lex_line_no, token) call expect_token ("assign", tk_Assign, token) call get_token (unit_no, lex_line_no, token) call parse_expression (unit_no, 0, lex_line_no, token, right_side) ast = make_internal_node (node_Assign, left_side, right_side) call expect_token ("assign", tk_Semi, token) call get_token (unit_no, lex_line_no, token) end subroutine parse_identifier recursive subroutine parse_while_construct type(ast_t) :: predicate type(ast_t) :: statement_to_be_repeated call expect_token ("While", tk_While, token) call get_token (unit_no, lex_line_no, token) call parse_parenthesized_expression (unit_no, lex_line_no, token, predicate) call parse_statement (unit_no, lex_line_no, token, statement_to_be_repeated) ast = make_internal_node (node_While, predicate, statement_to_be_repeated) end subroutine parse_while_construct recursive subroutine parse_lbrace_construct type(ast_t) :: statement call expect_token ("Lbrace", tk_Lbrace, token) call get_token (unit_no, lex_line_no, token) do while (token%token_no /= tk_Rbrace .and. token%token_no /= tk_EOI) call parse_statement (unit_no, lex_line_no, token, statement) ast = make_internal_node (node_Sequence, ast, statement) end do call expect_token ("Lbrace", tk_Rbrace, token) call get_token (unit_no, lex_line_no, token) end subroutine parse_lbrace_construct end subroutine parse_statement recursive subroutine parse_expression (unit_no, p, lex_line_no, token, ast) integer, intent(in) :: unit_no integer, intent(in) :: p integer(kind = nk), intent(inout) :: lex_line_no type(lexer_token_t), intent(inout) :: token type(ast_t), intent(out) :: ast integer :: precedence type(ast_t) :: expression select case (token%token_no) case (tk_Lparen) call parse_parenthesized_expression (unit_no, lex_line_no, token, ast) case (tk_Sub) token%token_no = tk_Negate precedence = lexer_token_precedence(token%token_no) call get_token (unit_no, lex_line_no, token) call parse_expression (unit_no, precedence, lex_line_no, token, expression) ast = make_internal_node (node_Negate, expression, ast_nil) case (tk_Add) token%token_no = tk_Positive precedence = lexer_token_precedence(token%token_no) call get_token (unit_no, lex_line_no, token) call parse_expression (unit_no, precedence, lex_line_no, token, expression) ast = expression case (tk_Not) precedence = lexer_token_precedence(token%token_no) call get_token (unit_no, lex_line_no, token) call parse_expression (unit_no, precedence, lex_line_no, token, expression) ast = make_internal_node (node_Not, expression, ast_nil) case (tk_Ident) ast = make_leaf_node (node_Identifier, token%val) call get_token (unit_no, lex_line_no, token) case (tk_Integer) ast = make_leaf_node (node_Integer, token%val) call get_token (unit_no, lex_line_no, token) case default call syntax_error_message ("", tk_primary, token) stop 1 end select do while (lexer_token_arity(token%token_no) == 2 .and. & & p <= lexer_token_precedence(token%token_no)) block type(ast_t) :: right_expression integer :: q integer :: node_variety if (lexer_token_associativity(token%token_no) == right_associative) then q = lexer_token_precedence(token%token_no) else q = lexer_token_precedence(token%token_no) + 1 end if node_variety = binary_operator_node_variety (token%token_no) call get_token (unit_no, lex_line_no, token) call parse_expression (unit_no, q, lex_line_no, token, right_expression) ast = make_internal_node (node_variety, ast, right_expression) end block end do end subroutine parse_expression recursive subroutine parse_parenthesized_expression (unit_no, lex_line_no, token, ast) integer, intent(in) :: unit_no integer(kind = nk), intent(inout) :: lex_line_no type(lexer_token_t), intent(inout) :: token type(ast_t), intent(out) :: ast call expect_token ("paren_expr", tk_Lparen, token) call get_token (unit_no, lex_line_no, token) call parse_expression (unit_no, 0, lex_line_no, token, ast) call expect_token ("paren_expr", tk_Rparen, token) call get_token (unit_no, lex_line_no, token) end subroutine parse_parenthesized_expression elemental function binary_operator_node_variety (token_no) result (node_variety) integer, intent(in) :: token_no integer :: node_variety select case (token_no) case (tk_Mul) node_variety = node_Multiply case (tk_Div) node_variety = node_Divide case (tk_Mod) node_variety = node_Mod case (tk_Add) node_variety = node_Add case (tk_Sub) node_variety = node_Subtract case (tk_Lss) node_variety = node_Less case (tk_Leq) node_variety = node_LessEqual case (tk_Gtr) node_variety = node_Greater case (tk_Geq) node_variety = node_GreaterEqual case (tk_Eq) node_variety = node_Equal case (tk_Neq) node_variety = node_NotEqual case (tk_And) node_variety = node_And case (tk_Or) node_variety = node_Or case default ! This branch should never be reached. error stop end select end function binary_operator_node_variety function make_internal_node (node_variety, left, right) result (ast) integer, intent(in) :: node_variety class(ast_t), intent(in) :: left, right type(ast_t) :: ast type(ast_node_t), pointer :: node allocate (node) node%node_variety = node_variety node%left => left%node node%right => right%node ast%node => node end function make_internal_node function make_leaf_node (node_variety, val) result (ast) integer, intent(in) :: node_variety character(*, kind = ck), intent(in) :: val type(ast_t) :: ast type(ast_node_t), pointer :: node allocate (node) node%node_variety = node_variety node%val = val ast%node => node end function make_leaf_node subroutine get_token (unit_no, lex_line_no, token) integer, intent(in) :: unit_no integer(kind = nk), intent(inout) :: lex_line_no type(lexer_token_t), intent(out) :: token logical :: eof call get_lexer_token (unit_no, lex_line_no, eof, token) if (eof) then write (error_unit, '("Parser error: the stream of input tokens is incomplete")') stop 1 end if end subroutine get_token subroutine expect_token (message, token_no, token) character(*), intent(in) :: message integer, intent (in) :: token_no class(lexer_token_t), intent(in) :: token if (token%token_no /= token_no) then call syntax_error_message (message, token_no, token) stop 1 end if end subroutine expect_token subroutine syntax_error_message (message, expected_token_no, token) character(*), intent(in) :: message integer, intent(in) :: expected_token_no class(lexer_token_t), intent(in) :: token ! Write a message to an output unit dedicated to printing ! errors. The message could, of course, be more detailed than what ! we are doing here. write (error_unit, '("Syntax error at ", I0, ".", I0)') & & token%line_no, token%column_no ! ! For the sake of the exercise, also write, to output_unit, a ! message in the style of the C reference program. ! write (output_unit, '("(", I0, ", ", I0, ") error: ")', advance = 'no') & & token%line_no, token%column_no select case (expected_token_no) case (tk_start_of_statement) write (output_unit, '("expecting start of statement, found ''", 1A, "''")') & & trim (lexer_token_string(token%token_no)) case (tk_primary) write (output_unit, '("Expecting a primary, found ''", 1A, "''")') & & trim (lexer_token_string(token%token_no)) case default write (output_unit, '(1A, ": Expecting ''", 1A, "'', found ''", 1A, "''")') & & trim (message), trim (lexer_token_string(expected_token_no)), & & trim (lexer_token_string(token%token_no)) end select end subroutine syntax_error_message subroutine output_ast_flattened (unit_no, ast) integer, intent(in) :: unit_no type(ast_t), intent(in) :: ast call output_ast_node_flattened (unit_no, ast%node) end subroutine output_ast_flattened recursive subroutine output_ast_node_flattened (unit_no, node) integer, intent(in) :: unit_no type(ast_node_t), pointer, intent(in) :: node if (.not. associated (node)) then write (unit_no, '(";")') else if (allocated (node%val)) then write (unit_no, '(1A16, 2X, 1A)') & & node_variety_string(node%node_variety), node%val else write (unit_no, '(1A)') & & trim (node_variety_string(node%node_variety)) call output_ast_node_flattened (unit_no, node%left) call output_ast_node_flattened (unit_no, node%right) end if end if end subroutine output_ast_node_flattened end module syntactic_analysis program parse use, intrinsic :: iso_fortran_env, only: input_unit use, intrinsic :: iso_fortran_env, only: output_unit use, intrinsic :: iso_fortran_env, only: error_unit use, non_intrinsic :: syntactic_analysis implicit none integer, parameter :: inp_unit_no = 100 integer, parameter :: outp_unit_no = 101 integer :: arg_count character(200) :: arg integer :: inp integer :: outp arg_count = command_argument_count () if (3 <= arg_count) then call print_usage else if (arg_count == 0) then inp = input_unit outp = output_unit else if (arg_count == 1) then call get_command_argument (1, arg) inp = open_for_input (trim (arg)) outp = output_unit else if (arg_count == 2) then call get_command_argument (1, arg) inp = open_for_input (trim (arg)) call get_command_argument (2, arg) outp = open_for_output (trim (arg)) end if block type(ast_t) :: ast ast = parse_token_stream (inp) call output_ast_flattened (outp, ast) end block end if contains function open_for_input (filename) result (unit_no) character(*), intent(in) :: filename integer :: unit_no integer :: stat open (unit = inp_unit_no, file = filename, status = 'old', & & action = 'read', access = 'stream', form = 'unformatted', & & iostat = stat) if (stat /= 0) then write (error_unit, '("Error: failed to open ", 1A, " for input")') filename stop 1 end if unit_no = inp_unit_no end function open_for_input function open_for_output (filename) result (unit_no) character(*), intent(in) :: filename integer :: unit_no integer :: stat open (unit = outp_unit_no, file = filename, action = 'write', iostat = stat) if (stat /= 0) then write (error_unit, '("Error: failed to open ", 1A, " for output")') filename stop 1 end if unit_no = outp_unit_no end function open_for_output subroutine print_usage character(200) :: progname call get_command_argument (0, progname) write (output_unit, '("Usage: ", 1A, " [INPUT_FILE [OUTPUT_FILE]]")') & & trim (progname) end subroutine print_usage end program parse