RosettaCodeData/Task/Compiler-syntax-analyzer/Fortran/compiler-syntax-analyzer.f

1548 lines
49 KiB
Fortran
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

!!!
!!! An implementation of the Rosetta Code parser task:
!!! https://rosettacode.org/wiki/Compiler/syntax_analyzer
!!!
!!! The implementation is based on the published pseudocode.
!!!
module compiler_type_kinds
use, intrinsic :: iso_fortran_env, only: int32
use, intrinsic :: iso_fortran_env, only: int64
implicit none
private
! Synonyms.
integer, parameter, public :: size_kind = int64
integer, parameter, public :: length_kind = size_kind
integer, parameter, public :: nk = size_kind
! Synonyms for character capable of storing a Unicode code point.
integer, parameter, public :: unicode_char_kind = selected_char_kind ('ISO_10646')
integer, parameter, public :: ck = unicode_char_kind
! Synonyms for integers capable of storing a Unicode code point.
integer, parameter, public :: unicode_ichar_kind = int32
integer, parameter, public :: ick = unicode_ichar_kind
end module compiler_type_kinds
module string_buffers
use, intrinsic :: iso_fortran_env, only: error_unit
use, intrinsic :: iso_fortran_env, only: int64
use, non_intrinsic :: compiler_type_kinds, only: nk, ck, ick
implicit none
private
public :: strbuf_t
type :: strbuf_t
integer(kind = nk), private :: len = 0
!
! chars is made public for efficient access to the individual
! characters.
!
character(1, kind = ck), allocatable, public :: chars(:)
contains
procedure, pass, private :: ensure_storage => strbuf_t_ensure_storage
procedure, pass :: to_unicode_full_string => strbuf_t_to_unicode_full_string
procedure, pass :: to_unicode_substring => strbuf_t_to_unicode_substring
procedure, pass :: length => strbuf_t_length
procedure, pass :: set => strbuf_t_set
procedure, pass :: append => strbuf_t_append
generic :: to_unicode => to_unicode_full_string
generic :: to_unicode => to_unicode_substring
generic :: assignment(=) => set
end type strbuf_t
contains
function strbuf_t_to_unicode_full_string (strbuf) result (s)
class(strbuf_t), intent(in) :: strbuf
character(:, kind = ck), allocatable :: s
!
! This does not actually ensure that the string is valid Unicode;
! any 31-bit character is supported.
!
integer(kind = nk) :: i
allocate (character(len = strbuf%len, kind = ck) :: s)
do i = 1, strbuf%len
s(i:i) = strbuf%chars(i)
end do
end function strbuf_t_to_unicode_full_string
function strbuf_t_to_unicode_substring (strbuf, i, j) result (s)
!
! Extreme values of i and j are allowed, as shortcuts for from
! the beginning, up to the end, or empty substring.
!
class(strbuf_t), intent(in) :: strbuf
integer(kind = nk), intent(in) :: i, j
character(:, kind = ck), allocatable :: s
!
! This does not actually ensure that the string is valid Unicode;
! any 31-bit character is supported.
!
integer(kind = nk) :: i1, j1
integer(kind = nk) :: n
integer(kind = nk) :: k
i1 = max (1_nk, i)
j1 = min (strbuf%len, j)
n = max (0_nk, (j1 - i1) + 1_nk)
allocate (character(n, kind = ck) :: s)
do k = 1, n
s(k:k) = strbuf%chars(i1 + (k - 1_nk))
end do
end function strbuf_t_to_unicode_substring
elemental function strbuf_t_length (strbuf) result (n)
class(strbuf_t), intent(in) :: strbuf
integer(kind = nk) :: n
n = strbuf%len
end function strbuf_t_length
elemental function next_power_of_two (x) result (y)
integer(kind = nk), intent(in) :: x
integer(kind = nk) :: y
!
! It is assumed that no more than 64 bits are used.
!
! The branch-free algorithm is that of
! https://archive.is/nKxAc#RoundUpPowerOf2
!
! Fill in bits until one less than the desired power of two is
! reached, and then add one.
!
y = x - 1
y = ior (y, ishft (y, -1))
y = ior (y, ishft (y, -2))
y = ior (y, ishft (y, -4))
y = ior (y, ishft (y, -8))
y = ior (y, ishft (y, -16))
y = ior (y, ishft (y, -32))
y = y + 1
end function next_power_of_two
elemental function new_storage_size (length_needed) result (size)
integer(kind = nk), intent(in) :: length_needed
integer(kind = nk) :: size
! Increase storage by orders of magnitude.
if (2_nk**32 < length_needed) then
size = huge (1_nk)
else
size = next_power_of_two (length_needed)
end if
end function new_storage_size
subroutine strbuf_t_ensure_storage (strbuf, length_needed)
class(strbuf_t), intent(inout) :: strbuf
integer(kind = nk), intent(in) :: length_needed
integer(kind = nk) :: new_size
type(strbuf_t) :: new_strbuf
if (.not. allocated (strbuf%chars)) then
! Initialize a new strbuf%chars array.
new_size = new_storage_size (length_needed)
allocate (strbuf%chars(1:new_size))
else if (ubound (strbuf%chars, 1) < length_needed) then
! Allocate a new strbuf%chars array, larger than the current
! one, but containing the same characters.
new_size = new_storage_size (length_needed)
allocate (new_strbuf%chars(1:new_size))
new_strbuf%chars(1:strbuf%len) = strbuf%chars(1:strbuf%len)
call move_alloc (new_strbuf%chars, strbuf%chars)
end if
end subroutine strbuf_t_ensure_storage
subroutine strbuf_t_set (dst, src)
class(strbuf_t), intent(inout) :: dst
class(*), intent(in) :: src
integer(kind = nk) :: n
integer(kind = nk) :: i
select type (src)
type is (character(*, kind = ck))
n = len (src, kind = nk)
call dst%ensure_storage(n)
do i = 1, n
dst%chars(i) = src(i:i)
end do
dst%len = n
type is (character(*))
n = len (src, kind = nk)
call dst%ensure_storage(n)
do i = 1, n
dst%chars(i) = src(i:i)
end do
dst%len = n
class is (strbuf_t)
n = src%len
call dst%ensure_storage(n)
dst%chars(1:n) = src%chars(1:n)
dst%len = n
class default
error stop
end select
end subroutine strbuf_t_set
subroutine strbuf_t_append (dst, src)
class(strbuf_t), intent(inout) :: dst
class(*), intent(in) :: src
integer(kind = nk) :: n_dst, n_src, n
integer(kind = nk) :: i
select type (src)
type is (character(*, kind = ck))
n_dst = dst%len
n_src = len (src, kind = nk)
n = n_dst + n_src
call dst%ensure_storage(n)
do i = 1, n_src
dst%chars(n_dst + i) = src(i:i)
end do
dst%len = n
type is (character(*))
n_dst = dst%len
n_src = len (src, kind = nk)
n = n_dst + n_src
call dst%ensure_storage(n)
do i = 1, n_src
dst%chars(n_dst + i) = src(i:i)
end do
dst%len = n
class is (strbuf_t)
n_dst = dst%len
n_src = src%len
n = n_dst + n_src
call dst%ensure_storage(n)
dst%chars((n_dst + 1):n) = src%chars(1:n_src)
dst%len = n
class default
error stop
end select
end subroutine strbuf_t_append
end module string_buffers
module reading_one_line_from_a_stream
use, intrinsic :: iso_fortran_env, only: input_unit
use, intrinsic :: iso_fortran_env, only: error_unit
use, non_intrinsic :: compiler_type_kinds, only: nk, ck, ick
use, non_intrinsic :: string_buffers
implicit none
private
! get_line_from_stream: read an entire input line from a stream into
! a strbuf_t.
public :: get_line_from_stream
character(1, kind = ck), parameter :: linefeed_char = char (10, kind = ck)
! The following is correct for Unix and its relatives.
character(1, kind = ck), parameter :: newline_char = linefeed_char
contains
subroutine get_line_from_stream (unit_no, eof, no_newline, strbuf)
integer, intent(in) :: unit_no
logical, intent(out) :: eof ! End of file?
logical, intent(out) :: no_newline ! There is a line but it has no
! newline? (Thus eof also must
! be .true.)
class(strbuf_t), intent(inout) :: strbuf
character(1, kind = ck) :: ch
strbuf = ''
call get_ch (unit_no, eof, ch)
do while (.not. eof .and. ch /= newline_char)
call strbuf%append (ch)
call get_ch (unit_no, eof, ch)
end do
no_newline = eof .and. (strbuf%length() /= 0)
end subroutine get_line_from_stream
subroutine get_ch (unit_no, eof, ch)
!
! Read a single code point from the stream.
!
! Currently this procedure simply inputs ASCII bytes rather than
! Unicode code points.
!
integer, intent(in) :: unit_no
logical, intent(out) :: eof
character(1, kind = ck), intent(out) :: ch
integer :: stat
character(1) :: c = '*'
eof = .false.
if (unit_no == input_unit) then
call get_input_unit_char (c, stat)
else
read (unit = unit_no, iostat = stat) c
end if
if (stat < 0) then
ch = ck_'*'
eof = .true.
else if (0 < stat) then
write (error_unit, '("Input error with status code ", I0)') stat
stop 1
else
ch = char (ichar (c, kind = ick), kind = ck)
end if
end subroutine get_ch
!!!
!!! If you tell gfortran you want -std=f2008 or -std=f2018, you likely
!!! will need to add also -fall-intrinsics or -U__GFORTRAN__
!!!
!!! The first way, you get the FGETC intrinsic. The latter way, you
!!! get the C interface code that uses getchar(3).
!!!
#ifdef __GFORTRAN__
subroutine get_input_unit_char (c, stat)
!
! The following works if you are using gfortran.
!
! (FGETC is considered a feature for backwards compatibility with
! g77. However, I know of no way to reconfigure input_unit as a
! Fortran 2003 stream, for use with ordinary read.)
!
character, intent(inout) :: c
integer, intent(out) :: stat
call fgetc (input_unit, c, stat)
end subroutine get_input_unit_char
#else
subroutine get_input_unit_char (c, stat)
!
! An alternative implementation of get_input_unit_char. This
! actually reads input from the C standard input, which might not
! be the same as input_unit.
!
use, intrinsic :: iso_c_binding, only: c_int
character, intent(inout) :: c
integer, intent(out) :: stat
interface
!
! Use getchar(3) to read characters from standard input. This
! assumes there is actually such a function available, and that
! getchar(3) does not exist solely as a macro. (One could write
! ones own getchar() if necessary, of course.)
!
function getchar () result (c) bind (c, name = 'getchar')
use, intrinsic :: iso_c_binding, only: c_int
integer(kind = c_int) :: c
end function getchar
end interface
integer(kind = c_int) :: i_char
i_char = getchar ()
!
! The C standard requires that EOF have a negative value. If the
! value returned by getchar(3) is not EOF, then it will be
! representable as an unsigned char. Therefore, to check for end
! of file, one need only test whether i_char is negative.
!
if (i_char < 0) then
stat = -1
else
stat = 0
c = char (i_char)
end if
end subroutine get_input_unit_char
#endif
end module reading_one_line_from_a_stream
module lexer_token_facts
implicit none
private
integer, parameter, public :: tk_EOI = 0
integer, parameter, public :: tk_Mul = 1
integer, parameter, public :: tk_Div = 2
integer, parameter, public :: tk_Mod = 3
integer, parameter, public :: tk_Add = 4
integer, parameter, public :: tk_Sub = 5
integer, parameter, public :: tk_Negate = 6
integer, parameter, public :: tk_Not = 7
integer, parameter, public :: tk_Lss = 8
integer, parameter, public :: tk_Leq = 9
integer, parameter, public :: tk_Gtr = 10
integer, parameter, public :: tk_Geq = 11
integer, parameter, public :: tk_Eq = 12
integer, parameter, public :: tk_Neq = 13
integer, parameter, public :: tk_Assign = 14
integer, parameter, public :: tk_And = 15
integer, parameter, public :: tk_Or = 16
integer, parameter, public :: tk_If = 17
integer, parameter, public :: tk_Else = 18
integer, parameter, public :: tk_While = 19
integer, parameter, public :: tk_Print = 20
integer, parameter, public :: tk_Putc = 21
integer, parameter, public :: tk_Lparen = 22
integer, parameter, public :: tk_Rparen = 23
integer, parameter, public :: tk_Lbrace = 24
integer, parameter, public :: tk_Rbrace = 25
integer, parameter, public :: tk_Semi = 26
integer, parameter, public :: tk_Comma = 27
integer, parameter, public :: tk_Ident = 28
integer, parameter, public :: tk_Integer = 29
integer, parameter, public :: tk_String = 30
integer, parameter, public :: tk_Positive = 31
character(16), parameter, public :: lexer_token_string(0:31) = &
(/ "EOI ", &
& "* ", &
& "/ ", &
& "% ", &
& "+ ", &
& "- ", &
& "- ", &
& "! ", &
& "< ", &
& "<= ", &
& "> ", &
& ">= ", &
& "== ", &
& "!= ", &
& "= ", &
& "&& ", &
& "|| ", &
& "if ", &
& "else ", &
& "while ", &
& "print ", &
& "putc ", &
& "( ", &
& ") ", &
& "{ ", &
& "} ", &
& "; ", &
& ", ", &
& "Ident ", &
& "Integer literal ", &
& "String literal ", &
& "+ " /)
integer, parameter, public :: lexer_token_arity(0:31) = &
& (/ -1, & ! EOI
& 2, 2, 2, 2, 2, & ! * / % + -
& 1, 1, & ! negate !
& 2, 2, 2, 2, 2, 2, & ! < <= > >= == !=
& -1, & ! =
& 2, 2, & ! && ||
& -1, -1, -1, -1, -1, & !
& -1, -1, -1, -1, -1, & !
& -1, -1, -1, -1, & !
& 1 /) ! positive
integer, parameter, public :: lexer_token_precedence(0:31) = &
& (/ -1, & ! EOI
& 13, 13, 13, & ! * / %
& 12, 12, & ! + -
& 14, 14, & ! negate !
& 10, 10, 10, 10, & ! < <= > >=
& 9, 9, & ! == !=
& -1, & ! =
& 5, & ! &&
& 4, & ! ||
& -1, -1, -1, -1, -1, & !
& -1, -1, -1, -1, -1, & !
& -1, -1, -1, -1, & !
& 14 /) ! positive
integer, parameter, public :: left_associative = 0
integer, parameter, public :: right_associative = 1
! All current operators are left associative. (The values in the
! array for things that are not operators are unimportant.)
integer, parameter, public :: lexer_token_associativity(0:31) = left_associative
end module lexer_token_facts
module reading_of_lexer_tokens
use, intrinsic :: iso_fortran_env, only: error_unit
use, non_intrinsic :: compiler_type_kinds, only: nk, ck, ick
use, non_intrinsic :: string_buffers
use, non_intrinsic :: reading_one_line_from_a_stream
use, non_intrinsic :: lexer_token_facts
implicit none
private
public :: lexer_token_t
public :: get_lexer_token
character(1, kind = ck), parameter :: horizontal_tab_char = char (9, kind = ck)
character(1, kind = ck), parameter :: linefeed_char = char (10, kind = ck)
character(1, kind = ck), parameter :: vertical_tab_char = char (11, kind = ck)
character(1, kind = ck), parameter :: formfeed_char = char (12, kind = ck)
character(1, kind = ck), parameter :: carriage_return_char = char (13, kind = ck)
character(1, kind = ck), parameter :: space_char = ck_' '
type :: lexer_token_t
integer :: token_no = -(huge (1))
character(:, kind = ck), allocatable :: val
integer(nk) :: line_no = -(huge (1_nk))
integer(nk) :: column_no = -(huge (1_nk))
end type lexer_token_t
contains
subroutine get_lexer_token (unit_no, lex_line_no, eof, token)
!
! Lines that are empty or contain only whitespace are tolerated.
!
! Also tolerated are comment lines, whose first character is a
! '!'. It is convenient for debugging to be able to comment out
! lines.
!
! A last line be without a newline is *not* tolerated, unless it
! contains only whitespace.
!
! Letting there be some whitespace is partly for the sake of
! reading cut-and-paste from a browser display.
!
integer, intent(in) :: unit_no
integer(kind = nk), intent(inout) :: lex_line_no
logical, intent(out) :: eof
type(lexer_token_t), intent(out) :: token
type(strbuf_t) :: strbuf
logical :: no_newline
logical :: input_found
! Let a negative setting initialize the line number.
lex_line_no = max (0_nk, lex_line_no)
strbuf = ''
eof = .false.
input_found = .false.
do while (.not. eof .and. .not. input_found)
call get_line_from_stream (unit_no, eof, no_newline, strbuf)
if (eof) then
if (no_newline) then
lex_line_no = lex_line_no + 1
if (.not. strbuf_is_all_whitespace (strbuf)) then
call start_error_message (lex_line_no)
write (error_unit, '("lexer line ends without a newline")')
stop 1
end if
end if
else
lex_line_no = lex_line_no + 1
input_found = .true.
if (strbuf_is_all_whitespace (strbuf)) then
! A blank line.
input_found = .false.
else if (0 < strbuf%length()) then
if (strbuf%chars(1) == ck_'!') then
! A comment line.
input_found = .false.
end if
end if
end if
end do
token = lexer_token_t ()
if (.not. eof) then
token = strbuf_to_token (lex_line_no, strbuf)
end if
end subroutine get_lexer_token
function strbuf_to_token (lex_line_no, strbuf) result (token)
integer(kind = nk), intent(in) :: lex_line_no
class(strbuf_t), intent(in) :: strbuf
type(lexer_token_t) :: token
character(:, kind = ck), allocatable :: line_no
character(:, kind = ck), allocatable :: column_no
character(:, kind = ck), allocatable :: token_name
character(:, kind = ck), allocatable :: val_string
integer :: stat
integer(kind = nk) :: n
call split_line (lex_line_no, strbuf, line_no, column_no, token_name, val_string)
read (line_no, *, iostat = stat) token%line_no
if (stat /= 0) then
call start_error_message (lex_line_no)
write (error_unit, '("line number field is unreadable or too large")')
stop 1
end if
read (column_no, *, iostat = stat) token%column_no
if (stat /= 0) then
call start_error_message (lex_line_no)
write (error_unit, '("column number field is unreadable or too large")')
stop 1
end if
token%token_no = token_name_to_token_no (lex_line_no, token_name)
select case (token%token_no)
case (tk_Ident)
! I do no checking of identifier names.
allocate (token%val, source = val_string)
case (tk_Integer)
call check_is_all_digits (lex_line_no, val_string)
allocate (token%val, source = val_string)
case (tk_String)
n = len (val_string, kind = nk)
if (n < 2) then
call string_literal_missing_or_no_good
else if (val_string(1:1) /= ck_'"' .or. val_string(n:n) /= ck_'"') then
call string_literal_missing_or_no_good
else
allocate (token%val, source = val_string)
end if
case default
if (len (val_string, kind = nk) /= 0) then
call start_error_message (lex_line_no)
write (error_unit, '("token should not have a value")')
stop 1
end if
end select
contains
subroutine string_literal_missing_or_no_good
call start_error_message (lex_line_no)
write (error_unit, '("""String"" token requires a string literal")')
stop 1
end subroutine string_literal_missing_or_no_good
end function strbuf_to_token
subroutine split_line (lex_line_no, strbuf, line_no, column_no, token_name, val_string)
integer(kind = nk), intent(in) :: lex_line_no
class(strbuf_t), intent(in) :: strbuf
character(:, kind = ck), allocatable, intent(out) :: line_no
character(:, kind = ck), allocatable, intent(out) :: column_no
character(:, kind = ck), allocatable, intent(out) :: token_name
character(:, kind = ck), allocatable, intent(out) :: val_string
integer(kind = nk) :: i, j
i = skip_whitespace (strbuf, 1_nk)
j = skip_non_whitespace (strbuf, i)
line_no = strbuf%to_unicode(i, j - 1)
call check_is_all_digits (lex_line_no, line_no)
i = skip_whitespace (strbuf, j)
j = skip_non_whitespace (strbuf, i)
column_no = strbuf%to_unicode(i, j - 1)
call check_is_all_digits (lex_line_no, column_no)
i = skip_whitespace (strbuf, j)
j = skip_non_whitespace (strbuf, i)
token_name = strbuf%to_unicode(i, j - 1)
i = skip_whitespace (strbuf, j)
if (strbuf%length() < i) then
val_string = ck_''
else if (strbuf%chars(i) == ck_'"') then
j = skip_whitespace_backwards (strbuf, strbuf%length())
if (strbuf%chars(j) == ck_'"') then
val_string = strbuf%to_unicode(i, j)
else
call start_error_message (lex_line_no)
write (error_unit, '("string literal does not end in a double quote")')
stop 1
end if
else
j = skip_non_whitespace (strbuf, i)
val_string = strbuf%to_unicode(i, j - 1)
i = skip_whitespace (strbuf, j)
if (i <= strbuf%length()) then
call start_error_message (lex_line_no)
write (error_unit, '("token line contains unexpected text")')
stop 1
end if
end if
end subroutine split_line
function token_name_to_token_no (lex_line_no, token_name) result (token_no)
integer(kind = nk), intent(in) :: lex_line_no
character(*, kind = ck), intent(in) :: token_name
integer :: token_no
!!
!! This implementation is not optimized in any way, unless the
!! Fortran compiler can optimize the SELECT CASE.
!!
select case (token_name)
case (ck_"End_of_input")
token_no = tk_EOI
case (ck_"Op_multiply")
token_no = tk_Mul
case (ck_"Op_divide")
token_no = tk_Div
case (ck_"Op_mod")
token_no = tk_Mod
case (ck_"Op_add")
token_no = tk_Add
case (ck_"Op_subtract")
token_no = tk_Sub
case (ck_"Op_negate")
token_no = tk_Negate
case (ck_"Op_not")
token_no = tk_Not
case (ck_"Op_less")
token_no = tk_Lss
case (ck_"Op_lessequal ")
token_no = tk_Leq
case (ck_"Op_greater")
token_no = tk_Gtr
case (ck_"Op_greaterequal")
token_no = tk_Geq
case (ck_"Op_equal")
token_no = tk_Eq
case (ck_"Op_notequal")
token_no = tk_Neq
case (ck_"Op_assign")
token_no = tk_Assign
case (ck_"Op_and")
token_no = tk_And
case (ck_"Op_or")
token_no = tk_Or
case (ck_"Keyword_if")
token_no = tk_If
case (ck_"Keyword_else")
token_no = tk_Else
case (ck_"Keyword_while")
token_no = tk_While
case (ck_"Keyword_print")
token_no = tk_Print
case (ck_"Keyword_putc")
token_no = tk_Putc
case (ck_"LeftParen")
token_no = tk_Lparen
case (ck_"RightParen")
token_no = tk_Rparen
case (ck_"LeftBrace")
token_no = tk_Lbrace
case (ck_"RightBrace")
token_no = tk_Rbrace
case (ck_"Semicolon")
token_no = tk_Semi
case (ck_"Comma")
token_no = tk_Comma
case (ck_"Identifier")
token_no = tk_Ident
case (ck_"Integer")
token_no = tk_Integer
case (ck_"String")
token_no = tk_String
case default
call start_error_message (lex_line_no)
write (error_unit, '("unrecognized token name: ", A)') token_name
stop 1
end select
end function token_name_to_token_no
function skip_whitespace (strbuf, i) result (j)
class(strbuf_t), intent(in) :: strbuf
integer(kind = nk), intent(in) :: i
integer(kind = nk) :: j
logical :: done
j = i
done = .false.
do while (.not. done)
if (at_end_of_line (strbuf, j)) then
done = .true.
else if (.not. isspace (strbuf%chars(j))) then
done = .true.
else
j = j + 1
end if
end do
end function skip_whitespace
function skip_non_whitespace (strbuf, i) result (j)
class(strbuf_t), intent(in) :: strbuf
integer(kind = nk), intent(in) :: i
integer(kind = nk) :: j
logical :: done
j = i
done = .false.
do while (.not. done)
if (at_end_of_line (strbuf, j)) then
done = .true.
else if (isspace (strbuf%chars(j))) then
done = .true.
else
j = j + 1
end if
end do
end function skip_non_whitespace
function skip_whitespace_backwards (strbuf, i) result (j)
class(strbuf_t), intent(in) :: strbuf
integer(kind = nk), intent(in) :: i
integer(kind = nk) :: j
logical :: done
j = i
done = .false.
do while (.not. done)
if (j == -1) then
done = .true.
else if (.not. isspace (strbuf%chars(j))) then
done = .true.
else
j = j - 1
end if
end do
end function skip_whitespace_backwards
function at_end_of_line (strbuf, i) result (bool)
class(strbuf_t), intent(in) :: strbuf
integer(kind = nk), intent(in) :: i
logical :: bool
bool = (strbuf%length() < i)
end function at_end_of_line
elemental function strbuf_is_all_whitespace (strbuf) result (bool)
class(strbuf_t), intent(in) :: strbuf
logical :: bool
integer(kind = nk) :: n
integer(kind = nk) :: i
n = strbuf%length()
if (n == 0) then
bool = .true.
else
i = 1
bool = .true.
do while (bool .and. i /= n + 1)
bool = isspace (strbuf%chars(i))
i = i + 1
end do
end if
end function strbuf_is_all_whitespace
elemental function isspace (ch) result (bool)
character(1, kind = ck), intent(in) :: ch
logical :: bool
bool = (ch == horizontal_tab_char) .or. &
& (ch == linefeed_char) .or. &
& (ch == vertical_tab_char) .or. &
& (ch == formfeed_char) .or. &
& (ch == carriage_return_char) .or. &
& (ch == space_char)
end function isspace
elemental function isdigit (ch) result (bool)
character(1, kind = ck), intent(in) :: ch
logical :: bool
integer(kind = ick), parameter :: zero = ichar (ck_'0', kind = ick)
integer(kind = ick), parameter :: nine = ichar (ck_'9', kind = ick)
integer(kind = ick) :: i_ch
i_ch = ichar (ch, kind = ick)
bool = (zero <= i_ch .and. i_ch <= nine)
end function isdigit
subroutine check_is_all_digits (lex_line_no, str)
integer(kind = nk), intent(in) :: lex_line_no
character(*, kind = ck), intent(in) :: str
integer(kind = nk) :: n
integer(kind = nk) :: i
n = len (str, kind = nk)
if (n == 0_nk) then
call start_error_message (lex_line_no)
write (error_unit, '("a required field is missing")')
stop 1
else
do i = 1, n
if (.not. isdigit (str(i:i))) then
call start_error_message (lex_line_no)
write (error_unit, '("a numeric field contains a non-digit")')
stop 1
end if
end do
end if
end subroutine check_is_all_digits
subroutine start_error_message (lex_line_no)
integer(kind = nk), intent(in) :: lex_line_no
write (error_unit, '("Token stream error at line ", I0, ": ")', advance = 'no') &
& lex_line_no
end subroutine start_error_message
end module reading_of_lexer_tokens
module syntactic_analysis
use, intrinsic :: iso_fortran_env, only: input_unit
use, intrinsic :: iso_fortran_env, only: output_unit
use, intrinsic :: iso_fortran_env, only: error_unit
use, non_intrinsic :: compiler_type_kinds, only: nk, ck, ick
use, non_intrinsic :: string_buffers
use, non_intrinsic :: lexer_token_facts
use, non_intrinsic :: reading_of_lexer_tokens
implicit none
private
public :: ast_node_t
public :: ast_t
public :: parse_token_stream
public :: output_ast_flattened
integer, parameter, public :: tk_start_of_statement = -1
integer, parameter, public :: tk_primary = -2
integer, parameter :: node_Identifier = 1
integer, parameter :: node_String = 2
integer, parameter :: node_Integer = 3
integer, parameter :: node_Sequence = 4
integer, parameter :: node_If = 5
integer, parameter :: node_Prtc = 6
integer, parameter :: node_Prts = 7
integer, parameter :: node_Prti = 8
integer, parameter :: node_While = 9
integer, parameter :: node_Assign = 10
integer, parameter :: node_Negate = 11
integer, parameter :: node_Not = 12
integer, parameter :: node_Multiply = 13
integer, parameter :: node_Divide = 14
integer, parameter :: node_Mod = 15
integer, parameter :: node_Add = 16
integer, parameter :: node_Subtract = 17
integer, parameter :: node_Less = 18
integer, parameter :: node_LessEqual = 19
integer, parameter :: node_Greater = 20
integer, parameter :: node_GreaterEqual = 21
integer, parameter :: node_Equal = 22
integer, parameter :: node_NotEqual = 23
integer, parameter :: node_And = 24
integer, parameter :: node_Or = 25
character(16), parameter :: node_variety_string(1:25) = &
(/ "Identifier ", &
& "String ", &
& "Integer ", &
& "Sequence ", &
& "If ", &
& "Prtc ", &
& "Prts ", &
& "Prti ", &
& "While ", &
& "Assign ", &
& "Negate ", &
& "Not ", &
& "Multiply ", &
& "Divide ", &
& "Mod ", &
& "Add ", &
& "Subtract ", &
& "Less ", &
& "LessEqual ", &
& "Greater ", &
& "GreaterEqual ", &
& "Equal ", &
& "NotEqual ", &
& "And ", &
& "Or " /)
type :: ast_node_t
integer :: node_variety
character(:, kind = ck), allocatable :: val
type(ast_node_t), pointer :: left => null ()
type(ast_node_t), pointer :: right => null ()
contains
procedure, pass :: assign => ast_node_t_assign
generic :: assignment(=) => assign
final :: ast_node_t_finalize
end type ast_node_t
! ast_t phases.
integer, parameter :: building = 1
integer, parameter :: completed = 2
type :: ast_t
!
! This type is used to build the subtrees, as well as for the
! completed AST. The difference is in the setting of phase.
!
type(ast_node_t), pointer :: node => null ()
integer, private :: phase = building
contains
procedure, pass :: assign => ast_t_assign
generic :: assignment(=) => assign
final :: ast_t_finalize
end type ast_t
type(ast_t), parameter :: ast_nil = ast_t (null ())
contains
recursive subroutine ast_node_t_assign (node, other)
class(ast_node_t), intent(out) :: node
class(*), intent(in) :: other
select type (other)
class is (ast_node_t)
node%node_variety = other%node_variety
if (allocated (other%val)) allocate (node%val, source = other%val)
if (associated (other%left)) allocate (node%left, source = other%left)
if (associated (other%right)) allocate (node%right, source = other%right)
class default
! This branch should never be reached.
error stop
end select
end subroutine ast_node_t_assign
recursive subroutine ast_node_t_finalize (node)
type(ast_node_t), intent(inout) :: node
if (associated (node%left)) deallocate (node%left)
if (associated (node%right)) deallocate (node%right)
end subroutine ast_node_t_finalize
recursive subroutine ast_t_assign (ast, other)
class(ast_t), intent(out) :: ast
class(*), intent(in) :: other
select type (other)
class is (ast_t)
if (associated (other%node)) allocate (ast%node, source = other%node)
!
! Whether it is better to set phase to building or to set it
! to other%phase is unclear to me. Probably building is the
! better choice. Which variable controls memory recovery is
! clear and unchanging, in that case: it is the original,
! other, that does.
!
ast%phase = building
class default
! This should not happen.
error stop
end select
end subroutine ast_t_assign
subroutine ast_t_finalize (ast)
type(ast_t), intent(inout) :: ast
!
! When we are building the tree, the trees nodes should not be
! deallocated when the ast_t variable temporarily holding them
! goes out of scope.
!
! However, once the AST is completed, we do want the memory
! recovered when the variable goes out of scope.
!
! (Elsewhere I have written a primitive garbage collector for
! Fortran programs, but in this case it would be a lot of overhead
! for little gain. In fact, we could reasonably just let the
! memory leak, in this program.
!
! Fortran runtimes *are* allowed by the standard to have garbage
! collectors built in. To my knowledge, at the time of this
! writing, only NAG Fortran has a garbage collector option.)
!
if (ast%phase == completed) then
if (associated (ast%node)) deallocate (ast%node)
end if
end subroutine ast_t_finalize
function parse_token_stream (unit_no) result (ast)
integer, intent(in) :: unit_no
type(ast_t) :: ast
integer(kind = nk) :: lex_line_no
type(ast_t) :: statement
type(lexer_token_t) :: token
lex_line_no = -1_nk
call get_token (unit_no, lex_line_no, token)
call parse_statement (unit_no, lex_line_no, token, statement)
ast = make_internal_node (node_Sequence, ast, statement)
do while (token%token_no /= tk_EOI)
call parse_statement (unit_no, lex_line_no, token, statement)
ast = make_internal_node (node_Sequence, ast, statement)
end do
ast%phase = completed
end function parse_token_stream
recursive subroutine parse_statement (unit_no, lex_line_no, token, ast)
integer, intent(in) :: unit_no
integer(kind = nk), intent(inout) :: lex_line_no
type(lexer_token_t), intent(inout) :: token
type(ast_t), intent(out) :: ast
ast = ast_nil
select case (token%token_no)
case (tk_If)
call parse_ifelse_construct
case (tk_Putc)
call parse_putc
case (tk_Print)
call parse_print
case (tk_Semi)
call get_token (unit_no, lex_line_no, token)
case (tk_Ident)
call parse_identifier
case (tk_While)
call parse_while_construct
case (tk_Lbrace)
call parse_lbrace_construct
case (tk_EOI)
continue
case default
call syntax_error_message ("", tk_start_of_statement, token)
stop 1
end select
contains
recursive subroutine parse_ifelse_construct
type(ast_t) :: predicate
type(ast_t) :: statement_for_predicate_true
type(ast_t) :: statement_for_predicate_false
call expect_token ("If", tk_If, token)
call get_token (unit_no, lex_line_no, token)
call parse_parenthesized_expression (unit_no, lex_line_no, token, predicate)
call parse_statement (unit_no, lex_line_no, token, statement_for_predicate_true)
if (token%token_no == tk_Else) then
call get_token (unit_no, lex_line_no, token)
call parse_statement (unit_no, lex_line_no, token, statement_for_predicate_false)
ast = make_internal_node (node_If, statement_for_predicate_true, &
& statement_for_predicate_false)
else
ast = make_internal_node (node_If, statement_for_predicate_true, ast_nil)
end if
ast = make_internal_node (node_If, predicate, ast)
end subroutine parse_ifelse_construct
recursive subroutine parse_putc
type(ast_t) :: arguments
call expect_token ("Putc", tk_Putc, token)
call get_token (unit_no, lex_line_no, token)
call parse_parenthesized_expression (unit_no, lex_line_no, token, arguments)
ast = make_internal_node (node_Prtc, arguments, ast_nil)
call expect_token ("Putc", tk_Semi, token)
call get_token (unit_no, lex_line_no, token)
end subroutine parse_putc
recursive subroutine parse_print
logical :: done
type(ast_t) :: arg
type(ast_t) :: printer
call expect_token ("Print", tk_Print, token)
call get_token (unit_no, lex_line_no, token)
call expect_token ("Print", tk_Lparen, token)
done = .false.
do while (.not. done)
call get_token (unit_no, lex_line_no, token)
select case (token%token_no)
case (tk_String)
arg = make_leaf_node (node_String, token%val)
printer = make_internal_node (node_Prts, arg, ast_nil)
call get_token (unit_no, lex_line_no, token)
case default
call parse_expression (unit_no, 0, lex_line_no, token, arg)
printer = make_internal_node (node_Prti, arg, ast_nil)
end select
ast = make_internal_node (node_Sequence, ast, printer)
done = (token%token_no /= tk_Comma)
end do
call expect_token ("Print", tk_Rparen, token)
call get_token (unit_no, lex_line_no, token)
call expect_token ("Print", tk_Semi, token)
call get_token (unit_no, lex_line_no, token)
end subroutine parse_print
recursive subroutine parse_identifier
type(ast_t) :: left_side
type(ast_t) :: right_side
left_side = make_leaf_node (node_Identifier, token%val)
call get_token (unit_no, lex_line_no, token)
call expect_token ("assign", tk_Assign, token)
call get_token (unit_no, lex_line_no, token)
call parse_expression (unit_no, 0, lex_line_no, token, right_side)
ast = make_internal_node (node_Assign, left_side, right_side)
call expect_token ("assign", tk_Semi, token)
call get_token (unit_no, lex_line_no, token)
end subroutine parse_identifier
recursive subroutine parse_while_construct
type(ast_t) :: predicate
type(ast_t) :: statement_to_be_repeated
call expect_token ("While", tk_While, token)
call get_token (unit_no, lex_line_no, token)
call parse_parenthesized_expression (unit_no, lex_line_no, token, predicate)
call parse_statement (unit_no, lex_line_no, token, statement_to_be_repeated)
ast = make_internal_node (node_While, predicate, statement_to_be_repeated)
end subroutine parse_while_construct
recursive subroutine parse_lbrace_construct
type(ast_t) :: statement
call expect_token ("Lbrace", tk_Lbrace, token)
call get_token (unit_no, lex_line_no, token)
do while (token%token_no /= tk_Rbrace .and. token%token_no /= tk_EOI)
call parse_statement (unit_no, lex_line_no, token, statement)
ast = make_internal_node (node_Sequence, ast, statement)
end do
call expect_token ("Lbrace", tk_Rbrace, token)
call get_token (unit_no, lex_line_no, token)
end subroutine parse_lbrace_construct
end subroutine parse_statement
recursive subroutine parse_expression (unit_no, p, lex_line_no, token, ast)
integer, intent(in) :: unit_no
integer, intent(in) :: p
integer(kind = nk), intent(inout) :: lex_line_no
type(lexer_token_t), intent(inout) :: token
type(ast_t), intent(out) :: ast
integer :: precedence
type(ast_t) :: expression
select case (token%token_no)
case (tk_Lparen)
call parse_parenthesized_expression (unit_no, lex_line_no, token, ast)
case (tk_Sub)
token%token_no = tk_Negate
precedence = lexer_token_precedence(token%token_no)
call get_token (unit_no, lex_line_no, token)
call parse_expression (unit_no, precedence, lex_line_no, token, expression)
ast = make_internal_node (node_Negate, expression, ast_nil)
case (tk_Add)
token%token_no = tk_Positive
precedence = lexer_token_precedence(token%token_no)
call get_token (unit_no, lex_line_no, token)
call parse_expression (unit_no, precedence, lex_line_no, token, expression)
ast = expression
case (tk_Not)
precedence = lexer_token_precedence(token%token_no)
call get_token (unit_no, lex_line_no, token)
call parse_expression (unit_no, precedence, lex_line_no, token, expression)
ast = make_internal_node (node_Not, expression, ast_nil)
case (tk_Ident)
ast = make_leaf_node (node_Identifier, token%val)
call get_token (unit_no, lex_line_no, token)
case (tk_Integer)
ast = make_leaf_node (node_Integer, token%val)
call get_token (unit_no, lex_line_no, token)
case default
call syntax_error_message ("", tk_primary, token)
stop 1
end select
do while (lexer_token_arity(token%token_no) == 2 .and. &
& p <= lexer_token_precedence(token%token_no))
block
type(ast_t) :: right_expression
integer :: q
integer :: node_variety
if (lexer_token_associativity(token%token_no) == right_associative) then
q = lexer_token_precedence(token%token_no)
else
q = lexer_token_precedence(token%token_no) + 1
end if
node_variety = binary_operator_node_variety (token%token_no)
call get_token (unit_no, lex_line_no, token)
call parse_expression (unit_no, q, lex_line_no, token, right_expression)
ast = make_internal_node (node_variety, ast, right_expression)
end block
end do
end subroutine parse_expression
recursive subroutine parse_parenthesized_expression (unit_no, lex_line_no, token, ast)
integer, intent(in) :: unit_no
integer(kind = nk), intent(inout) :: lex_line_no
type(lexer_token_t), intent(inout) :: token
type(ast_t), intent(out) :: ast
call expect_token ("paren_expr", tk_Lparen, token)
call get_token (unit_no, lex_line_no, token)
call parse_expression (unit_no, 0, lex_line_no, token, ast)
call expect_token ("paren_expr", tk_Rparen, token)
call get_token (unit_no, lex_line_no, token)
end subroutine parse_parenthesized_expression
elemental function binary_operator_node_variety (token_no) result (node_variety)
integer, intent(in) :: token_no
integer :: node_variety
select case (token_no)
case (tk_Mul)
node_variety = node_Multiply
case (tk_Div)
node_variety = node_Divide
case (tk_Mod)
node_variety = node_Mod
case (tk_Add)
node_variety = node_Add
case (tk_Sub)
node_variety = node_Subtract
case (tk_Lss)
node_variety = node_Less
case (tk_Leq)
node_variety = node_LessEqual
case (tk_Gtr)
node_variety = node_Greater
case (tk_Geq)
node_variety = node_GreaterEqual
case (tk_Eq)
node_variety = node_Equal
case (tk_Neq)
node_variety = node_NotEqual
case (tk_And)
node_variety = node_And
case (tk_Or)
node_variety = node_Or
case default
! This branch should never be reached.
error stop
end select
end function binary_operator_node_variety
function make_internal_node (node_variety, left, right) result (ast)
integer, intent(in) :: node_variety
class(ast_t), intent(in) :: left, right
type(ast_t) :: ast
type(ast_node_t), pointer :: node
allocate (node)
node%node_variety = node_variety
node%left => left%node
node%right => right%node
ast%node => node
end function make_internal_node
function make_leaf_node (node_variety, val) result (ast)
integer, intent(in) :: node_variety
character(*, kind = ck), intent(in) :: val
type(ast_t) :: ast
type(ast_node_t), pointer :: node
allocate (node)
node%node_variety = node_variety
node%val = val
ast%node => node
end function make_leaf_node
subroutine get_token (unit_no, lex_line_no, token)
integer, intent(in) :: unit_no
integer(kind = nk), intent(inout) :: lex_line_no
type(lexer_token_t), intent(out) :: token
logical :: eof
call get_lexer_token (unit_no, lex_line_no, eof, token)
if (eof) then
write (error_unit, '("Parser error: the stream of input tokens is incomplete")')
stop 1
end if
end subroutine get_token
subroutine expect_token (message, token_no, token)
character(*), intent(in) :: message
integer, intent (in) :: token_no
class(lexer_token_t), intent(in) :: token
if (token%token_no /= token_no) then
call syntax_error_message (message, token_no, token)
stop 1
end if
end subroutine expect_token
subroutine syntax_error_message (message, expected_token_no, token)
character(*), intent(in) :: message
integer, intent(in) :: expected_token_no
class(lexer_token_t), intent(in) :: token
! Write a message to an output unit dedicated to printing
! errors. The message could, of course, be more detailed than what
! we are doing here.
write (error_unit, '("Syntax error at ", I0, ".", I0)') &
& token%line_no, token%column_no
!
! For the sake of the exercise, also write, to output_unit, a
! message in the style of the C reference program.
!
write (output_unit, '("(", I0, ", ", I0, ") error: ")', advance = 'no') &
& token%line_no, token%column_no
select case (expected_token_no)
case (tk_start_of_statement)
write (output_unit, '("expecting start of statement, found ''", 1A, "''")') &
& trim (lexer_token_string(token%token_no))
case (tk_primary)
write (output_unit, '("Expecting a primary, found ''", 1A, "''")') &
& trim (lexer_token_string(token%token_no))
case default
write (output_unit, '(1A, ": Expecting ''", 1A, "'', found ''", 1A, "''")') &
& trim (message), trim (lexer_token_string(expected_token_no)), &
& trim (lexer_token_string(token%token_no))
end select
end subroutine syntax_error_message
subroutine output_ast_flattened (unit_no, ast)
integer, intent(in) :: unit_no
type(ast_t), intent(in) :: ast
call output_ast_node_flattened (unit_no, ast%node)
end subroutine output_ast_flattened
recursive subroutine output_ast_node_flattened (unit_no, node)
integer, intent(in) :: unit_no
type(ast_node_t), pointer, intent(in) :: node
if (.not. associated (node)) then
write (unit_no, '(";")')
else
if (allocated (node%val)) then
write (unit_no, '(1A16, 2X, 1A)') &
& node_variety_string(node%node_variety), node%val
else
write (unit_no, '(1A)') &
& trim (node_variety_string(node%node_variety))
call output_ast_node_flattened (unit_no, node%left)
call output_ast_node_flattened (unit_no, node%right)
end if
end if
end subroutine output_ast_node_flattened
end module syntactic_analysis
program parse
use, intrinsic :: iso_fortran_env, only: input_unit
use, intrinsic :: iso_fortran_env, only: output_unit
use, intrinsic :: iso_fortran_env, only: error_unit
use, non_intrinsic :: syntactic_analysis
implicit none
integer, parameter :: inp_unit_no = 100
integer, parameter :: outp_unit_no = 101
integer :: arg_count
character(200) :: arg
integer :: inp
integer :: outp
arg_count = command_argument_count ()
if (3 <= arg_count) then
call print_usage
else
if (arg_count == 0) then
inp = input_unit
outp = output_unit
else if (arg_count == 1) then
call get_command_argument (1, arg)
inp = open_for_input (trim (arg))
outp = output_unit
else if (arg_count == 2) then
call get_command_argument (1, arg)
inp = open_for_input (trim (arg))
call get_command_argument (2, arg)
outp = open_for_output (trim (arg))
end if
block
type(ast_t) :: ast
ast = parse_token_stream (inp)
call output_ast_flattened (outp, ast)
end block
end if
contains
function open_for_input (filename) result (unit_no)
character(*), intent(in) :: filename
integer :: unit_no
integer :: stat
open (unit = inp_unit_no, file = filename, status = 'old', &
& action = 'read', access = 'stream', form = 'unformatted', &
& iostat = stat)
if (stat /= 0) then
write (error_unit, '("Error: failed to open ", 1A, " for input")') filename
stop 1
end if
unit_no = inp_unit_no
end function open_for_input
function open_for_output (filename) result (unit_no)
character(*), intent(in) :: filename
integer :: unit_no
integer :: stat
open (unit = outp_unit_no, file = filename, action = 'write', iostat = stat)
if (stat /= 0) then
write (error_unit, '("Error: failed to open ", 1A, " for output")') filename
stop 1
end if
unit_no = outp_unit_no
end function open_for_output
subroutine print_usage
character(200) :: progname
call get_command_argument (0, progname)
write (output_unit, '("Usage: ", 1A, " [INPUT_FILE [OUTPUT_FILE]]")') &
& trim (progname)
end subroutine print_usage
end program parse