RosettaCodeData/Task/Compiler-lexical-analyzer/FreeBASIC/compiler-lexical-analyzer.b...

288 lines
8.7 KiB
Plaintext

enum Token_type
tk_EOI
tk_Mul
tk_Div
tk_Mod
tk_Add
tk_Sub
tk_Negate
tk_Not
tk_Lss
tk_Leq
tk_Gtr
tk_Geq
tk_Eq
tk_Neq
tk_Assign
tk_And
tk_Or
tk_If
tk_Else
tk_While
tk_Print
tk_Putc
tk_Lparen
tk_Rparen
tk_Lbrace
tk_Rbrace
tk_Semi
tk_Comma
tk_Ident
tk_Integer
tk_String
end enum
const NewLine = chr(10)
const DoubleQuote = chr(34)
const BackSlash = chr(92)
' where we store keywords and variables
type Symbol
s_name as string
tok as Token_type
end type
dim shared symtab() as Symbol
dim shared cur_line as string
dim shared cur_ch as string
dim shared line_num as integer
dim shared col_num as integer
function is_digit(byval ch as string) as long
is_digit = ch >= "0" AndAlso ch <= "9"
end function
function is_alnum(byval ch as string) as long
is_alnum = (ucase(ch) >= "A" AndAlso ucase(ch) <= "Z") OrElse is_digit(ch)
end function
sub error_msg(byval eline as integer, byval ecol as integer, byval msg as string)
print "("; eline; ":"; ecol; ") "; msg
print : print "Hit any to end program"
sleep
system
end sub
' add an identifier to the symbol table
function install(byval s_name as string, byval tok as Token_type) as integer
dim n as integer = ubound(symtab) + 1
redim preserve symtab(n)
symtab(n).s_name = s_name
symtab(n).tok = tok
return n
end function
' search for an identifier in the symbol table
function lookup(byval s_name as string) as integer
dim i as integer
for i = lbound(symtab) to ubound(symtab)
if symtab(i).s_name = s_name then return i
next
return -1
end function
sub next_line() ' read the next line of input from the source file
cur_line = ""
cur_ch = "" ' empty cur_ch means end-of-file
if eof(1) then exit sub
line input #1, cur_line
cur_line = cur_line + NewLine
line_num += + 1
col_num = 1
end sub
sub next_char() ' get the next char
cur_ch = ""
col_num += 1
if col_num > len(cur_line) then next_line()
if col_num <= len(cur_line) then cur_ch = mid(cur_line, col_num, 1)
end sub
function follow(byval err_line as integer, byval err_col as integer, byval expect as string, byval ifyes as Token_type, byval ifno as Token_type) as Token_type
if cur_ch = expect then
next_char()
return ifyes
end if
if ifno = tk_eoi then error_msg(err_line, err_col, "follow unrecognized character: " + cur_ch)
return ifno
end function
sub gettok(byref err_line as integer, byref err_col as integer, byref tok as Token_type, byref v as string)
' skip whitespace
do while (cur_ch = " " or cur_ch = chr(9) or cur_ch = NewLine) and (cur_ch <> "")
next_char()
loop
err_line = line_num
err_col = col_num
select case cur_ch
case "": tok = tk_eoi: exit sub
case "{": tok = tk_lbrace: next_char(): exit sub
case "}": tok = tk_rbrace: next_char(): exit sub
case "(": tok = tk_lparen: next_char(): exit sub
case ")": tok = tk_rparen: next_char(): exit sub
case "+": tok = tk_add: next_char(): exit sub
case "-": tok = tk_sub: next_char(): exit sub
case "*": tok = tk_mul: next_char(): exit sub
case "%": tok = tk_Mod: next_char(): exit sub
case ";": tok = tk_semi: next_char(): exit sub
case ",": tok = tk_comma: next_char(): exit sub
case "/": ' div or comment
next_char()
if cur_ch <> "*" then
tok = tk_div
exit sub
end if
' skip comments
next_char()
do
if cur_ch = "*" then
next_char()
if cur_ch = "/" then
next_char()
gettok(err_line, err_col, tok, v)
exit sub
end if
elseif cur_ch = "" then error_msg(err_line, err_col, "EOF in comment")
else
next_char()
end if
loop
case "'": ' single char literals
next_char()
v = str(asc(cur_ch))
if cur_ch = "'" then error_msg(err_line, err_col, "empty character constant")
if cur_ch = BackSlash then
next_char()
if cur_ch = "n" then
v = "10"
elseif cur_ch = BackSlash then
v = "92"
else error_msg(err_line, err_col, "unknown escape sequence: " + cur_ch)
end if
end if
next_char()
if cur_ch <> "'" then error_msg(err_line, err_col, "multi-character constant")
next_char()
tok = tk_integer
exit sub
case "<": next_char(): tok = follow(err_line, err_col, "=", tk_Leq, tk_Lss): exit sub
case ">": next_char(): tok = follow(err_line, err_col, "=", tk_Geq, tk_Gtr): exit sub
case "!": next_char(): tok = follow(err_line, err_col, "=", tk_Neq, tk_Not): exit sub
case "=": next_char(): tok = follow(err_line, err_col, "=", tk_Eq, tk_Assign): exit sub
case "&": next_char(): tok = follow(err_line, err_col, "&", tk_And, tk_EOI): exit sub
case "|": next_char(): tok = follow(err_line, err_col, "|", tk_Or, tk_EOI): exit sub
case DoubleQuote: ' string
v = cur_ch
next_char()
do while cur_ch <> DoubleQuote
if cur_ch = NewLine then error_msg(err_line, err_col, "EOL in string")
if cur_ch = "" then error_msg(err_line, err_col, "EOF in string")
v += cur_ch
next_char()
loop
v += cur_ch
next_char()
tok = tk_string
exit sub
case else ' integers or identifiers
dim is_number as boolean = is_digit(cur_ch)
v = ""
do while is_alnum(cur_ch) orelse cur_ch = "_"
if not is_digit(cur_ch) then is_number = false
v += cur_ch
next_char()
loop
if len(v) = 0 then error_msg(err_line, err_col, "unknown character: " + cur_ch)
if is_digit(mid(v, 1, 1)) then
if not is_number then error_msg(err_line, err_col, "invalid number: " + v)
tok = tk_integer
exit sub
end if
dim as integer index = lookup(v)
if index = -1 then
tok = tk_ident
else
tok = symtab(index).tok
end if
exit sub
end select
end sub
sub init_lex(byval filein as string)
install("else", tk_else)
install("if", tk_if)
install("print", tk_print)
install("putc", tk_putc)
install("while", tk_while)
open filein for input as #1
cur_line = ""
line_num = 0
col_num = 0
next_char()
end sub
sub scanner()
dim err_line as integer
dim err_col as integer
dim tok as Token_type
dim v as string
dim tok_list(tk_eoi to tk_string) as string
tok_list(tk_EOI ) = "End_of_input"
tok_list(tk_Mul ) = "Op_multiply"
tok_list(tk_Div ) = "Op_divide"
tok_list(tk_Mod ) = "Op_mod"
tok_list(tk_Add ) = "Op_add"
tok_list(tk_Sub ) = "Op_subtract"
tok_list(tk_Negate ) = "Op_negate"
tok_list(tk_Not ) = "Op_not"
tok_list(tk_Lss ) = "Op_less"
tok_list(tk_Leq ) = "Op_lessequal"
tok_list(tk_Gtr ) = "Op_greater"
tok_list(tk_Geq ) = "Op_greaterequal"
tok_list(tk_Eq ) = "Op_equal"
tok_list(tk_Neq ) = "Op_notequal"
tok_list(tk_Assign ) = "Op_assign"
tok_list(tk_And ) = "Op_and"
tok_list(tk_Or ) = "Op_or"
tok_list(tk_If ) = "Keyword_if"
tok_list(tk_Else ) = "Keyword_else"
tok_list(tk_While ) = "Keyword_while"
tok_list(tk_Print ) = "Keyword_print"
tok_list(tk_Putc ) = "Keyword_putc"
tok_list(tk_Lparen ) = "LeftParen"
tok_list(tk_Rparen ) = "RightParen"
tok_list(tk_Lbrace ) = "LeftBrace"
tok_list(tk_Rbrace ) = "RightBrace"
tok_list(tk_Semi ) = "Semicolon"
tok_list(tk_Comma ) = "Comma"
tok_list(tk_Ident ) = "Identifier"
tok_list(tk_Integer) = "Integer"
tok_list(tk_String ) = "String"
do
gettok(err_line, err_col, tok, v)
print using "##### ##### \ " + BackSlash; err_line; err_col; tok_list(tok);
if tok = tk_integer orelse tok = tk_ident orelse tok = tk_string then print " " + v;
print
loop until tok = tk_eoi
end sub
sub main()
if command(1) = "" then print "filename required" : exit sub
init_lex(command(1))
scanner()
end sub
main()
print : print "Hit any to end program"
sleep
system