288 lines
8.7 KiB
Plaintext
288 lines
8.7 KiB
Plaintext
enum Token_type
|
|
tk_EOI
|
|
tk_Mul
|
|
tk_Div
|
|
tk_Mod
|
|
tk_Add
|
|
tk_Sub
|
|
tk_Negate
|
|
tk_Not
|
|
tk_Lss
|
|
tk_Leq
|
|
tk_Gtr
|
|
tk_Geq
|
|
tk_Eq
|
|
tk_Neq
|
|
tk_Assign
|
|
tk_And
|
|
tk_Or
|
|
tk_If
|
|
tk_Else
|
|
tk_While
|
|
tk_Print
|
|
tk_Putc
|
|
tk_Lparen
|
|
tk_Rparen
|
|
tk_Lbrace
|
|
tk_Rbrace
|
|
tk_Semi
|
|
tk_Comma
|
|
tk_Ident
|
|
tk_Integer
|
|
tk_String
|
|
end enum
|
|
|
|
const NewLine = chr(10)
|
|
const DoubleQuote = chr(34)
|
|
const BackSlash = chr(92)
|
|
|
|
' where we store keywords and variables
|
|
type Symbol
|
|
s_name as string
|
|
tok as Token_type
|
|
end type
|
|
|
|
dim shared symtab() as Symbol
|
|
|
|
dim shared cur_line as string
|
|
dim shared cur_ch as string
|
|
dim shared line_num as integer
|
|
dim shared col_num as integer
|
|
|
|
function is_digit(byval ch as string) as long
|
|
is_digit = ch >= "0" AndAlso ch <= "9"
|
|
end function
|
|
|
|
function is_alnum(byval ch as string) as long
|
|
is_alnum = (ucase(ch) >= "A" AndAlso ucase(ch) <= "Z") OrElse is_digit(ch)
|
|
end function
|
|
|
|
sub error_msg(byval eline as integer, byval ecol as integer, byval msg as string)
|
|
print "("; eline; ":"; ecol; ") "; msg
|
|
print : print "Hit any to end program"
|
|
sleep
|
|
system
|
|
end sub
|
|
|
|
' add an identifier to the symbol table
|
|
function install(byval s_name as string, byval tok as Token_type) as integer
|
|
dim n as integer = ubound(symtab) + 1
|
|
redim preserve symtab(n)
|
|
|
|
symtab(n).s_name = s_name
|
|
symtab(n).tok = tok
|
|
return n
|
|
end function
|
|
|
|
' search for an identifier in the symbol table
|
|
function lookup(byval s_name as string) as integer
|
|
dim i as integer
|
|
|
|
for i = lbound(symtab) to ubound(symtab)
|
|
if symtab(i).s_name = s_name then return i
|
|
next
|
|
return -1
|
|
end function
|
|
|
|
sub next_line() ' read the next line of input from the source file
|
|
cur_line = ""
|
|
cur_ch = "" ' empty cur_ch means end-of-file
|
|
if eof(1) then exit sub
|
|
line input #1, cur_line
|
|
cur_line = cur_line + NewLine
|
|
line_num += + 1
|
|
col_num = 1
|
|
end sub
|
|
|
|
sub next_char() ' get the next char
|
|
cur_ch = ""
|
|
col_num += 1
|
|
if col_num > len(cur_line) then next_line()
|
|
if col_num <= len(cur_line) then cur_ch = mid(cur_line, col_num, 1)
|
|
end sub
|
|
|
|
function follow(byval err_line as integer, byval err_col as integer, byval expect as string, byval ifyes as Token_type, byval ifno as Token_type) as Token_type
|
|
if cur_ch = expect then
|
|
next_char()
|
|
return ifyes
|
|
end if
|
|
if ifno = tk_eoi then error_msg(err_line, err_col, "follow unrecognized character: " + cur_ch)
|
|
return ifno
|
|
end function
|
|
|
|
sub gettok(byref err_line as integer, byref err_col as integer, byref tok as Token_type, byref v as string)
|
|
' skip whitespace
|
|
do while (cur_ch = " " or cur_ch = chr(9) or cur_ch = NewLine) and (cur_ch <> "")
|
|
next_char()
|
|
loop
|
|
|
|
err_line = line_num
|
|
err_col = col_num
|
|
|
|
select case cur_ch
|
|
case "": tok = tk_eoi: exit sub
|
|
case "{": tok = tk_lbrace: next_char(): exit sub
|
|
case "}": tok = tk_rbrace: next_char(): exit sub
|
|
case "(": tok = tk_lparen: next_char(): exit sub
|
|
case ")": tok = tk_rparen: next_char(): exit sub
|
|
case "+": tok = tk_add: next_char(): exit sub
|
|
case "-": tok = tk_sub: next_char(): exit sub
|
|
case "*": tok = tk_mul: next_char(): exit sub
|
|
case "%": tok = tk_Mod: next_char(): exit sub
|
|
case ";": tok = tk_semi: next_char(): exit sub
|
|
case ",": tok = tk_comma: next_char(): exit sub
|
|
case "/": ' div or comment
|
|
next_char()
|
|
if cur_ch <> "*" then
|
|
tok = tk_div
|
|
exit sub
|
|
end if
|
|
' skip comments
|
|
next_char()
|
|
do
|
|
if cur_ch = "*" then
|
|
next_char()
|
|
if cur_ch = "/" then
|
|
next_char()
|
|
gettok(err_line, err_col, tok, v)
|
|
exit sub
|
|
end if
|
|
elseif cur_ch = "" then error_msg(err_line, err_col, "EOF in comment")
|
|
else
|
|
next_char()
|
|
end if
|
|
loop
|
|
case "'": ' single char literals
|
|
next_char()
|
|
v = str(asc(cur_ch))
|
|
if cur_ch = "'" then error_msg(err_line, err_col, "empty character constant")
|
|
if cur_ch = BackSlash then
|
|
next_char()
|
|
if cur_ch = "n" then
|
|
v = "10"
|
|
elseif cur_ch = BackSlash then
|
|
v = "92"
|
|
else error_msg(err_line, err_col, "unknown escape sequence: " + cur_ch)
|
|
end if
|
|
end if
|
|
next_char()
|
|
if cur_ch <> "'" then error_msg(err_line, err_col, "multi-character constant")
|
|
next_char()
|
|
tok = tk_integer
|
|
exit sub
|
|
case "<": next_char(): tok = follow(err_line, err_col, "=", tk_Leq, tk_Lss): exit sub
|
|
case ">": next_char(): tok = follow(err_line, err_col, "=", tk_Geq, tk_Gtr): exit sub
|
|
case "!": next_char(): tok = follow(err_line, err_col, "=", tk_Neq, tk_Not): exit sub
|
|
case "=": next_char(): tok = follow(err_line, err_col, "=", tk_Eq, tk_Assign): exit sub
|
|
case "&": next_char(): tok = follow(err_line, err_col, "&", tk_And, tk_EOI): exit sub
|
|
case "|": next_char(): tok = follow(err_line, err_col, "|", tk_Or, tk_EOI): exit sub
|
|
case DoubleQuote: ' string
|
|
v = cur_ch
|
|
next_char()
|
|
do while cur_ch <> DoubleQuote
|
|
if cur_ch = NewLine then error_msg(err_line, err_col, "EOL in string")
|
|
if cur_ch = "" then error_msg(err_line, err_col, "EOF in string")
|
|
v += cur_ch
|
|
next_char()
|
|
loop
|
|
v += cur_ch
|
|
next_char()
|
|
tok = tk_string
|
|
exit sub
|
|
case else ' integers or identifiers
|
|
dim is_number as boolean = is_digit(cur_ch)
|
|
v = ""
|
|
do while is_alnum(cur_ch) orelse cur_ch = "_"
|
|
if not is_digit(cur_ch) then is_number = false
|
|
v += cur_ch
|
|
next_char()
|
|
loop
|
|
if len(v) = 0 then error_msg(err_line, err_col, "unknown character: " + cur_ch)
|
|
if is_digit(mid(v, 1, 1)) then
|
|
if not is_number then error_msg(err_line, err_col, "invalid number: " + v)
|
|
tok = tk_integer
|
|
exit sub
|
|
end if
|
|
dim as integer index = lookup(v)
|
|
if index = -1 then
|
|
tok = tk_ident
|
|
else
|
|
tok = symtab(index).tok
|
|
end if
|
|
exit sub
|
|
end select
|
|
end sub
|
|
|
|
sub init_lex(byval filein as string)
|
|
install("else", tk_else)
|
|
install("if", tk_if)
|
|
install("print", tk_print)
|
|
install("putc", tk_putc)
|
|
install("while", tk_while)
|
|
|
|
open filein for input as #1
|
|
|
|
cur_line = ""
|
|
line_num = 0
|
|
col_num = 0
|
|
next_char()
|
|
end sub
|
|
|
|
sub scanner()
|
|
dim err_line as integer
|
|
dim err_col as integer
|
|
dim tok as Token_type
|
|
dim v as string
|
|
dim tok_list(tk_eoi to tk_string) as string
|
|
|
|
tok_list(tk_EOI ) = "End_of_input"
|
|
tok_list(tk_Mul ) = "Op_multiply"
|
|
tok_list(tk_Div ) = "Op_divide"
|
|
tok_list(tk_Mod ) = "Op_mod"
|
|
tok_list(tk_Add ) = "Op_add"
|
|
tok_list(tk_Sub ) = "Op_subtract"
|
|
tok_list(tk_Negate ) = "Op_negate"
|
|
tok_list(tk_Not ) = "Op_not"
|
|
tok_list(tk_Lss ) = "Op_less"
|
|
tok_list(tk_Leq ) = "Op_lessequal"
|
|
tok_list(tk_Gtr ) = "Op_greater"
|
|
tok_list(tk_Geq ) = "Op_greaterequal"
|
|
tok_list(tk_Eq ) = "Op_equal"
|
|
tok_list(tk_Neq ) = "Op_notequal"
|
|
tok_list(tk_Assign ) = "Op_assign"
|
|
tok_list(tk_And ) = "Op_and"
|
|
tok_list(tk_Or ) = "Op_or"
|
|
tok_list(tk_If ) = "Keyword_if"
|
|
tok_list(tk_Else ) = "Keyword_else"
|
|
tok_list(tk_While ) = "Keyword_while"
|
|
tok_list(tk_Print ) = "Keyword_print"
|
|
tok_list(tk_Putc ) = "Keyword_putc"
|
|
tok_list(tk_Lparen ) = "LeftParen"
|
|
tok_list(tk_Rparen ) = "RightParen"
|
|
tok_list(tk_Lbrace ) = "LeftBrace"
|
|
tok_list(tk_Rbrace ) = "RightBrace"
|
|
tok_list(tk_Semi ) = "Semicolon"
|
|
tok_list(tk_Comma ) = "Comma"
|
|
tok_list(tk_Ident ) = "Identifier"
|
|
tok_list(tk_Integer) = "Integer"
|
|
tok_list(tk_String ) = "String"
|
|
|
|
do
|
|
gettok(err_line, err_col, tok, v)
|
|
print using "##### ##### \ " + BackSlash; err_line; err_col; tok_list(tok);
|
|
if tok = tk_integer orelse tok = tk_ident orelse tok = tk_string then print " " + v;
|
|
print
|
|
loop until tok = tk_eoi
|
|
end sub
|
|
|
|
sub main()
|
|
if command(1) = "" then print "filename required" : exit sub
|
|
init_lex(command(1))
|
|
scanner()
|
|
end sub
|
|
|
|
main()
|
|
print : print "Hit any to end program"
|
|
sleep
|
|
system
|