(********************************************************************)
(* Usage: parse [INPUTFILE [OUTPUTFILE]]
   If INPUTFILE or OUTPUTFILE is "-" or missing, then standard input
   or standard output is used, respectively. *)
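
(* One possible way to build this program, assuming the source is
   saved as parse.dats and the patscc driver from ATS2/Postiats is
   installed (the exact flags may differ on your system):

     patscc -O2 -DATS_MEMALLOC_LIBC -o parse parse.dats

   The program reads the lexer's token stream from INPUTFILE and
   writes a flattened AST to OUTPUTFILE. *)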

#include "share/atspre_staload.hats"
staload UN = "prelude/SATS/unsafe.sats"

#define NIL list_nil ()
#define :: list_cons

%{^
/* alloca(3) is needed for ATS exceptions. */
#include <alloca.h>
%}

(********************************************************************)

#define NUM_TOKENS 31

#define TOKEN_ELSE 0
#define TOKEN_IF 1
#define TOKEN_PRINT 2
#define TOKEN_PUTC 3
#define TOKEN_WHILE 4
#define TOKEN_MULTIPLY 5
#define TOKEN_DIVIDE 6
#define TOKEN_MOD 7
#define TOKEN_ADD 8
#define TOKEN_SUBTRACT 9
#define TOKEN_NEGATE 10
#define TOKEN_LESS 11
#define TOKEN_LESSEQUAL 12
#define TOKEN_GREATER 13
#define TOKEN_GREATEREQUAL 14
#define TOKEN_EQUAL 15
#define TOKEN_NOTEQUAL 16
#define TOKEN_NOT 17
#define TOKEN_ASSIGN 18
#define TOKEN_AND 19
#define TOKEN_OR 20
#define TOKEN_LEFTPAREN 21
#define TOKEN_RIGHTPAREN 22
#define TOKEN_LEFTBRACE 23
#define TOKEN_RIGHTBRACE 24
#define TOKEN_SEMICOLON 25
#define TOKEN_COMMA 26
#define TOKEN_IDENTIFIER 27
#define TOKEN_INTEGER 28
#define TOKEN_STRING 29
#define TOKEN_END_OF_INPUT 30

typedef token_t =
  [i : int | TOKEN_ELSE <= i; i <= TOKEN_END_OF_INPUT]
  int i
typedef tokentuple_t = (token_t, String, ullint, ullint)

fn
token_text (tok : token_t) : String =
  case+ tok of
  | TOKEN_ELSE => "else"
  | TOKEN_IF => "if"
  | TOKEN_PRINT => "print"
  | TOKEN_PUTC => "putc"
  | TOKEN_WHILE => "while"
  | TOKEN_MULTIPLY => "*"
  | TOKEN_DIVIDE => "/"
  | TOKEN_MOD => "%"
  | TOKEN_ADD => "+"
  | TOKEN_SUBTRACT => "-"
  | TOKEN_NEGATE => "-"
  | TOKEN_LESS => "<"
  | TOKEN_LESSEQUAL => "<="
  | TOKEN_GREATER => ">"
  | TOKEN_GREATEREQUAL => ">="
  | TOKEN_EQUAL => "=="
  | TOKEN_NOTEQUAL => "!="
  | TOKEN_NOT => "!"
  | TOKEN_ASSIGN => "="
  | TOKEN_AND => "&&"
  | TOKEN_OR => "||"
  | TOKEN_LEFTPAREN => "("
  | TOKEN_RIGHTPAREN => ")"
  | TOKEN_LEFTBRACE => "{"
  | TOKEN_RIGHTBRACE => "}"
  | TOKEN_SEMICOLON => ";"
  | TOKEN_COMMA => ","
  | TOKEN_IDENTIFIER => "Ident"
  | TOKEN_INTEGER => "Integer literal"
  | TOKEN_STRING => "String literal"
  | TOKEN_END_OF_INPUT => "EOI"

(********************************************************************)
(* A perfect hash for the lexical token names.

   This hash was generated by GNU gperf and then translated to
   reasonable ATS by hand. Note, though, that one could have embedded
   the generated C code directly and used it. *)

#define MIN_WORD_LENGTH 5
#define MAX_WORD_LENGTH 15
#define MIN_HASH_VALUE 5
#define MAX_HASH_VALUE 64
#define HASH_TABLE_SIZE 65

local
  extern castfn u : {n : nat | n < 256} int n -<> uint8 n
in
vtypedef asso_values_vt = @[[n : nat | n < 256] uint8 n][256]

var asso_values =
  @[[n : nat | n < 256] uint8 n][256]
    (u 65, u 65, u 65, u 65, u 65, u 65, u 65, u 65, u 65, u 65,
     u 65, u 65, u 65, u 65, u 65, u 65, u 65, u 65, u 65, u 65,
     u 65, u 65, u 65, u 65, u 65, u 65, u 65, u 65, u 65, u 65,
     u 65, u 65, u 65, u 65, u 65, u 65, u 65, u 65, u 65, u 65,
     u 65, u 65, u 65, u 65, u 65, u 65, u 65, u 65, u 65, u 65,
     u 65, u 65, u 65, u 65, u 65, u 65, u 65, u 65, u 65, u 65,
     u 65, u 65, u 65, u 65, u 65, u 65, u 10, u 65, u 65, u 65,
     u 65, u 65, u 65, u 65, u 65, u 65, u 65, u 65, u 65, u 65,
     u 0, u 65, u 65, u 65, u 65, u 65, u 65, u 65, u 65, u 65,
     u 65, u 65, u 65, u 65, u 65, u 65, u 65, u 0, u 65, u 25,
     u 5, u 5, u 0, u 15, u 65, u 0, u 65, u 65, u 10, u 65,
     u 30, u 0, u 65, u 5, u 10, u 10, u 0, u 15, u 65, u 65,
     u 65, u 5, u 65, u 65, u 65, u 65, u 65, u 65, u 65, u 65,
     u 65, u 65, u 65, u 65, u 65, u 65, u 65, u 65, u 65, u 65,
     u 65, u 65, u 65, u 65, u 65, u 65, u 65, u 65, u 65, u 65,
     u 65, u 65, u 65, u 65, u 65, u 65, u 65, u 65, u 65, u 65,
     u 65, u 65, u 65, u 65, u 65, u 65, u 65, u 65, u 65, u 65,
     u 65, u 65, u 65, u 65, u 65, u 65, u 65, u 65, u 65, u 65,
     u 65, u 65, u 65, u 65, u 65, u 65, u 65, u 65, u 65, u 65,
     u 65, u 65, u 65, u 65, u 65, u 65, u 65, u 65, u 65, u 65,
     u 65, u 65, u 65, u 65, u 65, u 65, u 65, u 65, u 65, u 65,
     u 65, u 65, u 65, u 65, u 65, u 65, u 65, u 65, u 65, u 65,
     u 65, u 65, u 65, u 65, u 65, u 65, u 65, u 65, u 65, u 65,
     u 65, u 65, u 65, u 65, u 65, u 65, u 65, u 65, u 65, u 65,
     u 65, u 65, u 65, u 65, u 65, u 65, u 65, u 65, u 65, u 65,
     u 65, u 65, u 65, u 65, u 65, u 65)
end

fn
get_asso_value {i : nat | i < 256}
               (i : uint i) :<>
    [n : nat | n < 256] uint n =
  let
    extern castfn u8ui : {n : nat} uint8 n -<> uint n
    extern castfn mk_asso_values :<>
      {p : addr} ptr p -<> (asso_values_vt @ p | ptr p)

    val asso_values_tup = mk_asso_values (addr@ asso_values)
    macdef asso_values = !(asso_values_tup.1)
    val retval = asso_values[i]
    val _ = $UN.castvwtp0{void} asso_values_tup
  in
    u8ui retval
  end
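
(* Note on the table access above (and in get_wordlist_entry below):
   an unchecked castfn temporarily manufactures a view of the
   statically allocated table so that it can be indexed inside an
   effect-free (:<>) function; the borrowed view is then discarded
   with an unsafe cast once the entry has been read. *)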

fn
hash {n : int | MIN_WORD_LENGTH <= n; n <= MAX_WORD_LENGTH}
     (str : string n,
      len : size_t n) :<>
    [key : nat] uint key =
  let
    extern castfn uc2ui : {n : nat} uchar n -<> uint n

    val c1 = uc2ui (c2uc str[4])
    val c2 = uc2ui (c2uc str[pred len])
  in
    sz2u len + get_asso_value c1 + get_asso_value c2
  end
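
(* Worked example (illustration only): hash ("Op_add", i2sz 6) adds
   the length 6 to the asso_values entries for str[4] = 'd' and for
   the last character 'd', giving 6 + 5 + 5 = 16, which is exactly
   the slot of ("Op_add", TOKEN_ADD) in the wordlist below. *)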

typedef wordlist_vt = @[(String, token_t)][HASH_TABLE_SIZE]

var wordlist =
  @[(String, token_t)][HASH_TABLE_SIZE]
    (("", 0), ("", 0), ("", 0), ("", 0), ("", 0),
     ("Comma", 26),
     ("Op_not", 17),
     ("", 0), ("", 0), ("", 0),
     ("Keyword_if", 1),
     ("Op_mod", 7),
     ("End_of_input", 30),
     ("Keyword_print", 2),
     ("Op_divide", 6),
     ("RightBrace", 24),
     ("Op_add", 8),
     ("Keyword_else", 0),
     ("Keyword_while", 4),
     ("Op_negate", 10),
     ("Identifier", 27),
     ("Op_notequal", 16),
     ("Op_less", 11),
     ("Op_equal", 15),
     ("LeftBrace", 23),
     ("Op_or", 20),
     ("Op_subtract", 9),
     ("Op_lessequal", 12),
     ("", 0), ("", 0),
     ("Op_greater", 13),
     ("Op_multiply", 5),
     ("Integer", 28),
     ("", 0), ("", 0),
     ("Op_greaterequal", 14),
     ("", 0),
     ("Keyword_putc", 3),
     ("", 0),
     ("LeftParen", 21),
     ("RightParen", 22),
     ("Op_and", 19),
     ("", 0), ("", 0), ("", 0), ("", 0), ("", 0), ("", 0), ("", 0),
     ("Op_assign", 18),
     ("", 0),
     ("String", 29),
     ("", 0), ("", 0), ("", 0), ("", 0), ("", 0), ("", 0), ("", 0),
     ("", 0), ("", 0), ("", 0), ("", 0), ("", 0),
     ("Semicolon", 25))

fn
get_wordlist_entry
          {n : nat | n <= MAX_HASH_VALUE}
          (key : uint n) :<> (String, token_t) =
  let
    extern castfn mk_wordlist_tup :<>
      {p : addr} ptr p -<> (wordlist_vt @ p | ptr p)

    val wordlist_tup = mk_wordlist_tup (addr@ wordlist)
    macdef wordlist = !(wordlist_tup.1)
    val retval = wordlist[key]
    val _ = $UN.castvwtp0{void} wordlist_tup
  in
    retval
  end

fn
string2token_t_opt
          {n : int}
          (str : string n) :<>
    Option token_t =
  let
    val len = string_length str
  in
    if len < i2sz MIN_WORD_LENGTH then
      None ()
    else if i2sz MAX_WORD_LENGTH < len then
      None ()
    else
      let
        val key = hash (str, len)
      in
        if i2u MAX_HASH_VALUE < key then
          None ()
        else
          let
            val (s, tok) = get_wordlist_entry (key)
          in
            if str <> s then
              None ()
            else
              Some tok
          end
      end
  end

(********************************************************************)

exception bad_lex_integer of (String)
exception bad_lex_token_name of (String)
exception bad_string_literal of (String)

extern fun {}
skip_something$pred : char -<> bool

fn {}
skip_something {n : nat}
               {i : nat | i <= n}
               (s : string n,
                n : size_t n,
                i : size_t i) :<>
    [j : nat | i <= j; j <= n]
    size_t j =
  let
    fun
    loop {k : nat | i <= k; k <= n} .<n - k>.
         (k : size_t k) :<>
        [j : nat | i <= j; j <= n]
        size_t j =
      if k = n then
        k
      else if ~(skip_something$pred<> s[k]) then
        k
      else
        loop (succ k)
  in
    loop i
  end

fn
skip_space {n : nat}
           {i : nat | i <= n}
           (s : string n,
            n : size_t n,
            i : size_t i) :<>
    [j : nat | i <= j; j <= n]
    size_t j =
  let
    implement skip_something$pred<> (c) = isspace c
  in
    skip_something (s, n, i)
  end

fn
skip_nonspace {n : nat}
              {i : nat | i <= n}
              (s : string n,
               n : size_t n,
               i : size_t i) :<>
    [j : nat | i <= j; j <= n]
    size_t j =
  let
    implement skip_something$pred<> (c) = ~isspace c
  in
    skip_something (s, n, i)
  end

fn
skip_nonquote {n : nat}
              {i : nat | i <= n}
              (s : string n,
               n : size_t n,
               i : size_t i) :<>
    [j : nat | i <= j; j <= n]
    size_t j =
  let
    implement skip_something$pred<> (c) = c <> '"'
  in
    skip_something (s, n, i)
  end

fn
skip_string_literal
          {n : nat}
          {i : nat | i <= n}
          (s : string n,
           n : size_t n,
           i : size_t i) :<>
    [j : nat | i <= j; j <= n]
    size_t j =
  if i = n then
    i
  else if s[i] <> '"' then
    i
  else
    let
      val j = skip_nonquote (s, n, succ i)
    in
      if j = n then
        i
      else
        succ j
    end

fn
get_substr {n, i, j : nat | i <= j; j <= n}
           (s : string n,
            i : size_t i,
            j : size_t j) :
    [m : int | m == j - i] string m =
  let
    val s = string_make_substring (s, i, j - i)
  in
    strnptr2string s
  end

fn
string2ullint
          {n : nat}
          (s : string n) : ullint =
  let
    val n = string_length s
  in
    if n = i2sz 0 then
      $raise bad_lex_integer ("")
    else
      let
        extern castfn u2ull : uint -<> ullint

        fun
        evaluate {k : nat | k <= n} .<n - k>.
                 (k : size_t k,
                  v : ullint) : ullint =
          if k = n then
            v
          else if ~isdigit s[k] then
            $raise bad_lex_integer (s)
          else
            let
              val d = char2ui s[k] - char2ui '0'
            in
              evaluate (succ k, (10ULL * v) + u2ull d)
            end
      in
        evaluate (i2sz 0, 0ULL)
      end
  end

fn
string2token {n : int}
             (str : string n) : token_t =
  case+ string2token_t_opt str of
  | None () => $raise bad_lex_token_name (str)
  | Some tok => tok

fn
read_lex_file (inpf : FILEref) : List0 tokentuple_t =
  (* Convert the output of "lex" to a list of tokens. *)
  (* This routine could stand to do more validation of the input. *)
  let
    fun
    loop (lst : List0 tokentuple_t) : List0 tokentuple_t =
      if fileref_is_eof inpf then
        lst
      else
        let
          val s = strptr2string (fileref_get_line_string inpf)
          val n = string_length s
          prval _ = lemma_g1uint_param n

          val i0_line_no = skip_space (s, n, i2sz 0)
        in
          if i0_line_no = n then
            (* Skip any blank lines, including end of file. *)
            loop lst
          else
            let
              val i1_line_no = skip_nonspace (s, n, i0_line_no)
              val s_line_no = get_substr (s, i0_line_no, i1_line_no)
              val line_no = string2ullint s_line_no

              val i0_column_no = skip_space (s, n, i1_line_no)
              val i1_column_no = skip_nonspace (s, n, i0_column_no)
              val s_column_no = get_substr (s, i0_column_no,
                                            i1_column_no)
              val column_no = string2ullint s_column_no

              val i0_tokname = skip_space (s, n, i1_column_no)
              val i1_tokname = skip_nonspace (s, n, i0_tokname)
              val tokname = get_substr (s, i0_tokname, i1_tokname)
              val tok = string2token tokname
            in
              case+ tok of
              | TOKEN_INTEGER =>
                let
                  val i0 = skip_space (s, n, i1_tokname)
                  val i1 = skip_nonspace (s, n, i0)
                  val arg = get_substr (s, i0, i1)
                  val toktup = (tok, arg, line_no, column_no)
                in
                  loop (toktup :: lst)
                end
              | TOKEN_IDENTIFIER =>
                let
                  val i0 = skip_space (s, n, i1_tokname)
                  val i1 = skip_nonspace (s, n, i0)
                  val arg = get_substr (s, i0, i1)
                  val toktup = (tok, arg, line_no, column_no)
                in
                  loop (toktup :: lst)
                end
              | TOKEN_STRING =>
                let
                  val i0 = skip_space (s, n, i1_tokname)
                  val i1 = skip_string_literal (s, n, i0)
                  val arg = get_substr (s, i0, i1)
                  val toktup = (tok, arg, line_no, column_no)
                in
                  loop (toktup :: lst)
                end
              | _ =>
                let
                  val toktup = (tok, "", line_no, column_no)
                in
                  loop (toktup :: lst)
                end
            end
        end
  in
    list_vt2t (list_reverse (loop NIL))
  end
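
(* Each non-blank input line is expected to look like the output of
   the companion lexer: a line number, a column number, a token name,
   and (for Integer, Identifier, and String tokens) the token's text,
   separated by whitespace. A hypothetical fragment:

     1 1  Keyword_print
     1 6  LeftParen
     1 7  String "Hello, World!\n"
     1 24 RightParen
     1 25 Semicolon
     2 1  End_of_input                                              *)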

(********************************************************************)

exception truncated_lexical of ()
exception unexpected_token of (tokentuple_t, token_t)
exception unexpected_primary of (tokentuple_t)
exception unterminated_statement_block of (ullint, ullint)
exception expected_a_statement of (tokentuple_t)

datatype node_t =
  | node_t_nil of ()
  | node_t_leaf of (String, String)
  | node_t_cons of (String, node_t, node_t)

fn
right_assoc (tok : token_t) : bool =
  (* None of the currently supported operators is right
     associative. *)
  false

fn
binary_op (tok : token_t) : bool =
  case+ tok of
  | TOKEN_ADD => true
  | TOKEN_SUBTRACT => true
  | TOKEN_MULTIPLY => true
  | TOKEN_DIVIDE => true
  | TOKEN_MOD => true
  | TOKEN_LESS => true
  | TOKEN_LESSEQUAL => true
  | TOKEN_GREATER => true
  | TOKEN_GREATEREQUAL => true
  | TOKEN_EQUAL => true
  | TOKEN_NOTEQUAL => true
  | TOKEN_AND => true
  | TOKEN_OR => true
  | _ => false

fn
precedence (tok : token_t) : int =
  case+ tok of
  | TOKEN_MULTIPLY => 13
  | TOKEN_DIVIDE => 13
  | TOKEN_MOD => 13
  | TOKEN_ADD => 12
  | TOKEN_SUBTRACT => 12
  | TOKEN_NEGATE => 14
  | TOKEN_NOT => 14
  | TOKEN_LESS => 10
  | TOKEN_LESSEQUAL => 10
  | TOKEN_GREATER => 10
  | TOKEN_GREATEREQUAL => 10
  | TOKEN_EQUAL => 9
  | TOKEN_NOTEQUAL => 9
  | TOKEN_AND => 5
  | TOKEN_OR => 4
  | _ => ~1

fn
opname (tok : token_t) : String =
  case- tok of
  | TOKEN_MULTIPLY => "Multiply"
  | TOKEN_DIVIDE => "Divide"
  | TOKEN_MOD => "Mod"
  | TOKEN_ADD => "Add"
  | TOKEN_SUBTRACT => "Subtract"
  | TOKEN_NEGATE => "Negate"
  | TOKEN_NOT => "Not"
  | TOKEN_LESS => "Less"
  | TOKEN_LESSEQUAL => "LessEqual"
  | TOKEN_GREATER => "Greater"
  | TOKEN_GREATEREQUAL => "GreaterEqual"
  | TOKEN_EQUAL => "Equal"
  | TOKEN_NOTEQUAL => "NotEqual"
  | TOKEN_AND => "And"
  | TOKEN_OR => "Or"

fn
parse (lex : List0 tokentuple_t) : node_t =
  let
    typedef toktups_t (n : int) = list (tokentuple_t, n)
    typedef toktups_t = [n : nat] toktups_t n

    fn
    expect (expected : token_t,
            lex : toktups_t) : toktups_t =
      case+ lex of
      | NIL => $raise truncated_lexical ()
      | toktup :: tail =>
        if toktup.0 = expected then
          tail
        else
          $raise unexpected_token (toktup, expected)

    fn
    peek {n : int} (lex : toktups_t n) : [1 <= n] token_t =
      case+ lex of
      | NIL => $raise truncated_lexical ()
      | (tok, _, _, _) :: _ => tok

    fun
    stmt (lex : toktups_t) : (node_t, toktups_t) =
      case+ lex of
      | NIL => $raise truncated_lexical ()
      | (TOKEN_IF, _, _, _) :: lex =>
        let
          val (e, lex) = paren_expr lex
          val (s, lex) = stmt lex
        in
          case+ lex of
          | (TOKEN_ELSE, _, _, _) :: lex =>
            let
              val (t, lex) = stmt lex
            in
              (node_t_cons ("If", e, node_t_cons ("If", s, t)), lex)
            end
          | _ =>
            let
              (* There is no 'else' clause. *)
              val t = node_t_nil ()
            in
              (node_t_cons ("If", e, node_t_cons ("If", s, t)), lex)
            end
        end
      | (TOKEN_PUTC, _, _, _) :: lex =>
        let
          val (subtree, lex) = paren_expr lex
          val subtree = node_t_cons ("Prtc", subtree, node_t_nil ())
          val lex = expect (TOKEN_SEMICOLON, lex)
        in
          (subtree, lex)
        end
      | (TOKEN_PRINT, _, _, _) :: lex =>
        let
          val lex = expect (TOKEN_LEFTPAREN, lex)
          fun
          loop_over_args (subtree : node_t,
                          lex : toktups_t) : (node_t, toktups_t) =
            case+ lex of
            | (TOKEN_STRING, arg, _, _) ::
              (TOKEN_COMMA, _, _, _) :: lex =>
              let
                val leaf = node_t_leaf ("String", arg)
                val e = node_t_cons ("Prts", leaf, node_t_nil ())
              in
                loop_over_args
                  (node_t_cons ("Sequence", subtree, e), lex)
              end
            | (TOKEN_STRING, arg, _, _) :: lex =>
              let
                val lex = expect (TOKEN_RIGHTPAREN, lex)
                val lex = expect (TOKEN_SEMICOLON, lex)
                val leaf = node_t_leaf ("String", arg)
                val e = node_t_cons ("Prts", leaf, node_t_nil ())
              in
                (node_t_cons ("Sequence", subtree, e), lex)
              end
            | _ :: _ =>
              let
                val (x, lex) = expr (0, lex)
                val e = node_t_cons ("Prti", x, node_t_nil ())
                val subtree = node_t_cons ("Sequence", subtree, e)
              in
                case+ peek lex of
                | TOKEN_COMMA =>
                  let
                    val lex = expect (TOKEN_COMMA, lex)
                  in
                    loop_over_args (subtree, lex)
                  end
                | _ =>
                  let
                    val lex = expect (TOKEN_RIGHTPAREN, lex)
                    val lex = expect (TOKEN_SEMICOLON, lex)
                  in
                    (subtree, lex)
                  end
              end
            | NIL => $raise truncated_lexical ()
        in
          loop_over_args (node_t_nil (), lex)
        end
      | (TOKEN_SEMICOLON, _, _, _) :: lex => (node_t_nil (), lex)
      | (TOKEN_IDENTIFIER, arg, _, _) :: lex =>
        let
          val v = node_t_leaf ("Identifier", arg)
          val lex = expect (TOKEN_ASSIGN, lex)
          val (subtree, lex) = expr (0, lex)
          val t = node_t_cons ("Assign", v, subtree)
          val lex = expect (TOKEN_SEMICOLON, lex)
        in
          (t, lex)
        end
      | (TOKEN_WHILE, _, _, _) :: lex =>
        let
          val (e, lex) = paren_expr lex
          val (t, lex) = stmt lex
        in
          (node_t_cons ("While", e, t), lex)
        end
      | (TOKEN_LEFTBRACE, _, _, _) :: lex =>
        let
          fun
          loop_over_stmts (subtree : node_t,
                           lex : toktups_t) :
              (node_t, toktups_t) =
            case+ lex of
            | (TOKEN_RIGHTBRACE, _, _, _) :: lex => (subtree, lex)
            | (TOKEN_END_OF_INPUT, _, line_no, column_no) :: _ =>
              $raise unterminated_statement_block (line_no, column_no)
            | _ =>
              let
                val (e, lex) = stmt lex
              in
                loop_over_stmts
                  (node_t_cons ("Sequence", subtree, e), lex)
              end
        in
          loop_over_stmts (node_t_nil (), lex)
        end
      | (TOKEN_END_OF_INPUT, _, _, _) :: lex => (node_t_nil (), lex)
      | toktup :: _ => $raise expected_a_statement (toktup)

    and
    expr (prec : int,
          lex : toktups_t) : (node_t, toktups_t) =
      case+ lex of
      | (TOKEN_LEFTPAREN, _, _, _) :: _ =>
        (* '(' expr ')' *)
        let
          val (subtree, lex) = paren_expr lex
        in
          prec_climb (prec, subtree, lex)
        end
      | (TOKEN_ADD, _, _, _) :: lex =>
        (* '+' expr *)
        let
          val (subtree, lex) = expr (precedence TOKEN_ADD, lex)
        in
          prec_climb (prec, subtree, lex)
        end
      | (TOKEN_SUBTRACT, _, _, _) :: lex =>
        (* '-' expr *)
        let
          val (subtree, lex) = expr (precedence TOKEN_NEGATE, lex)
          val subtree = node_t_cons ("Negate", subtree, node_t_nil ())
        in
          prec_climb (prec, subtree, lex)
        end
      | (TOKEN_NOT, _, _, _) :: lex =>
        (* '!' expr *)
        let
          val (subtree, lex) = expr (precedence TOKEN_NOT, lex)
          val subtree = node_t_cons ("Not", subtree, node_t_nil ())
        in
          prec_climb (prec, subtree, lex)
        end
      | (TOKEN_IDENTIFIER, arg, _, _) :: lex =>
        let
          val leaf = node_t_leaf ("Identifier", arg)
        in
          prec_climb (prec, leaf, lex)
        end
      | (TOKEN_INTEGER, arg, _, _) :: lex =>
        let
          val leaf = node_t_leaf ("Integer", arg)
        in
          prec_climb (prec, leaf, lex)
        end
      | toktup :: lex =>
        $raise unexpected_primary (toktup)
      | NIL =>
        $raise truncated_lexical ()
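
    (* prec_climb implements precedence climbing: starting from a
       primary expression already in subtree, it keeps consuming
       binary operators whose precedence is at least the current
       minimum prec, recursing with a higher minimum (or the same
       one, for a right-associative operator) to build the
       right-hand operand. *)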

    and
    prec_climb (prec : int,
                subtree : node_t,
                lex : toktups_t) : (node_t, toktups_t) =
      case+ peek lex of
      | tokval =>
        if ~binary_op tokval then
          (subtree, lex)
        else if precedence tokval < prec then
          (subtree, lex)
        else
          case+ lex of
          | toktup :: lex =>
            let
              val q =
                if right_assoc (toktup.0) then
                  precedence tokval
                else
                  succ (precedence tokval)

              val (e, lex) = expr (q, lex)
              val subtree1 =
                node_t_cons (opname (toktup.0), subtree, e)
            in
              prec_climb (prec, subtree1, lex)
            end

    and
    paren_expr (lex : toktups_t) : (node_t, toktups_t) =
      (* '(' expr ')' *)
      let
        val lex = expect (TOKEN_LEFTPAREN, lex)
        val (subtree, lex) = expr (0, lex)
        val lex = expect (TOKEN_RIGHTPAREN, lex)
      in
        (subtree, lex)
      end

    fun
    main_loop (subtree : node_t,
               lex : toktups_t) : node_t =
      case+ peek lex of
      | TOKEN_END_OF_INPUT => subtree
      | _ =>
        let
          val (x, lex) = stmt lex
        in
          main_loop (node_t_cons ("Sequence", subtree, x), lex)
        end
  in
    main_loop (node_t_nil (), lex)
  end

fn
print_ast (outf : FILEref,
           ast : node_t) : void =
  let
    fun
    traverse (ast : node_t) : void =
      case+ ast of
      | node_t_nil () => fprintln! (outf, ";")
      | node_t_leaf (str, arg) => fprintln! (outf, str, " ", arg)
      | node_t_cons (str, left, right) =>
        begin
          fprintln! (outf, str);
          traverse left;
          traverse right
        end
  in
    traverse ast
  end
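
(* print_ast writes the AST in flattened pre-order: one line per
   node, ";" for an empty node, and "name value" for a leaf. For
   example, a program consisting of the single statement

     count = 1;

   comes out as:

     Sequence
     ;
     Assign
     Identifier count
     Integer 1                                                      *)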

(********************************************************************)

fn
main_program (inpf : FILEref,
              outf : FILEref) : int =
  let
    val toklst = read_lex_file inpf
    val ast = parse toklst
    val () = print_ast (outf, ast)
  in
    0
  end

fn
error_start (line_no : ullint,
             column_no : ullint) : void =
  print! ("(", line_no, ", ", column_no, ") error: ")

implement
main (argc, argv) =
  let
    val inpfname =
      if 2 <= argc then
        $UN.cast{string} argv[1]
      else
        "-"
    val outfname =
      if 3 <= argc then
        $UN.cast{string} argv[2]
      else
        "-"
  in
    try
      let
        val inpf =
          if (inpfname : string) = "-" then
            stdin_ref
          else
            fileref_open_exn (inpfname, file_mode_r)

        val outf =
          if (outfname : string) = "-" then
            stdout_ref
          else
            fileref_open_exn (outfname, file_mode_w)
      in
        main_program (inpf, outf)
      end
    with
    | ~ unexpected_primary @(tok, _, line_no, column_no) =>
      begin
        error_start (line_no, column_no);
        println! ("Expecting a primary, found: ", token_text tok);
        1
      end
    | ~ unexpected_token (@(tok, _, line_no, column_no), expected) =>
      begin
        error_start (line_no, column_no);
        println! ("Expecting '", token_text expected,
                  "', found '", token_text tok, "'");
        1
      end
    | ~ expected_a_statement @(tok, _, line_no, column_no) =>
      begin
        error_start (line_no, column_no);
        println! ("expecting start of statement, found '",
                  token_text tok, "'");
        1
      end
    | ~ unterminated_statement_block (line_no, column_no) =>
      begin
        error_start (line_no, column_no);
        println! ("unterminated statement block");
        1
      end
    | ~ truncated_lexical () =>
      begin
        println! ("truncated input token stream");
        2
      end
    | ~ bad_lex_integer (s) =>
      begin
        println! ("bad integer literal in the token stream: '",
                  s, "'");
        2
      end
    | ~ bad_string_literal (s) =>
      begin
        println! ("bad string literal in the token stream: '",
                  s, "'");
        2
      end
    | ~ bad_lex_token_name (s) =>
      begin
        println! ("bad token name in the token stream: '",
                  s, "'");
        2
      end
  end

(********************************************************************)