# -*- ObjectIcon -*- # # The Rosetta Code lexical analyzer in Object Icon. Based upon the ATS # implementation. # # Usage: lex [INPUTFILE [OUTPUTFILE]] # If INPUTFILE or OUTPUTFILE is "-" or missing, then standard input # or standard output is used, respectively. *) # import io $define EOF -1 $define TOKEN_ELSE 0 $define TOKEN_IF 1 $define TOKEN_PRINT 2 $define TOKEN_PUTC 3 $define TOKEN_WHILE 4 $define TOKEN_MULTIPLY 5 $define TOKEN_DIVIDE 6 $define TOKEN_MOD 7 $define TOKEN_ADD 8 $define TOKEN_SUBTRACT 9 $define TOKEN_NEGATE 10 $define TOKEN_LESS 11 $define TOKEN_LESSEQUAL 12 $define TOKEN_GREATER 13 $define TOKEN_GREATEREQUAL 14 $define TOKEN_EQUAL 15 $define TOKEN_NOTEQUAL 16 $define TOKEN_NOT 17 $define TOKEN_ASSIGN 18 $define TOKEN_AND 19 $define TOKEN_OR 20 $define TOKEN_LEFTPAREN 21 $define TOKEN_RIGHTPAREN 22 $define TOKEN_LEFTBRACE 23 $define TOKEN_RIGHTBRACE 24 $define TOKEN_SEMICOLON 25 $define TOKEN_COMMA 26 $define TOKEN_IDENTIFIER 27 $define TOKEN_INTEGER 28 $define TOKEN_STRING 29 $define TOKEN_END_OF_INPUT 30 global whitespace global ident_start global ident_continuation procedure main(args) local inpf, outf local pushback_buffer, inp, pushback initial { whitespace := ' \t\v\f\r\n' ident_start := '_' ++ &letters ident_continuation := ident_start ++ &digits } inpf := FileStream.stdin outf := FileStream.stdout if 1 <= *args & args[1] ~== "-" then { inpf := FileStream(args[1], FileOpt.RDONLY) | stop(&why) } if 2 <= *args & args[2] ~== "-" then { outf := FileStream(args[2], ior(FileOpt.WRONLY, FileOpt.TRUNC, FileOpt.CREAT)) | stop(&why) } pushback_buffer := [] inp := create inputter(inpf, pushback_buffer) pushback := create repeat push(pushback_buffer, \@&source) @pushback # The first invocation does nothing. scan_text(outf, inp, pushback) end procedure scan_text(outf, inp, pushback) local ch while /ch | ch[1] ~=== EOF do { skip_spaces_and_comments(inp, pushback) ch := @inp if ch[1] === EOF then { print_token(outf, [TOKEN_END_OF_INPUT, "", ch[2], ch[3]]) } else { ch @pushback print_token(outf, get_next_token(inp, pushback)) } } end procedure get_next_token(inp, pushback) local ch, ch1 local ln, cn skip_spaces_and_comments(inp, pushback) ch := @inp ln := ch[2] # line number cn := ch[3] # column number case ch[1] of { "," : return [TOKEN_COMMA, ",", ln, cn] ";" : return [TOKEN_SEMICOLON, ";", ln, cn] "(" : return [TOKEN_LEFTPAREN, "(", ln, cn] ")" : return [TOKEN_RIGHTPAREN, ")", ln, cn] "{" : return [TOKEN_LEFTBRACE, "{", ln, cn] "}" : return [TOKEN_RIGHTBRACE, "}", ln, cn] "*" : return [TOKEN_MULTIPLY, "*", ln, cn] "/" : return [TOKEN_DIVIDE, "/", ln, cn] "%" : return [TOKEN_MOD, "%", ln, cn] "+" : return [TOKEN_ADD, "+", ln, cn] "-" : return [TOKEN_SUBTRACT, "-", ln, cn] "<" : { ch1 := @inp if ch1[1] === "=" then { return [TOKEN_LESSEQUAL, "<=", ln, cn] } else { ch1 @pushback return [TOKEN_LESS, "<", ln, cn] } } ">" : { ch1 := @inp if ch1[1] === "=" then { return [TOKEN_GREATEREQUAL, ">=", ln, cn] } else { ch1 @pushback return [TOKEN_GREATER, ">", ln, cn] } } "=" : { ch1 := @inp if ch1[1] === "=" then { return [TOKEN_EQUAL, "==", ln, cn] } else { ch1 @pushback return [TOKEN_ASSIGN, "=", ln, cn] } } "!" : { ch1 := @inp if ch1[1] === "=" then { return [TOKEN_NOTEQUAL, "!=", ln, cn] } else { ch1 @pushback return [TOKEN_NOT, "!", ln, cn] } } "&" : { ch1 := @inp if ch1[1] === "&" then { return [TOKEN_AND, "&&", ln, cn] } else { unexpected_character(ln, cn, ch) } } "|" : { ch1 := @inp if ch1[1] === "|" then { return [TOKEN_OR, "||", ln, cn] } else { unexpected_character(ln, cn, ch) } } "\"" : { ch @pushback return scan_string_literal(inp) } "'" : { ch @pushback return scan_character_literal(inp, pushback) } default : { if any(&digits, ch[1]) then { ch @pushback return scan_integer_literal(inp, pushback) } else if any(ident_start, ch[1]) then { ch @pushback return scan_identifier_or_reserved_word (inp, pushback) } else { unexpected_character(ln, cn, ch) } } } end procedure scan_identifier_or_reserved_word(inp, pushback) local ch local s local line_no, column_no s := "" ch := @inp line_no := ch[2] column_no := ch[3] while EOF ~=== ch[1] & any(ident_continuation, ch[1]) do { s ||:= ch[1] ch := @inp } ch @pushback return reserved_word_lookup (s, line_no, column_no) end procedure scan_integer_literal(inp, pushback) local ch local s local line_no, column_no s := "" ch := @inp line_no := ch[2] column_no := ch[3] while EOF ~=== ch[1] & any(ident_continuation, ch[1]) do { s ||:= ch[1] ch := @inp } ch @pushback not upto(~&digits, s) | invalid_integer_literal(line_no, column_no, s) return [TOKEN_INTEGER, s, line_no, column_no] end procedure scan_character_literal(inp, pushback) local ch, ch1 local close_quote local toktup local line_no, column_no ch := @inp # The opening quote. close_quote := ch[1] # Same as the opening quote. ch @pushback line_no := ch[2] column_no := ch[3] toktup := scan_character_literal_without_checking_end(inp) ch1 := @inp if ch1[1] ~=== close_quote then { repeat { case ch1[1] of { EOF : unterminated_character_literal(line_no, column_no) close_quote : multicharacter_literal(line_no, column_no) default : ch1 := @inp } } } return toktup end procedure scan_character_literal_without_checking_end(inp) local ch, ch1, ch2 ch := @inp # The opening quote. ch1 := @inp EOF ~=== ch1[1] | unterminated_character_literal(ch[2], ch[3]) if ch1[1] == "\\" then { ch2 := @inp EOF ~=== ch2[1] | unterminated_character_literal(ch[2], ch[3]) case ch2[1] of { "n" : return [TOKEN_INTEGER, string(ord("\n")), ch[2], ch[3]] "\\" : return [TOKEN_INTEGER, string(ord("\\")), ch[2], ch[3]] default : unsupported_escape(ch1[2], ch1[3], ch2) } } else { return [TOKEN_INTEGER, string(ord(ch1[1])), ch[2], ch[3]] } end procedure scan_string_literal(inp) local ch, ch1, ch2 local line_no, column_no local close_quote local s local retval ch := @inp # The opening quote close_quote := ch[1] # Same as the opening quote. line_no := ch[2] column_no := ch[3] s := ch[1] until \retval do { ch1 := @inp ch1[1] ~=== EOF | unterminated_string_literal (line_no, column_no, "end of input") ch1[1] ~== "\n" | unterminated_string_literal (line_no, column_no, "end of line") if ch1[1] == close_quote then { retval := [TOKEN_STRING, s || close_quote, line_no, column_no] } else if ch1[1] ~== "\\" then { s ||:= ch1[1] } else { ch2 := @inp EOF ~=== ch2[1] | unsupported_escape(line_no, column_no, ch2) case ch2[1] of { "n" : s ||:= "\\n" "\\" : s ||:= "\\\\" default : unsupported_escape(line_no, column_no, ch2) } } } return retval end procedure skip_spaces_and_comments(inp, pushback) local ch, ch1 repeat { ch := @inp (EOF === ch[1]) & { ch @pushback; return } if not any(whitespace, ch[1]) then { (ch[1] == "/") | { ch @pushback; return } (ch1 := @inp) | { ch @pushback; return } (ch1[1] == "*") | { ch1 @pushback; ch @pushback; return } scan_comment(inp, ch[2], ch[3]) } } end procedure scan_comment(inp, line_no, column_no) local ch, ch1 until (\ch)[1] == "*" & (\ch1)[1] == "/" do { ch := @inp (EOF === ch[1]) & unterminated_comment(line_no, column_no) if ch[1] == "*" then { ch1 := @inp (EOF === ch[1]) & unterminated_comment(line_no, column_no) } } return end procedure reserved_word_lookup(s, line_no, column_no) # Lookup is by an extremely simple perfect hash. static reserved_words static reserved_word_tokens local hashval, token, toktup initial { reserved_words := ["if", "print", "else", "", "putc", "", "", "while", ""] reserved_word_tokens := [TOKEN_IF, TOKEN_PRINT, TOKEN_ELSE, TOKEN_IDENTIFIER, TOKEN_PUTC, TOKEN_IDENTIFIER, TOKEN_IDENTIFIER, TOKEN_WHILE, TOKEN_IDENTIFIER] } if *s < 2 then { toktup := [TOKEN_IDENTIFIER, s, line_no, column_no] } else { hashval := ((ord(s[1]) + ord(s[2])) % (*reserved_words)) + 1 token := reserved_word_tokens[hashval] if token = TOKEN_IDENTIFIER | s ~== reserved_words[hashval] then { toktup := [TOKEN_IDENTIFIER, s, line_no, column_no] } else { toktup := [token, s, line_no, column_no] } } return toktup end procedure print_token(outf, toktup) static token_names local s_line, s_column initial { token_names := ["Keyword_else", "Keyword_if", "Keyword_print", "Keyword_putc", "Keyword_while", "Op_multiply", "Op_divide", "Op_mod", "Op_add", "Op_subtract", "Op_negate", "Op_less", "Op_lessequal", "Op_greater", "Op_greaterequal", "Op_equal", "Op_notequal", "Op_not", "Op_assign", "Op_and", "Op_or", "LeftParen", "RightParen", "LeftBrace", "RightBrace", "Semicolon", "Comma", "Identifier", "Integer", "String", "End_of_input"] } /outf := FileStream.stdout s_line := string(toktup[3]) s_column := string(toktup[4]) writes(outf, right (s_line, max(5, *s_line))) writes(outf, " ") writes(outf, right (s_column, max(5, *s_column))) writes(outf, " ") writes(outf, token_names[toktup[1] + 1]) case toktup[1] of { TOKEN_IDENTIFIER : writes(outf, " ", toktup[2]) TOKEN_INTEGER : writes(outf, " ", toktup[2]) TOKEN_STRING : writes(outf, " ", toktup[2]) } write(outf) return end procedure inputter(inpf, pushback_buffer) local buffer local line_no, column_no local c buffer := "" line_no := 1 column_no := 1 repeat { buffer? { until *pushback_buffer = 0 & pos(0) do { if *pushback_buffer ~= 0 then { suspend pop(pushback_buffer) } else { c := move(1) suspend [c, line_no, column_no] if c == "\n" then { line_no +:= 1 column_no := 1 } else { column_no +:= 1 } } } } (buffer := reads(inpf, 2048)) | suspend [EOF, line_no, column_no] } end procedure unterminated_comment(line_no, column_no) error("unterminated comment starting at ", line_no, ":", column_no) end procedure unexpected_character(line_no, column_no, ch) error("unexpected character '", ch[1], "' starting at ", line_no, ":", column_no) end procedure unterminated_string_literal (line_no, column_no, cause) error("unterminated string literal (", cause, ") starting at ", line_no, ":", column_no) end procedure unsupported_escape (line_no, column_no, ch) if ch[1] === EOF then { error("unexpected \\ at end of input", " starting at ", line_no, ":", column_no) } else { error("unsupported escape \\", ch[1], " starting at ", line_no, ":", column_no) } end procedure invalid_integer_literal(line_no, column_no, s) error("invalid integer literal ", s, " starting at ", line_no, ":", column_no) end procedure unterminated_character_literal(line_no, column_no) error("unterminated character literal starting at ", line_no, ":", column_no) end procedure multicharacter_literal(line_no, column_no) error("unsupported multicharacter literal starting at ", line_no, ":", column_no) end procedure error(args[]) write!([FileStream.stderr] ||| args) exit(1) end