const std = @import("std");

/// Value payload carried by AST leaf nodes: integer literals, plus string
/// literals and identifiers (both stored as strings).
pub const NodeValue = union(enum) {
    integer: i32,
    string: []const u8,

    fn fromToken(token: Token) ?NodeValue {
        if (token.value) |value| {
            switch (value) {
                .integer => |int| return NodeValue{ .integer = int },
                .string => |str| return NodeValue{ .string = str },
            }
        } else {
            return null;
        }
    }
};

/// A binary AST node. Statement lists are chained through `.sequence` nodes;
/// leaves carry their literal or identifier in `value`.
pub const Tree = struct {
    left: ?*Tree,
    right: ?*Tree,
    typ: NodeType,
    value: ?NodeValue = null,
};

pub const ParserError = error{
    OutOfMemory,
    ExpectedNotFound,
} || std.fmt.ParseIntError;

/// Recursive-descent parser that turns the lexer's textual token stream into
/// an AST allocated with `allocator`.
pub const Parser = struct {
    token_it: LexerOutputTokenizer,
    curr: Token,
    allocator: std.mem.Allocator,

    const Self = @This();

    pub fn init(allocator: std.mem.Allocator, str: []const u8) Self {
        return Self{
            .token_it = LexerOutputTokenizer.init(str),
            .curr = Token{ .line = 0, .col = 0, .typ = .unknown },
            .allocator = allocator,
        };
    }

    fn makeNode(self: *Self, typ: NodeType, left: ?*Tree, right: ?*Tree) !*Tree {
        const result = try self.allocator.create(Tree);
        result.* = Tree{ .left = left, .right = right, .typ = typ };
        return result;
    }

    fn makeLeaf(self: *Self, typ: NodeType, value: ?NodeValue) !*Tree {
        const result = try self.allocator.create(Tree);
        result.* = Tree{ .left = null, .right = null, .typ = typ, .value = value };
        return result;
    }

    /// Parses the whole token stream, chaining statements into a left-leaning
    /// list of `.sequence` nodes.
    pub fn parse(self: *Self) ParserError!?*Tree {
        try self.next();
        var result: ?*Tree = null;
        while (true) {
            const stmt = try self.parseStmt();
            result = try self.makeNode(.sequence, result, stmt);
            if (self.curr.typ == .eof) break;
        }
        return result;
    }
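
    // Illustration (not executed): parsing the statements s1; s2; s3 yields
    // Sequence(Sequence(Sequence(null, s1), s2), s3), i.e. a left-leaning
    // chain whose empty left end is printed as `;` by treeToString.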

    /// Classic recursive-descent statement parser.
    fn parseStmt(self: *Self) ParserError!?*Tree {
        var result: ?*Tree = null;
        switch (self.curr.typ) {
            .kw_print => {
                try self.next();
                try self.expect(.left_paren);
                // Parse each of print's arguments as an expression, separated
                // by commas, until we reach the closing paren.
                while (true) {
                    var expr: ?*Tree = null;
                    if (self.curr.typ == .string) {
                        expr = try self.makeNode(
                            .prts,
                            try self.makeLeaf(.string, NodeValue.fromToken(self.curr)),
                            null,
                        );
                        try self.next();
                    } else {
                        expr = try self.makeNode(.prti, try self.parseExpr(0), null);
                    }
                    result = try self.makeNode(.sequence, result, expr);
                    if (self.curr.typ != .comma) break;
                    try self.next();
                }
                try self.expect(.right_paren);
                try self.expect(.semicolon);
            },
            .kw_putc => {
                try self.next();
                result = try self.makeNode(.prtc, try self.parseParenExpr(), null);
                try self.expect(.semicolon);
            },
            .kw_while => {
                try self.next();
                const expr = try self.parseParenExpr();
                result = try self.makeNode(.kw_while, expr, try self.parseStmt());
            },
            .kw_if => {
                try self.next();
                const expr = try self.parseParenExpr();
                const if_stmt = try self.parseStmt();
                const else_stmt = blk: {
                    if (self.curr.typ == .kw_else) {
                        try self.next();
                        break :blk try self.parseStmt();
                    } else {
                        break :blk null;
                    }
                };
                const stmt_node = try self.makeNode(.kw_if, if_stmt, else_stmt);
                // An if-statement is encoded as two `.kw_if` nodes: the outer
                // node holds the condition on the left, and its right child is
                // a second `.kw_if` node holding the if-branch and else-branch.
                result = try self.makeNode(.kw_if, expr, stmt_node);
            },
            .left_brace => {
                try self.next();
                while (self.curr.typ != .right_brace and self.curr.typ != .eof) {
                    result = try self.makeNode(.sequence, result, try self.parseStmt());
                }
                try self.expect(.right_brace);
            },
            .identifier => {
                const identifier = try self.makeLeaf(.identifier, NodeValue.fromToken(self.curr));
                try self.next();
                try self.expect(.assign);
                const expr = try self.parseExpr(0);
                result = try self.makeNode(.assign, identifier, expr);
                try self.expect(.semicolon);
            },
            .semicolon => try self.next(),
            else => {
                std.debug.print("\nSTMT: UNKNOWN {}\n", .{self.curr});
                std.os.exit(1);
            },
        }
        return result;
    }
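
    // Illustration (not executed): `putc(c);` parses to Prtc(expr(c), null)
    // and `while (cond) stmt` parses to While(expr(cond), stmt), each spliced
    // into the enclosing Sequence chain by the caller.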

    /// "Precedence climbing" expression parser.
    fn parseExpr(self: *Self, precedence: i8) ParserError!?*Tree {
        var result: ?*Tree = null;
        switch (self.curr.typ) {
            .left_paren => {
                result = try self.parseParenExpr();
            },
            .subtract => {
                try self.next();
                const metadata = NodeMetadata.find(.negate);
                const expr = try self.parseExpr(metadata.precedence);
                result = try self.makeNode(.negate, expr, null);
            },
            .not => {
                try self.next();
                const metadata = NodeMetadata.find(.not);
                const expr = try self.parseExpr(metadata.precedence);
                result = try self.makeNode(.not, expr, null);
            },
            .add => {
                try self.next();
                result = try self.parseExpr(precedence);
            },
            .integer, .identifier => {
                const node_type = NodeMetadata.find(self.curr.typ).node_type;
                result = try self.makeLeaf(node_type, NodeValue.fromToken(self.curr));
                try self.next();
            },
            else => {
                std.debug.print("\nEXPR: UNKNOWN {}\n", .{self.curr});
                std.os.exit(1);
            },
        }

        // Fold in binary operators as long as their precedence is at least the
        // caller's minimum; left-associative operators recurse with a higher
        // minimum so that equal-precedence operators group to the left.
        var curr_metadata = NodeMetadata.find(self.curr.typ);
        while (curr_metadata.binary and curr_metadata.precedence >= precedence) {
            const new_precedence = if (curr_metadata.right_associative)
                curr_metadata.precedence
            else
                curr_metadata.precedence + 1;
            try self.next();
            const sub_expr = try self.parseExpr(new_precedence);
            result = try self.makeNode(curr_metadata.node_type, result, sub_expr);
            curr_metadata = NodeMetadata.find(self.curr.typ);
        }
        return result;
    }
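
    // Illustration (not executed): for `a + b * c`, parseExpr(0) reads `a`,
    // sees `+` (precedence 12 >= 0) and recurses with minimum precedence 13.
    // That call reads `b`, then (since `*` has precedence 13 >= 13) consumes
    // `* c` and returns Multiply(b, c); the outer call then builds
    // Add(a, Multiply(b, c)), so multiplication binds tighter than addition.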

    fn parseParenExpr(self: *Self) ParserError!?*Tree {
        try self.expect(.left_paren);
        const result = try self.parseExpr(0);
        try self.expect(.right_paren);
        return result;
    }

    /// Advances to the next token; when the lexer output is exhausted, falls
    /// back to a placeholder `.unknown` token.
    fn next(self: *Self) ParserError!void {
        const token = try self.token_it.next();
        if (token) |tok| {
            self.curr = tok;
        } else {
            self.curr = Token{ .line = 0, .col = 0, .typ = .unknown };
        }
    }

    /// Consumes the current token if it has the expected type, otherwise
    /// reports the mismatch and returns `ParserError.ExpectedNotFound`.
    fn expect(self: *Self, token_type: TokenType) ParserError!void {
        if (self.curr.typ != token_type) {
            const expected_str = NodeMetadata.find(token_type).token_str;
            const found_str = NodeMetadata.find(self.curr.typ).token_str;
            std.debug.print(
                "({d}, {d}) error: Expecting '{s}', found '{s}'\n",
                .{ self.curr.line, self.curr.col, expected_str, found_str },
            );
            return ParserError.ExpectedNotFound;
        }
        try self.next();
    }
};

pub fn parse(allocator: std.mem.Allocator, str: []const u8) !?*Tree {
    var parser = Parser.init(allocator, str);
    return try parser.parse();
}
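
// A minimal end-to-end sketch, not part of the original program: it feeds a
// hand-written lexer output for `count = 1;` through `parse` and
// `astToFlattenedString` and checks the flattened AST (`;` marks the absent
// left end of the Sequence chain).
test "parse a single assignment into a flattened AST (sketch)" {
    var arena = std.heap.ArenaAllocator.init(std.testing.allocator);
    defer arena.deinit();
    const allocator = arena.allocator();

    const lexer_output =
        \\1 1 Identifier count
        \\1 7 Op_assign
        \\1 9 Integer 1
        \\1 10 Semicolon
        \\2 1 End_of_input
    ;
    const tree = try parse(allocator, lexer_output);
    const flattened = try astToFlattenedString(allocator, tree);
    try std.testing.expectEqualStrings(
        "Sequence\n;\nAssign\nIdentifier count\nInteger 1\n",
        flattened,
    );
}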

pub fn main() !void {
    var arena = std.heap.ArenaAllocator.init(std.heap.page_allocator);
    defer arena.deinit();
    const allocator = arena.allocator();

    var arg_it = std.process.args();
    _ = try arg_it.next(allocator) orelse unreachable; // program name
    const file_name = arg_it.next(allocator);
    // We accept both files and standard input.
    var file_handle = blk: {
        if (file_name) |file_name_delimited| {
            const fname: []const u8 = try file_name_delimited;
            break :blk try std.fs.cwd().openFile(fname, .{});
        } else {
            break :blk std.io.getStdIn();
        }
    };
    defer file_handle.close();
    const input_content = try file_handle.readToEndAlloc(allocator, std.math.maxInt(usize));

    const result: ?*Tree = try parse(allocator, input_content);
    const result_str = try astToFlattenedString(allocator, result);
    _ = try std.io.getStdOut().write(result_str);
}
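
// Usage note (illustrative; file and binary names are made up): the lexer
// output can be supplied as a file argument, e.g. `./parser tokens.lex`, or
// piped on standard input, e.g. `cat tokens.lex | ./parser`.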

/// Per-token parsing metadata: operator arity, associativity and precedence,
/// the AST node type it maps to, and the display string used in error messages.
const NodeMetadata = struct {
    token_type: TokenType,
    right_associative: bool,
    binary: bool,
    unary: bool,
    precedence: i8,
    node_type: NodeType,
    token_str: []const u8,

    const table = [_]NodeMetadata{
        .{ .token_type = .multiply, .right_associative = false, .binary = true, .unary = false, .precedence = 13, .node_type = .multiply, .token_str = "*" },
        .{ .token_type = .divide, .right_associative = false, .binary = true, .unary = false, .precedence = 13, .node_type = .divide, .token_str = "/" },
        .{ .token_type = .mod, .right_associative = false, .binary = true, .unary = false, .precedence = 13, .node_type = .mod, .token_str = "%" },
        .{ .token_type = .add, .right_associative = false, .binary = true, .unary = false, .precedence = 12, .node_type = .add, .token_str = "+" },
        .{ .token_type = .subtract, .right_associative = false, .binary = true, .unary = false, .precedence = 12, .node_type = .subtract, .token_str = "-" },
        .{ .token_type = .negate, .right_associative = false, .binary = false, .unary = true, .precedence = 14, .node_type = .negate, .token_str = "-" },
        .{ .token_type = .less, .right_associative = false, .binary = true, .unary = false, .precedence = 10, .node_type = .less, .token_str = "<" },
        .{ .token_type = .less_equal, .right_associative = false, .binary = true, .unary = false, .precedence = 10, .node_type = .less_equal, .token_str = "<=" },
        .{ .token_type = .greater, .right_associative = false, .binary = true, .unary = false, .precedence = 10, .node_type = .greater, .token_str = ">" },
        .{ .token_type = .greater_equal, .right_associative = false, .binary = true, .unary = false, .precedence = 10, .node_type = .greater_equal, .token_str = ">=" },
        .{ .token_type = .equal, .right_associative = false, .binary = true, .unary = false, .precedence = 9, .node_type = .equal, .token_str = "==" },
        .{ .token_type = .not_equal, .right_associative = false, .binary = true, .unary = false, .precedence = 9, .node_type = .not_equal, .token_str = "!=" },
        .{ .token_type = .not, .right_associative = false, .binary = false, .unary = true, .precedence = 14, .node_type = .not, .token_str = "!" },
        .{ .token_type = .assign, .right_associative = false, .binary = false, .unary = false, .precedence = -1, .node_type = .assign, .token_str = "=" },
        .{ .token_type = .bool_and, .right_associative = false, .binary = true, .unary = false, .precedence = 5, .node_type = .bool_and, .token_str = "&&" },
        .{ .token_type = .bool_or, .right_associative = false, .binary = true, .unary = false, .precedence = 4, .node_type = .bool_or, .token_str = "||" },
        .{ .token_type = .left_paren, .right_associative = false, .binary = false, .unary = false, .precedence = -1, .node_type = .unknown, .token_str = "(" },
        .{ .token_type = .right_paren, .right_associative = false, .binary = false, .unary = false, .precedence = -1, .node_type = .unknown, .token_str = ")" },
        .{ .token_type = .left_brace, .right_associative = false, .binary = false, .unary = false, .precedence = -1, .node_type = .unknown, .token_str = "{" },
        .{ .token_type = .right_brace, .right_associative = false, .binary = false, .unary = false, .precedence = -1, .node_type = .unknown, .token_str = "}" },
        .{ .token_type = .semicolon, .right_associative = false, .binary = false, .unary = false, .precedence = -1, .node_type = .unknown, .token_str = ";" },
        .{ .token_type = .comma, .right_associative = false, .binary = false, .unary = false, .precedence = -1, .node_type = .unknown, .token_str = "," },
        .{ .token_type = .kw_if, .right_associative = false, .binary = false, .unary = false, .precedence = -1, .node_type = .kw_if, .token_str = "if" },
        .{ .token_type = .kw_else, .right_associative = false, .binary = false, .unary = false, .precedence = -1, .node_type = .unknown, .token_str = "else" },
        .{ .token_type = .kw_while, .right_associative = false, .binary = false, .unary = false, .precedence = -1, .node_type = .kw_while, .token_str = "while" },
        .{ .token_type = .kw_print, .right_associative = false, .binary = false, .unary = false, .precedence = -1, .node_type = .unknown, .token_str = "print" },
        .{ .token_type = .kw_putc, .right_associative = false, .binary = false, .unary = false, .precedence = -1, .node_type = .unknown, .token_str = "putc" },
        .{ .token_type = .identifier, .right_associative = false, .binary = false, .unary = false, .precedence = -1, .node_type = .identifier, .token_str = "Identifier" },
        .{ .token_type = .integer, .right_associative = false, .binary = false, .unary = false, .precedence = -1, .node_type = .integer, .token_str = "Integer literal" },
        .{ .token_type = .string, .right_associative = false, .binary = false, .unary = false, .precedence = -1, .node_type = .string, .token_str = "String literal" },
        .{ .token_type = .eof, .right_associative = false, .binary = false, .unary = false, .precedence = -1, .node_type = .unknown, .token_str = "End of input" },
    };

    pub fn find(token_type: TokenType) NodeMetadata {
        for (table) |metadata| {
            if (metadata.token_type == token_type) return metadata;
        } else {
            unreachable;
        }
    }
};
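
// A small sanity-check sketch, not in the original source: `*` must bind
// tighter than `+`, and both are binary, left-associative operators.
test "NodeMetadata.find exposes operator precedence (sketch)" {
    const mul = NodeMetadata.find(.multiply);
    const add = NodeMetadata.find(.add);
    try std.testing.expect(mul.precedence > add.precedence);
    try std.testing.expect(mul.binary and add.binary);
    try std.testing.expect(!mul.right_associative and !add.right_associative);
}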

pub const NodeType = enum {
    unknown,
    identifier,
    string,
    integer,
    sequence,
    kw_if,
    prtc,
    prts,
    prti,
    kw_while,
    assign,
    negate,
    not,
    multiply,
    divide,
    mod,
    add,
    subtract,
    less,
    less_equal,
    greater,
    greater_equal,
    equal,
    not_equal,
    bool_and,
    bool_or,

    pub fn toString(self: NodeType) []const u8 {
        return switch (self) {
            .unknown => "UNKNOWN",
            .identifier => "Identifier",
            .string => "String",
            .integer => "Integer",
            .sequence => "Sequence",
            .kw_if => "If",
            .prtc => "Prtc",
            .prts => "Prts",
            .prti => "Prti",
            .kw_while => "While",
            .assign => "Assign",
            .negate => "Negate",
            .not => "Not",
            .multiply => "Multiply",
            .divide => "Divide",
            .mod => "Mod",
            .add => "Add",
            .subtract => "Subtract",
            .less => "Less",
            .less_equal => "LessEqual",
            .greater => "Greater",
            .greater_equal => "GreaterEqual",
            .equal => "Equal",
            .not_equal => "NotEqual",
            .bool_and => "And",
            .bool_or => "Or",
        };
    }
};

/// Renders the AST in the flattened textual form written to stdout: one node
/// per line, with `;` standing in for an absent child.
fn astToFlattenedString(allocator: std.mem.Allocator, tree: ?*Tree) ![]const u8 {
    var result = std.ArrayList(u8).init(allocator);
    var writer = result.writer();
    try treeToString(allocator, writer, tree);
    return result.items;
}

pub const TokenType = enum {
    unknown,
    multiply,
    divide,
    mod,
    add,
    subtract,
    negate,
    less,
    less_equal,
    greater,
    greater_equal,
    equal,
    not_equal,
    not,
    assign,
    bool_and,
    bool_or,
    left_paren,
    right_paren,
    left_brace,
    right_brace,
    semicolon,
    comma,
    kw_if,
    kw_else,
    kw_while,
    kw_print,
    kw_putc,
    identifier,
    integer,
    string,
    eof,

    const from_string_map = std.ComptimeStringMap(TokenType, .{
        .{ "Op_multiply", .multiply },
        .{ "Op_divide", .divide },
        .{ "Op_mod", .mod },
        .{ "Op_add", .add },
        .{ "Op_subtract", .subtract },
        .{ "Op_negate", .negate },
        .{ "Op_less", .less },
        .{ "Op_lessequal", .less_equal },
        .{ "Op_greater", .greater },
        .{ "Op_greaterequal", .greater_equal },
        .{ "Op_equal", .equal },
        .{ "Op_notequal", .not_equal },
        .{ "Op_not", .not },
        .{ "Op_assign", .assign },
        .{ "Op_and", .bool_and },
        .{ "Op_or", .bool_or },
        .{ "LeftParen", .left_paren },
        .{ "RightParen", .right_paren },
        .{ "LeftBrace", .left_brace },
        .{ "RightBrace", .right_brace },
        .{ "Semicolon", .semicolon },
        .{ "Comma", .comma },
        .{ "Keyword_if", .kw_if },
        .{ "Keyword_else", .kw_else },
        .{ "Keyword_while", .kw_while },
        .{ "Keyword_print", .kw_print },
        .{ "Keyword_putc", .kw_putc },
        .{ "Identifier", .identifier },
        .{ "Integer", .integer },
        .{ "String", .string },
        .{ "End_of_input", .eof },
    });

    pub fn fromString(str: []const u8) TokenType {
        return from_string_map.get(str).?;
    }
};
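
// A minimal sketch, not in the original source, of the lexer-name lookup used
// while reading the token stream.
test "TokenType.fromString maps lexer token names (sketch)" {
    try std.testing.expect(TokenType.fromString("Op_add") == .add);
    try std.testing.expect(TokenType.fromString("Keyword_while") == .kw_while);
    try std.testing.expect(TokenType.fromString("End_of_input") == .eof);
}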

pub const TokenValue = union(enum) {
    integer: i32,
    string: []const u8,
};

pub const Token = struct {
    line: usize,
    col: usize,
    typ: TokenType = .unknown,
    value: ?TokenValue = null,
};

const TreeToStringError = error{OutOfMemory};

/// Pre-order walk over the AST that writes one line per node: the node name,
/// followed by its value for leaves, with `;` emitted for a missing child.
fn treeToString(
    allocator: std.mem.Allocator,
    writer: std.ArrayList(u8).Writer,
    tree: ?*Tree,
) TreeToStringError!void {
    if (tree) |t| {
        _ = try writer.write(try std.fmt.allocPrint(
            allocator,
            "{s}",
            .{t.typ.toString()},
        ));
        switch (t.typ) {
            .string, .identifier => _ = try writer.write(try std.fmt.allocPrint(
                allocator,
                " {s}\n",
                .{t.value.?.string},
            )),
            .integer => _ = try writer.write(try std.fmt.allocPrint(
                allocator,
                " {d}\n",
                .{t.value.?.integer},
            )),
            else => {
                _ = try writer.write(try std.fmt.allocPrint(
                    allocator,
                    "\n",
                    .{},
                ));
                try treeToString(allocator, writer, t.left);
                try treeToString(allocator, writer, t.right);
            },
        }
    } else {
        _ = try writer.write(try std.fmt.allocPrint(
            allocator,
            ";\n",
            .{},
        ));
    }
}

/// Iterates over the lexer's textual output, producing one `Token` per
/// non-empty line of the form `line col type [value]`.
pub const LexerOutputTokenizer = struct {
    it: std.mem.SplitIterator(u8),

    const Self = @This();

    pub fn init(str: []const u8) Self {
        return Self{ .it = std.mem.split(u8, str, "\n") };
    }

    pub fn next(self: *Self) std.fmt.ParseIntError!?Token {
        if (self.it.next()) |line| {
            if (line.len == 0) return null;
            var tokens_it = std.mem.tokenize(u8, line, " ");
            const lineNumber = try std.fmt.parseInt(usize, tokens_it.next().?, 10);
            const colNumber = try std.fmt.parseInt(usize, tokens_it.next().?, 10);
            const typ_text = tokens_it.next().?;
            const typ = TokenType.fromString(typ_text);
            const pre_value_index = tokens_it.index;
            const value = tokens_it.next();
            var token = Token{ .line = lineNumber, .col = colNumber, .typ = typ };
            if (value) |val| {
                const token_value = blk: {
                    switch (typ) {
                        .string, .identifier => {
                            // Strings may contain spaces, so rewind and take
                            // the rest of the line verbatim.
                            tokens_it.index = pre_value_index;
                            break :blk TokenValue{ .string = tokens_it.rest() };
                        },
                        .integer => break :blk TokenValue{ .integer = try std.fmt.parseInt(i32, val, 10) },
                        else => unreachable,
                    }
                };
                token.value = token_value;
            }
            return token;
        } else {
            return null;
        }
    }
};
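
// A minimal sketch, not in the original source, of how lexer lines are turned
// into tokens (the sample records below are made up for the test).
test "LexerOutputTokenizer reads `line col type [value]` records (sketch)" {
    var it = LexerOutputTokenizer.init("4 9 Integer 42\n4 11 Semicolon");
    const first = (try it.next()).?;
    try std.testing.expectEqual(@as(usize, 4), first.line);
    try std.testing.expect(first.typ == .integer);
    try std.testing.expectEqual(@as(i32, 42), first.value.?.integer);
    const second = (try it.next()).?;
    try std.testing.expect(second.typ == .semicolon);
    try std.testing.expect(second.value == null);
}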

fn stringToTokenList(allocator: std.mem.Allocator, str: []const u8) !std.ArrayList(Token) {
    var result = std.ArrayList(Token).init(allocator);
    var lexer_output_it = LexerOutputTokenizer.init(str);
    while (try lexer_output_it.next()) |token| {
        try result.append(token);
    }
    return result;
}