const std = @import("std"); pub const CodeGeneratorError = error{OutOfMemory}; pub const CodeGenerator = struct { allocator: std.mem.Allocator, string_pool: std.ArrayList([]const u8), globals: std.ArrayList([]const u8), bytecode: std.ArrayList(u8), const Self = @This(); const word_size = @sizeOf(i32); pub fn init( allocator: std.mem.Allocator, string_pool: std.ArrayList([]const u8), globals: std.ArrayList([]const u8), ) Self { return CodeGenerator{ .allocator = allocator, .string_pool = string_pool, .globals = globals, .bytecode = std.ArrayList(u8).init(allocator), }; } pub fn gen(self: *Self, ast: ?*Tree) CodeGeneratorError!void { try self.genH(ast); try self.emitHalt(); } // Helper function to allow recursion. pub fn genH(self: *Self, ast: ?*Tree) CodeGeneratorError!void { if (ast) |t| { switch (t.typ) { .sequence => { try self.genH(t.left); try self.genH(t.right); }, .kw_while => { const condition_address = self.currentAddress(); try self.genH(t.left); try self.emitByte(.jz); const condition_address_hole = self.currentAddress(); try self.emitHole(); try self.genH(t.right); try self.emitByte(.jmp); try self.emitInt(condition_address); self.insertInt(condition_address_hole, self.currentAddress()); }, .kw_if => { try self.genH(t.left); try self.emitByte(.jz); const condition_address_hole = self.currentAddress(); try self.emitHole(); try self.genH(t.right.?.left); if (t.right.?.right) |else_tree| { try self.emitByte(.jmp); const else_address_hole = self.currentAddress(); try self.emitHole(); const else_address = self.currentAddress(); try self.genH(else_tree); self.insertInt(condition_address_hole, else_address); self.insertInt(else_address_hole, self.currentAddress()); } else { self.insertInt(condition_address_hole, self.currentAddress()); } }, .assign => { try self.genH(t.right); try self.emitByte(.store); try self.emitInt(self.fetchGlobalsOffset(t.left.?.value.?.string)); }, .prts => { try self.genH(t.left); try self.emitByte(.prts); }, .prti => { try self.genH(t.left); try self.emitByte(.prti); }, .prtc => { try self.genH(t.left); try self.emitByte(.prtc); }, .string => { try self.emitByte(.push); try self.emitInt(self.fetchStringsOffset(t.value.?.string)); }, .integer => { try self.emitByte(.push); try self.emitInt(t.value.?.integer); }, .identifier => { try self.emitByte(.fetch); try self.emitInt(self.fetchGlobalsOffset(t.value.?.string)); }, .negate, .not => { try self.genH(t.left); try self.emitByte(Op.fromNodeType(t.typ).?); }, .add, .multiply, .subtract, .divide, .mod, .less, .less_equal, .greater, .greater_equal, .equal, .not_equal, .bool_and, .bool_or, => try self.genBinOp(t), .unknown => { std.debug.print("\nINTERP: UNKNOWN {}\n", .{t.typ}); std.os.exit(1); }, } } } fn genBinOp(self: *Self, tree: *Tree) CodeGeneratorError!void { try self.genH(tree.left); try self.genH(tree.right); try self.emitByte(Op.fromNodeType(tree.typ).?); } fn emitByte(self: *Self, op: Op) CodeGeneratorError!void { try self.bytecode.append(@enumToInt(op)); } fn emitInt(self: *Self, n: i32) CodeGeneratorError!void { var n_var = n; var n_bytes = @ptrCast(*[4]u8, &n_var); for (n_bytes) |byte| { try self.bytecode.append(byte); } } // Holes are later populated via `insertInt` because they can't be known when // we populate the bytecode array sequentially. fn emitHole(self: *Self) CodeGeneratorError!void { try self.emitInt(std.math.maxInt(i32)); } // Populates the "hole" produced by `emitHole`. fn insertInt(self: *Self, address: i32, n: i32) void { var i: i32 = 0; var n_var = n; var n_bytes = @ptrCast(*[4]u8, &n_var); while (i < word_size) : (i += 1) { self.bytecode.items[@intCast(usize, address + i)] = n_bytes[@intCast(usize, i)]; } } fn emitHalt(self: *Self) CodeGeneratorError!void { try self.bytecode.append(@enumToInt(Op.halt)); } fn currentAddress(self: Self) i32 { return @intCast(i32, self.bytecode.items.len); } fn fetchStringsOffset(self: Self, str: []const u8) i32 { for (self.string_pool.items) |string, idx| { if (std.mem.eql(u8, string, str)) { return @intCast(i32, idx); } } unreachable; } fn fetchGlobalsOffset(self: Self, str: []const u8) i32 { for (self.globals.items) |global, idx| { if (std.mem.eql(u8, global, str)) { return @intCast(i32, idx); } } unreachable; } pub fn print(self: Self) ![]u8 { var result = std.ArrayList(u8).init(self.allocator); var writer = result.writer(); try writer.print( "Datasize: {d} Strings: {d}\n", .{ self.globals.items.len, self.string_pool.items.len }, ); for (self.string_pool.items) |string| { try writer.print("{s}\n", .{string}); } var pc: usize = 0; while (pc < self.bytecode.items.len) : (pc += 1) { try writer.print("{d:>5} ", .{pc}); switch (@intToEnum(Op, self.bytecode.items[pc])) { .push => { try writer.print("push {d}\n", .{self.unpackInt(pc + 1)}); pc += word_size; }, .store => { try writer.print("store [{d}]\n", .{self.unpackInt(pc + 1)}); pc += word_size; }, .fetch => { try writer.print("fetch [{d}]\n", .{self.unpackInt(pc + 1)}); pc += word_size; }, .jz => { const address = self.unpackInt(pc + 1); try writer.print("jz ({d}) {d}\n", .{ address - @intCast(i32, pc) - 1, address }); pc += word_size; }, .jmp => { const address = self.unpackInt(pc + 1); try writer.print("jmp ({d}) {d}\n", .{ address - @intCast(i32, pc) - 1, address }); pc += word_size; }, else => try writer.print("{s}\n", .{Op.toString(@intToEnum(Op, self.bytecode.items[pc]))}), } } return result.items; } fn unpackInt(self: Self, pc: usize) i32 { const arg_ptr = @ptrCast(*[4]u8, self.bytecode.items[pc .. pc + word_size]); var arg_array = arg_ptr.*; const arg = @ptrCast(*i32, @alignCast(@alignOf(i32), &arg_array)); return arg.*; } }; pub const Op = enum(u8) { fetch, store, push, add, sub, mul, div, mod, lt, gt, le, ge, eq, ne, @"and", @"or", neg, not, jmp, jz, prtc, prts, prti, halt, const from_node = std.enums.directEnumArray(NodeType, ?Op, 0, .{ .unknown = null, .identifier = null, .string = null, .integer = null, .sequence = null, .kw_if = null, .prtc = null, .prts = null, .prti = null, .kw_while = null, .assign = null, .negate = .neg, .not = .not, .multiply = .mul, .divide = .div, .mod = .mod, .add = .add, .subtract = .sub, .less = .lt, .less_equal = .le, .greater = .gt, .greater_equal = .ge, .equal = .eq, .not_equal = .ne, .bool_and = .@"and", .bool_or = .@"or", }); pub fn fromNodeType(node_type: NodeType) ?Op { return from_node[@enumToInt(node_type)]; } const to_string = std.enums.directEnumArray(Op, []const u8, 0, .{ .fetch = "fetch", .store = "store", .push = "push", .add = "add", .sub = "sub", .mul = "mul", .div = "div", .mod = "mod", .lt = "lt", .gt = "gt", .le = "le", .ge = "ge", .eq = "eq", .ne = "ne", .@"and" = "and", .@"or" = "or", .neg = "neg", .not = "not", .jmp = "jmp", .jz = "jz", .prtc = "prtc", .prts = "prts", .prti = "prti", .halt = "halt", }); pub fn toString(self: Op) []const u8 { return to_string[@enumToInt(self)]; } }; pub fn main() !void { var arena = std.heap.ArenaAllocator.init(std.heap.page_allocator); defer arena.deinit(); const allocator = arena.allocator(); var arg_it = std.process.args(); _ = try arg_it.next(allocator) orelse unreachable; // program name const file_name = arg_it.next(allocator); // We accept both files and standard input. var file_handle = blk: { if (file_name) |file_name_delimited| { const fname: []const u8 = try file_name_delimited; break :blk try std.fs.cwd().openFile(fname, .{}); } else { break :blk std.io.getStdIn(); } }; defer file_handle.close(); const input_content = try file_handle.readToEndAlloc(allocator, std.math.maxInt(usize)); var string_pool = std.ArrayList([]const u8).init(allocator); var globals = std.ArrayList([]const u8).init(allocator); const ast = try loadAST(allocator, input_content, &string_pool, &globals); var code_generator = CodeGenerator.init(allocator, string_pool, globals); try code_generator.gen(ast); const result: []const u8 = try code_generator.print(); _ = try std.io.getStdOut().write(result); } pub const NodeType = enum { unknown, identifier, string, integer, sequence, kw_if, prtc, prts, prti, kw_while, assign, negate, not, multiply, divide, mod, add, subtract, less, less_equal, greater, greater_equal, equal, not_equal, bool_and, bool_or, const from_string_map = std.ComptimeStringMap(NodeType, .{ .{ "UNKNOWN", .unknown }, .{ "Identifier", .identifier }, .{ "String", .string }, .{ "Integer", .integer }, .{ "Sequence", .sequence }, .{ "If", .kw_if }, .{ "Prtc", .prtc }, .{ "Prts", .prts }, .{ "Prti", .prti }, .{ "While", .kw_while }, .{ "Assign", .assign }, .{ "Negate", .negate }, .{ "Not", .not }, .{ "Multiply", .multiply }, .{ "Divide", .divide }, .{ "Mod", .mod }, .{ "Add", .add }, .{ "Subtract", .subtract }, .{ "Less", .less }, .{ "LessEqual", .less_equal }, .{ "Greater", .greater }, .{ "GreaterEqual", .greater_equal }, .{ "Equal", .equal }, .{ "NotEqual", .not_equal }, .{ "And", .bool_and }, .{ "Or", .bool_or }, }); pub fn fromString(str: []const u8) NodeType { return from_string_map.get(str).?; } }; pub const NodeValue = union(enum) { integer: i32, string: []const u8, }; pub const Tree = struct { left: ?*Tree, right: ?*Tree, typ: NodeType = .unknown, value: ?NodeValue = null, fn makeNode(allocator: std.mem.Allocator, typ: NodeType, left: ?*Tree, right: ?*Tree) !*Tree { const result = try allocator.create(Tree); result.* = Tree{ .left = left, .right = right, .typ = typ }; return result; } fn makeLeaf(allocator: std.mem.Allocator, typ: NodeType, value: ?NodeValue) !*Tree { const result = try allocator.create(Tree); result.* = Tree{ .left = null, .right = null, .typ = typ, .value = value }; return result; } }; const LoadASTError = error{OutOfMemory} || std.fmt.ParseIntError; fn loadAST( allocator: std.mem.Allocator, str: []const u8, string_pool: *std.ArrayList([]const u8), globals: *std.ArrayList([]const u8), ) LoadASTError!?*Tree { var line_it = std.mem.split(u8, str, "\n"); return try loadASTHelper(allocator, &line_it, string_pool, globals); } fn loadASTHelper( allocator: std.mem.Allocator, line_it: *std.mem.SplitIterator(u8), string_pool: *std.ArrayList([]const u8), globals: *std.ArrayList([]const u8), ) LoadASTError!?*Tree { if (line_it.next()) |line| { var tok_it = std.mem.tokenize(u8, line, " "); const tok_str = tok_it.next().?; if (tok_str[0] == ';') return null; const node_type = NodeType.fromString(tok_str); const pre_iteration_index = tok_it.index; if (tok_it.next()) |leaf_value| { const node_value = blk: { switch (node_type) { .integer => break :blk NodeValue{ .integer = try std.fmt.parseInt(i32, leaf_value, 10) }, .identifier => { var already_exists = false; for (globals.items) |global| { if (std.mem.eql(u8, global, leaf_value)) { already_exists = true; break; } } if (!already_exists) try globals.append(leaf_value); break :blk NodeValue{ .string = leaf_value }; }, .string => { tok_it.index = pre_iteration_index; const str = tok_it.rest(); var already_exists = false; for (string_pool.items) |string| { if (std.mem.eql(u8, string, str)) { already_exists = true; break; } } if (!already_exists) try string_pool.append(str); break :blk NodeValue{ .string = str }; }, else => unreachable, } }; return try Tree.makeLeaf(allocator, node_type, node_value); } const left = try loadASTHelper(allocator, line_it, string_pool, globals); const right = try loadASTHelper(allocator, line_it, string_pool, globals); return try Tree.makeNode(allocator, node_type, left, right); } else { return null; } }