// Required Node.js modules
const fs = require('fs');
const path = require('path'); // Useful for joining paths if needed

// --- Constants ---
// Number of bytes occupied by one operand word in the generated byte code.
const WORD_SIZE = 4; // In bytes

// --- Enums (JavaScript Object Equivalents) ---

/**
 * Opcode table of the target stack machine.
 *
 * Every entry is `{ ordinal, name }`: `ordinal` is the byte value that
 * emitByte appends to the code buffer, `name` is the opcode's text form.
 * The table and its entries are frozen because they are shared, identity-
 * compared constants (listCode switches on the entry objects themselves).
 */
const Mnemonic = Object.freeze({
  NONE: Object.freeze({ ordinal: 0, name: 'NONE' }),
  FETCH: Object.freeze({ ordinal: 1, name: 'FETCH' }),
  STORE: Object.freeze({ ordinal: 2, name: 'STORE' }),
  PUSH: Object.freeze({ ordinal: 3, name: 'PUSH' }),
  ADD: Object.freeze({ ordinal: 4, name: 'ADD' }),
  SUB: Object.freeze({ ordinal: 5, name: 'SUB' }),
  MUL: Object.freeze({ ordinal: 6, name: 'MUL' }),
  DIV: Object.freeze({ ordinal: 7, name: 'DIV' }),
  MOD: Object.freeze({ ordinal: 8, name: 'MOD' }),
  LT: Object.freeze({ ordinal: 9, name: 'LT' }),
  GT: Object.freeze({ ordinal: 10, name: 'GT' }),
  LE: Object.freeze({ ordinal: 11, name: 'LE' }),
  GE: Object.freeze({ ordinal: 12, name: 'GE' }),
  EQ: Object.freeze({ ordinal: 13, name: 'EQ' }),
  NE: Object.freeze({ ordinal: 14, name: 'NE' }),
  AND: Object.freeze({ ordinal: 15, name: 'AND' }),
  OR: Object.freeze({ ordinal: 16, name: 'OR' }),
  NEG: Object.freeze({ ordinal: 17, name: 'NEG' }),
  NOT: Object.freeze({ ordinal: 18, name: 'NOT' }),
  JMP: Object.freeze({ ordinal: 19, name: 'JMP' }),
  JZ: Object.freeze({ ordinal: 20, name: 'JZ' }),
  PRTC: Object.freeze({ ordinal: 21, name: 'PRTC' }),
  PRTS: Object.freeze({ ordinal: 22, name: 'PRTS' }),
  PRTI: Object.freeze({ ordinal: 23, name: 'PRTI' }),
  HALT: Object.freeze({ ordinal: 24, name: 'HALT' }),
});
// Lookup array from an opcode byte value back to its Mnemonic entry; listCode
// uses it to decode the code buffer. Entries are ordered by ascending ordinal.
const MnemonicByOrdinal = Object.values(Mnemonic).sort(
  (first, second) => first.ordinal - second.ordinal
);

/**
 * AST node kinds.
 *
 * Every entry is `{ name, mnemonic }`:
 *  - `name` is the node's label; initialize() registers these same strings as
 *    lookup keys for the textual AST (nd_None is registered under ";").
 *  - `mnemonic` is the opcode codeGen emits for operator nodes via
 *    `x.nt.mnemonic`; kinds without a single matching opcode carry
 *    Mnemonic.NONE.
 *
 * Frozen because the entries are shared constants compared by identity.
 */
const NodeType = Object.freeze({
  nd_None: Object.freeze({ name: 'None', mnemonic: Mnemonic.NONE }),
  nd_Ident: Object.freeze({ name: 'Identifier', mnemonic: Mnemonic.NONE }),
  nd_String: Object.freeze({ name: 'String', mnemonic: Mnemonic.NONE }),
  nd_Integer: Object.freeze({ name: 'Integer', mnemonic: Mnemonic.NONE }),
  nd_Sequence: Object.freeze({ name: 'Sequence', mnemonic: Mnemonic.NONE }),
  nd_If: Object.freeze({ name: 'If', mnemonic: Mnemonic.NONE }),
  nd_Prtc: Object.freeze({ name: 'Prtc', mnemonic: Mnemonic.PRTC }),
  nd_Prts: Object.freeze({ name: 'Prts', mnemonic: Mnemonic.PRTS }),
  nd_Prti: Object.freeze({ name: 'Prti', mnemonic: Mnemonic.PRTI }),
  nd_While: Object.freeze({ name: 'While', mnemonic: Mnemonic.NONE }),
  nd_Assign: Object.freeze({ name: 'Assign', mnemonic: Mnemonic.NONE }),
  nd_Negate: Object.freeze({ name: 'Negate', mnemonic: Mnemonic.NEG }),
  nd_Not: Object.freeze({ name: 'Not', mnemonic: Mnemonic.NOT }),
  nd_Mul: Object.freeze({ name: 'Multiply', mnemonic: Mnemonic.MUL }),
  nd_Div: Object.freeze({ name: 'Divide', mnemonic: Mnemonic.DIV }),
  nd_Mod: Object.freeze({ name: 'Mod', mnemonic: Mnemonic.MOD }),
  nd_Add: Object.freeze({ name: 'Add', mnemonic: Mnemonic.ADD }),
  nd_Sub: Object.freeze({ name: 'Subtract', mnemonic: Mnemonic.SUB }),
  nd_Lss: Object.freeze({ name: 'Less', mnemonic: Mnemonic.LT }),
  nd_Leq: Object.freeze({ name: 'LessEqual', mnemonic: Mnemonic.LE }),
  nd_Gtr: Object.freeze({ name: 'Greater', mnemonic: Mnemonic.GT }),
  nd_Geq: Object.freeze({ name: 'GreaterEqual', mnemonic: Mnemonic.GE }),
  nd_Eql: Object.freeze({ name: 'Equal', mnemonic: Mnemonic.EQ }),
  nd_Neq: Object.freeze({ name: 'NotEqual', mnemonic: Mnemonic.NE }),
  nd_And: Object.freeze({ name: 'And', mnemonic: Mnemonic.AND }),
  nd_Or: Object.freeze({ name: 'Or', mnemonic: Mnemonic.OR }),
});

// --- AST Node Class ---

/**
 * One node of the abstract syntax tree.
 *
 * Fields:
 *  - nt:    the node's type (a NodeType entry when built by loadAst)
 *  - left:  first child, or null
 *  - right: second child, or null
 *  - value: text payload of a leaf, or null for interior nodes
 */
class Node {
  /**
   * @param {object|null} nt - Node type.
   * @param {Node|null} left - First child.
   * @param {Node|null} right - Second child.
   * @param {string|null} value - Leaf payload.
   */
  constructor(nt = null, left = null, right = null, value = null) {
    this.nt = nt;
    this.left = left;
    this.right = right;
    this.value = value;
  }

  /** Builds an interior node with two children and no value. */
  static makeNode(nodetype, left, right) {
    return new Node(nodetype, left, right);
  }

  /** Builds an interior node with only a left child. */
  static makeNode1(nodetype, left) {
    return new Node(nodetype, left);
  }

  /** Builds a childless node that carries `value`. */
  static makeLeaf(nodetype, value) {
    return new Node(nodetype, null, null, value);
  }
}

// --- Code Generator State and Methods ---

// Generated byte code. appendToCode rebinds this to a fresh, one-byte-longer
// array on every append, so do not hold on to a reference across emits.
let code = new Uint8Array(0);
// AST node label -> NodeType entry; filled by initialize(), read by loadAst().
const strToNodes = new Map();
// String values in first-seen order; an entry's index is the operand pushed
// for a String node.
const stringPool = [];
// Variable names in first-seen order; an entry's index is the operand of
// FETCH / STORE.
const variables = [];
// Number of entries handed out from stringPool / variables; printed by listCode.
let stringCount = 0;
let varCount = 0;

// Node types for which codeGen evaluates the left child and emits one opcode.
const unaryOps = [NodeType.nd_Negate, NodeType.nd_Not];
// Node types for which codeGen evaluates both children and emits one opcode.
const operators = [
  NodeType.nd_Mul, NodeType.nd_Div, NodeType.nd_Mod, NodeType.nd_Add, NodeType.nd_Sub,
  NodeType.nd_Lss, NodeType.nd_Leq, NodeType.nd_Gtr, NodeType.nd_Geq,
  NodeType.nd_Eql, NodeType.nd_Neq, NodeType.nd_And, NodeType.nd_Or,
];

// State for reading the AST: the input file split into lines, and the index
// of the next line getNextLine() will return.
let inputLines = [];
let currentLineIndex = 0;

/**
 * Appends one byte to the module-level `code` buffer.
 *
 * A typed array cannot grow in place, so a new array one element longer is
 * allocated, the old contents are copied over, and `code` is rebound to it.
 *
 * @param {number} b - Value to append; only its low 8 bits are stored.
 */
function appendToCode(b) {
  const grown = new Uint8Array(code.length + 1);
  grown.set(code);
  grown[grown.length - 1] = b & 0xff;
  code = grown;
}

/**
 * Emits the opcode byte of a mnemonic.
 *
 * @param {{ordinal: number}} m - Mnemonic entry; its `ordinal` is appended to
 *   the code buffer.
 */
function emitByte(m) {
  const { ordinal } = m;
  appendToCode(ordinal);
}

/**
 * Appends a 32-bit word to the code buffer, most significant byte first.
 *
 * The shift operator converts `n` to a signed 32-bit integer, so negative
 * values are stored in two's complement and anything wider than 32 bits is
 * truncated.
 *
 * @param {number} n - Value to emit.
 */
function emitWord(n) {
  for (let shift = 24; shift >= 0; shift -= 8) {
    appendToCode((n >> shift) & 0xff);
  }
}

/**
 * Overwrites the 32-bit word stored at `pos` in the code buffer (big-endian).
 * Used to back-patch the placeholder operands reserved by hole().
 *
 * @param {number} pos - Index of the word's first byte.
 * @param {number} n - Value to store; truncated to 32 bits.
 * @throws {Error} If `pos` is not a non-negative integer or the word would
 *   extend past the end of the buffer. (Writes at a negative or fractional
 *   index on a typed array are silently ignored, so they must be rejected
 *   here.)
 */
function emitWordAt(pos, n) {
  const inBounds = Number.isInteger(pos) && pos >= 0 && pos + WORD_SIZE <= code.length;
  if (!inBounds) {
    throw new Error(`Emit word out of bounds at position ${pos}`);
  }
  code[pos] = (n >> 24) & 0xff;
  code[pos + 1] = (n >> 16) & 0xff;
  code[pos + 2] = (n >> 8) & 0xff;
  code[pos + 3] = n & 0xff;
}

/**
 * Reads the big-endian 32-bit word stored at `pos` in the code buffer.
 *
 * @param {number} pos - Index of the word's first byte.
 * @returns {number} The word as a signed 32-bit integer (bitwise operators
 *   yield signed results, so a word with its top bit set comes back negative).
 * @throws {Error} If `pos` is not a non-negative integer or the word would
 *   extend past the end of the buffer.
 */
function getWord(pos) {
  const inBounds = Number.isInteger(pos) && pos >= 0 && pos + WORD_SIZE <= code.length;
  if (!inBounds) {
    throw new Error(`Get word out of bounds at position ${pos}`);
  }
  return (
    ((code[pos] & 0xff) << 24) |
    ((code[pos + 1] & 0xff) << 16) |
    ((code[pos + 2] & 0xff) << 8) |
    (code[pos + 3] & 0xff)
  );
}

/**
 * Returns the data-slot index of a variable, allocating the next free slot
 * the first time the name is seen.
 *
 * @param {string} name - Variable name.
 * @returns {number} Index of `name` in `variables`.
 */
function fetchVarOffset(name) {
  const known = variables.indexOf(name);
  if (known !== -1) {
    return known;
  }
  variables.push(name);
  return varCount++;
}

/**
 * Returns the string-pool index of a string, adding it to the pool the first
 * time it is seen.
 *
 * @param {string} str - String value exactly as stored on the AST leaf.
 * @returns {number} Index of `str` in `stringPool`.
 */
function fetchStringOffset(str) {
  const known = stringPool.indexOf(str);
  if (known !== -1) {
    return known;
  }
  stringPool.push(str);
  return stringCount++;
}

/**
 * Reserves one operand word (filled with 0) at the end of the code buffer so
 * it can be back-patched later with emitWordAt.
 *
 * @returns {number} Position of the reserved word's first byte.
 */
function hole() {
  const position = code.length;
  emitWord(0);
  return position;
}

/**
 * Membership test; thin wrapper around Array.prototype.includes.
 *
 * @param {Array} arr - Array to search.
 * @param {*} item - Value to look for.
 * @returns {boolean} True when `item` is an element of `arr`.
 */
function arrayContains(arr, item) {
  const found = arr.includes(item);
  return found;
}

/**
 * Recursively emits byte code for an AST (sub)tree into the code buffer.
 *
 * Jump operands (JZ / JMP) are offsets relative to the address of the operand
 * word itself, i.e. target = operandAddress + offset. This is the rule
 * listCode applies when it prints the absolute target (`pc + x` with `pc`
 * still on the operand), so listing and code agree.
 *
 * @param {Node|null} x - Root of the subtree. Null nodes and nd_None nodes
 *   emit nothing.
 * @throws {Error} If an Integer leaf does not hold a decimal integer, or if
 *   the node type is neither a statement kind handled here nor listed in
 *   `operators` / `unaryOps`.
 */
function codeGen(x) {
  if (x == null) {
    return;
  }

  switch (x.nt) {
    case NodeType.nd_None:
      return;

    case NodeType.nd_Ident:
      emitByte(Mnemonic.FETCH);
      emitWord(fetchVarOffset(x.value));
      break;

    case NodeType.nd_Integer: {
      const literal = Number.parseInt(x.value, 10);
      // NaN would be coerced to 0 by emitWord's bit shifts and silently
      // produce "push 0"; reject it instead.
      if (Number.isNaN(literal)) {
        throw new Error(`Invalid integer literal: '${x.value}'`);
      }
      emitByte(Mnemonic.PUSH);
      emitWord(literal);
      break;
    }

    case NodeType.nd_String:
      emitByte(Mnemonic.PUSH);
      emitWord(fetchStringOffset(x.value));
      break;

    case NodeType.nd_Assign: {
      // Resolve the target slot before generating the right-hand side so a
      // variable first seen as an assignment target is numbered before any
      // variables that appear in its initialiser.
      const slot = fetchVarOffset(x.left.value);
      codeGen(x.right);
      emitByte(Mnemonic.STORE);
      emitWord(slot);
      break;
    }

    case NodeType.nd_If: {
      // x.left is the condition; x.right is a pair node whose left child is
      // the 'then' branch and whose right child is the optional 'else' branch.
      const thenBranch = x.right.left;
      const elseBranch = x.right.right;

      codeGen(x.left);
      emitByte(Mnemonic.JZ);
      const skipThen = hole();
      codeGen(thenBranch);

      if (elseBranch == null) {
        // No else: a false condition jumps straight past the 'then' code.
        emitWordAt(skipThen, code.length - skipThen);
      } else {
        // The 'then' code ends with a jump over the 'else' code.
        emitByte(Mnemonic.JMP);
        const skipElse = hole();
        // A false condition lands on the first 'else' instruction.
        emitWordAt(skipThen, code.length - skipThen);
        codeGen(elseBranch);
        emitWordAt(skipElse, code.length - skipElse);
      }
      break;
    }

    case NodeType.nd_While: {
      const loopStart = code.length;
      codeGen(x.left);
      emitByte(Mnemonic.JZ);
      const exitJump = hole();
      codeGen(x.right);
      emitByte(Mnemonic.JMP);
      // code.length is the address of the operand about to be written.
      emitWord(loopStart - code.length);
      emitWordAt(exitJump, code.length - exitJump);
      break;
    }

    case NodeType.nd_Sequence:
      codeGen(x.left);
      codeGen(x.right);
      break;

    case NodeType.nd_Prtc:
      codeGen(x.left);
      emitByte(Mnemonic.PRTC);
      break;

    case NodeType.nd_Prti:
      codeGen(x.left);
      emitByte(Mnemonic.PRTI);
      break;

    case NodeType.nd_Prts:
      codeGen(x.left);
      emitByte(Mnemonic.PRTS);
      break;

    default:
      if (operators.includes(x.nt)) {
        codeGen(x.left);
        codeGen(x.right);
        emitByte(x.nt.mnemonic);
      } else if (unaryOps.includes(x.nt)) {
        codeGen(x.left);
        emitByte(x.nt.mnemonic);
      } else {
        throw new Error(`Error in code generator! Found ${x.nt.name}, expecting operator.`);
      }
  }
}

/**
 * Prints the generated program to stdout: a "Datasize/Strings" header, the
 * string pool (one entry per line), a "--- Code ---" banner, and then one
 * line per instruction in the form `<address> <mnemonic> [operand]`.
 *
 * @throws {Error} If the code buffer holds a byte that is not a known opcode
 *   (including NONE), or an operand word runs past the end of the buffer
 *   (raised by getWord).
 */
function listCode() {
  console.log(`Datasize: ${varCount} Strings: ${stringCount}`);
  for (const s of stringPool) {
    // Pool entries are stored verbatim from the AST leaf. When an entry
    // already carries its own double quotes, print it unchanged so the quotes
    // are not doubled; otherwise add them.
    // NOTE(review): AST files in the Rosetta Code format quote string values
    // - confirm against the actual input producer.
    const alreadyQuoted = s.length >= 2 && s.startsWith('"') && s.endsWith('"');
    console.log(alreadyQuoted ? s : `"${s}"`);
  }

  console.log("\n--- Code ---");

  let pc = 0;
  while (pc < code.length) {
    process.stdout.write(`${pc.toString().padStart(4, ' ')} `); // like printf %4d
    const opcodeValue = code[pc++];
    if (opcodeValue >= MnemonicByOrdinal.length) {
      throw new Error(`Unknown opcode value ${opcodeValue} @ ${pc - 1}`);
    }
    const op = MnemonicByOrdinal[opcodeValue];

    // From here on pc is the address of the operand word (if the opcode has
    // one); it is advanced past the operand only after the operand is printed.
    let x;
    switch (op) {
      case Mnemonic.FETCH:
        x = getWord(pc);
        process.stdout.write(`fetch [${x}]`);
        pc += WORD_SIZE;
        break;
      case Mnemonic.STORE:
        x = getWord(pc);
        process.stdout.write(`store [${x}]`);
        pc += WORD_SIZE;
        break;
      case Mnemonic.PUSH:
        x = getWord(pc);
        process.stdout.write(`push ${x}`);
        pc += WORD_SIZE;
        break;
      case Mnemonic.ADD: case Mnemonic.SUB: case Mnemonic.MUL: case Mnemonic.DIV: case Mnemonic.MOD:
      case Mnemonic.LT: case Mnemonic.GT: case Mnemonic.LE: case Mnemonic.GE: case Mnemonic.EQ: case Mnemonic.NE:
      case Mnemonic.AND: case Mnemonic.OR: case Mnemonic.NEG: case Mnemonic.NOT:
      case Mnemonic.PRTC: case Mnemonic.PRTI: case Mnemonic.PRTS: case Mnemonic.HALT:
        // Operand-less instructions: just the lower-cased mnemonic.
        process.stdout.write(op.name.toLowerCase());
        break;
      case Mnemonic.JMP:
        x = getWord(pc); // relative offset
        // Absolute target shown = address of the operand word + offset.
        process.stdout.write(`jmp (${x}) ${pc + x}`);
        pc += WORD_SIZE;
        break;
      case Mnemonic.JZ:
        x = getWord(pc); // relative offset
        // Absolute target shown = address of the operand word + offset.
        process.stdout.write(`jz (${x}) ${pc + x}`);
        pc += WORD_SIZE;
        break;
      default:
        // Reached for NONE, which is in the lookup table but is not an
        // executable opcode.
        throw new Error(`Unknown opcode ${op.name} (${opcodeValue}) @ ${pc - 1}`);
    }
    console.log(); // newline after each instruction
  }
}

/**
 * Returns the next unread line of the pre-loaded input and advances the read
 * position.
 *
 * @returns {string|null} The line, or null once every line has been consumed.
 */
function getNextLine() {
  return currentLineIndex < inputLines.length ? inputLines[currentLineIndex++] : null;
}

/**
 * Reads one AST (sub)tree from the input lines, in pre-order.
 *
 * Line layout: the first 15 columns hold the node label; anything from
 * column 15 onward (after trimming) is a leaf value.
 *  - ";" marks an absent child and yields null.
 *  - A line with a non-empty value yields a leaf node.
 *  - Any other line yields an interior node whose two children are read
 *    recursively; a node with a single child is therefore followed by its
 *    child and then a ";" line.
 *
 * @returns {Node|null} The subtree, or null for ";" or end of input.
 * @throws {Error} If the label is not registered in `strToNodes`.
 */
function loadAst() {
  const line = getNextLine();
  if (line === null) {
    return null; // input exhausted
  }

  // substring() clamps to the line length, so short lines simply produce an
  // empty value.
  const command = line.substring(0, 15).trim();
  const value = line.substring(15).trim();

  if (command === ";") {
    return null;
  }

  const nodeType = strToNodes.get(command);
  if (!nodeType) {
    // currentLineIndex was already advanced by getNextLine, so it is the
    // 1-based number of the offending line.
    throw new Error(`Command not found: '${command}' on line ${currentLineIndex}`);
  }

  if (value !== "") {
    return Node.makeLeaf(nodeType, value);
  }

  const left = loadAst();
  const right = loadAst();
  return Node.makeNode(nodeType, left, right);
}

// --- Initialization ---

/**
 * Fills `strToNodes`, the lookup from AST node labels to NodeType entries.
 *
 * Every node type is registered under its own `name`, so the labels cannot
 * drift from the NodeType table. The one exception is nd_None, which is
 * registered under ";" (the marker loadAst uses for an absent child) rather
 * than under "None".
 */
function initialize() {
  strToNodes.set(";", NodeType.nd_None);
  for (const nodeType of Object.values(NodeType)) {
    if (nodeType !== NodeType.nd_None) {
      strToNodes.set(nodeType.name, nodeType);
    }
  }
}

// --- Main Execution ---

/**
 * Command-line entry point: `node code_generator.js <ast_file>`.
 *
 * Reads the AST file named by the first argument, generates code for it,
 * appends HALT and prints the listing. With no argument it prints a usage
 * line. Any error while reading or compiling is reported on stderr and makes
 * the process exit with status 1, so callers and scripts can detect failure.
 */
function main() {
  initialize();

  // argv[0] is the node binary and argv[1] the script path.
  const args = process.argv.slice(2);
  if (args.length === 0) {
    console.log("Usage: node code_generator.js <ast_file>");
    return;
  }

  const filename = args[0];
  try {
    // The whole file is read up front; loadAst pulls lines via getNextLine.
    inputLines = fs.readFileSync(filename, 'utf8').split(/\r?\n/);
    currentLineIndex = 0;

    const ast = loadAst();
    if (!ast) {
      console.log("No valid AST loaded.");
      return;
    }

    codeGen(ast);
    emitByte(Mnemonic.HALT);
    listCode();
  } catch (e) {
    console.error(`Error: ${e.message}`);
    // Set the status instead of calling process.exit() so pending stdout
    // writes are still flushed.
    process.exitCode = 1;
  }
}

// Run main() only when this file is executed directly, not when it is loaded
// by another module through require().
if (require.main === module) {
  main();
}