RosettaCodeData/Task/Compiler-code-generator/JavaScript/compiler-code-generator.js

503 lines
18 KiB
JavaScript

// Required Node.js modules
const fs = require('fs');
const path = require('path'); // Useful for joining paths if needed
// --- Constants ---
// Size of one operand word in the generated byte code. Instructions that
// carry an operand occupy one opcode byte followed by WORD_SIZE operand bytes.
const WORD_SIZE = 4; // In bytes
// --- Enums (JavaScript Object Equivalents) ---
// Opcode table keyed by mnemonic. Each entry is `{ ordinal, name }`, where
// `ordinal` is the byte value written into the code stream and `name` is the
// upper-case mnemonic. Ordinals are assigned from the position in the list below.
const Mnemonic = {};
// Reverse lookup used when listing code: MnemonicByOrdinal[i] is the entry
// whose ordinal is i (the same object that Mnemonic holds).
const MnemonicByOrdinal = [];
[
  'NONE', 'FETCH', 'STORE', 'PUSH',
  'ADD', 'SUB', 'MUL', 'DIV', 'MOD',
  'LT', 'GT', 'LE', 'GE', 'EQ', 'NE',
  'AND', 'OR', 'NEG', 'NOT',
  'JMP', 'JZ',
  'PRTC', 'PRTS', 'PRTI',
  'HALT',
].forEach((name, ordinal) => {
  const entry = { ordinal, name };
  Mnemonic[name] = entry;
  MnemonicByOrdinal.push(entry);
});
// AST node kinds. Each entry is `{ name, mnemonic }`: `name` is the label
// shown in code-generator error messages, and `mnemonic` is the opcode the
// generator emits for unary and binary operator nodes (Mnemonic.NONE for
// kinds that have no single opcode of their own).
const NodeType = {};
[
  ['nd_None', 'None', Mnemonic.NONE],
  ['nd_Ident', 'Identifier', Mnemonic.NONE],
  ['nd_String', 'String', Mnemonic.NONE],
  ['nd_Integer', 'Integer', Mnemonic.NONE],
  ['nd_Sequence', 'Sequence', Mnemonic.NONE],
  ['nd_If', 'If', Mnemonic.NONE],
  ['nd_Prtc', 'Prtc', Mnemonic.PRTC],
  ['nd_Prts', 'Prts', Mnemonic.PRTS],
  ['nd_Prti', 'Prti', Mnemonic.PRTI],
  ['nd_While', 'While', Mnemonic.NONE],
  ['nd_Assign', 'Assign', Mnemonic.NONE],
  ['nd_Negate', 'Negate', Mnemonic.NEG],
  ['nd_Not', 'Not', Mnemonic.NOT],
  ['nd_Mul', 'Multiply', Mnemonic.MUL],
  ['nd_Div', 'Divide', Mnemonic.DIV],
  ['nd_Mod', 'Mod', Mnemonic.MOD],
  ['nd_Add', 'Add', Mnemonic.ADD],
  ['nd_Sub', 'Subtract', Mnemonic.SUB],
  ['nd_Lss', 'Less', Mnemonic.LT],
  ['nd_Leq', 'LessEqual', Mnemonic.LE],
  ['nd_Gtr', 'Greater', Mnemonic.GT],
  ['nd_Geq', 'GreaterEqual', Mnemonic.GE],
  ['nd_Eql', 'Equal', Mnemonic.EQ],
  ['nd_Neq', 'NotEqual', Mnemonic.NE],
  ['nd_And', 'And', Mnemonic.AND],
  ['nd_Or', 'Or', Mnemonic.OR],
].forEach(([key, name, mnemonic]) => {
  NodeType[key] = { name, mnemonic };
});
// --- AST Node Class ---
/**
 * One node of the abstract syntax tree.
 *
 * Fields: `nt` (node type), `left` / `right` (child nodes or null) and
 * `value` (leaf text or null). Any constructor argument that is omitted or
 * `undefined` is stored as `null`.
 */
class Node {
  constructor(nt = null, left = null, right = null, value = null) {
    Object.assign(this, { nt, left, right, value });
  }

  /** Builds an interior node with two children and no value. */
  static makeNode(nodetype, left, right) {
    return new Node(nodetype, left, right);
  }

  /** Builds an interior node with only a left child. */
  static makeNode1(nodetype, left) {
    return new Node(nodetype, left);
  }

  /** Builds a childless node carrying `value`. */
  static makeLeaf(nodetype, value) {
    return new Node(nodetype, null, null, value);
  }
}
// --- Code Generator State and Methods ---
// Generated byte code. appendToCode() replaces this binding with a copy that
// is one byte longer, so `code.length` is always the next emit position.
let code = new Uint8Array(0); // Use Uint8Array for byte code
// AST command text -> NodeType entry; populated by initialize().
const strToNodes = new Map();
// String operands in first-seen order; fetchStringOffset() hands out positions in it.
const stringPool = [];
// Variable names in first-seen order; fetchVarOffset() hands out positions in it.
const variables = [];
// Number of entries added to stringPool / variables so far; both are printed
// in the listing header by listCode().
let stringCount = 0;
let varCount = 0;
// Node types codeGen() emits as: code for the left child, then the node's mnemonic.
const unaryOps = [
NodeType.nd_Negate, NodeType.nd_Not
];
// Node types codeGen() emits as: code for left, code for right, then the node's mnemonic.
const operators = [
NodeType.nd_Mul, NodeType.nd_Div, NodeType.nd_Mod, NodeType.nd_Add, NodeType.nd_Sub,
NodeType.nd_Lss, NodeType.nd_Leq, NodeType.nd_Gtr, NodeType.nd_Geq,
NodeType.nd_Eql, NodeType.nd_Neq, NodeType.nd_And, NodeType.nd_Or
];
// State for reading AST from lines
// inputLines holds the lines of the AST file; currentLineIndex is the index
// of the next line getNextLine() will return.
let inputLines = [];
let currentLineIndex = 0;
/**
 * Appends a single byte to the generated code.
 *
 * The module-level `code` array is replaced by a new Uint8Array that is one
 * element longer; only the low 8 bits of `b` are stored.
 *
 * @param {number} b - Value whose low byte is appended.
 */
function appendToCode(b) {
  const previousLength = code.length;
  const extended = new Uint8Array(previousLength + 1);
  extended.set(code, 0);
  extended[previousLength] = b & 0xff;
  code = extended;
}
/**
 * Emits the opcode byte for a mnemonic.
 *
 * @param {{ordinal: number}} m - Mnemonic entry; its `ordinal` is appended to the code.
 */
function emitByte(m) {
  const { ordinal } = m;
  appendToCode(ordinal);
}
/**
 * Appends `n` to the code as four bytes, most significant byte first.
 *
 * The value goes through 32-bit integer shifts, so negative numbers are
 * written in two's-complement form.
 *
 * @param {number} n - Value to emit.
 */
function emitWord(n) {
  for (const shift of [24, 16, 8, 0]) {
    appendToCode((n >> shift) & 0xff);
  }
}
/**
 * Overwrites the four bytes starting at `pos` with `n`, most significant
 * byte first. Used to back-patch a placeholder created by hole().
 *
 * @param {number} pos - Index of the first byte to overwrite.
 * @param {number} n - Value to store (handled as a 32-bit integer).
 * @throws {Error} If the word would extend past the end of the code.
 */
function emitWordAt(pos, n) {
  const end = pos + WORD_SIZE;
  if (end > code.length) {
    throw new Error(`Emit word out of bounds at position ${pos}`);
  }
  [24, 16, 8, 0].forEach((shift, index) => {
    code[pos + index] = (n >> shift) & 0xff;
  });
}
function getWord(pos) {
if (pos + WORD_SIZE > code.length) {
throw new Error(`Get word out of bounds at position ${pos}`);
}
// Read bytes in big-endian order and combine into a 32-bit integer
// Need to treat bytes as unsigned (0-255) using & 0xff
let result = 0;
result |= (code[pos] & 0xff) << 24;
result |= (code[pos + 1] & 0xff) << 16;
result |= (code[pos + 2] & 0xff) << 8;
result |= (code[pos + 3] & 0xff);
// Handle sign bit if the result is expected to be signed
// In JS, bitwise ops treat numbers as signed 32-bit.
// The above combination *might* result in a negative number
// if the most significant bit is set (for values >= 2^31).
// If the VM expects unsigned, this is fine. If signed,
// JS handles this conversion reasonably well after the bitwise ops.
// Let's return the potentially signed result from the bitwise ops.
return result;
}
/**
 * Returns the slot number of a variable, allocating a new slot the first
 * time a name is seen.
 *
 * @param {string} name - Variable name.
 * @returns {number} Position of `name` in `variables` if already known,
 *   otherwise the value of `varCount` before it is incremented.
 */
function fetchVarOffset(name) {
  const known = variables.indexOf(name);
  if (known !== -1) {
    return known;
  }
  variables.push(name);
  const assigned = varCount;
  varCount += 1;
  return assigned;
}
/**
 * Returns the pool index of a string, adding it to the pool the first time
 * it is seen.
 *
 * @param {string} str - String text exactly as it should appear in the pool.
 * @returns {number} Position of `str` in `stringPool` if already present,
 *   otherwise the value of `stringCount` before it is incremented.
 */
function fetchStringOffset(str) {
  const known = stringPool.indexOf(str);
  if (known !== -1) {
    return known;
  }
  stringPool.push(str);
  const assigned = stringCount;
  stringCount += 1;
  return assigned;
}
/**
 * Reserves one operand word at the current end of the code, filled with 0.
 *
 * @returns {number} Index of the reserved word, for a later emitWordAt() patch.
 */
function hole() {
  const position = code.length;
  emitWord(0);
  return position;
}
/**
 * Membership test: reports whether `item` is an element of `arr`, using the
 * comparison rules of `includes`.
 *
 * @param {Array} arr - Collection to search.
 * @param {*} item - Value to look for.
 * @returns {boolean} True when `arr` contains `item`.
 */
function arrayContains(arr, item) {
  const isMember = arr.includes(item);
  return isMember;
}
/**
 * Recursively emits stack-machine byte code for the AST rooted at `x`.
 *
 * Jump operands (JZ / JMP) are offsets relative to the address of the operand
 * word itself: target = operandAddress + offset. This is the same base
 * listCode() adds the offset to when it prints the absolute target, so the
 * listing's relative and absolute columns agree.
 *
 * @param {Node|null} x - Subtree to compile; `null` emits nothing.
 * @throws {Error} If an Integer leaf does not hold a decimal integer, or if a
 *   node type is neither handled explicitly nor listed in `operators` / `unaryOps`.
 */
function codeGen(x) {
  let n, p1, p2;
  if (x === null) return;
  switch (x.nt) {
    case NodeType.nd_None:
      return;
    case NodeType.nd_Ident:
      emitByte(Mnemonic.FETCH);
      n = fetchVarOffset(x.value);
      emitWord(n);
      break;
    case NodeType.nd_Integer:
      n = parseInt(x.value, 10);
      // A value that does not parse would otherwise be emitted silently as 0.
      if (Number.isNaN(n)) {
        throw new Error(`Error in code generator! Invalid integer literal '${x.value}'.`);
      }
      emitByte(Mnemonic.PUSH);
      emitWord(n);
      break;
    case NodeType.nd_String:
      emitByte(Mnemonic.PUSH);
      n = fetchStringOffset(x.value);
      emitWord(n);
      break;
    case NodeType.nd_Assign:
      // Allocate the variable slot before compiling the right-hand side.
      n = fetchVarOffset(x.left.value);
      codeGen(x.right);
      emitByte(Mnemonic.STORE);
      emitWord(n);
      break;
    case NodeType.nd_If:
      // x.left is the condition; x.right is a node holding then (left) / else (right).
      codeGen(x.left);
      emitByte(Mnemonic.JZ); // Skip the 'then' block when the condition is zero
      p1 = hole();
      codeGen(x.right.left); // 'then' block
      if (x.right.right !== null) {
        emitByte(Mnemonic.JMP); // After 'then', jump over the 'else' block
        p2 = hole();
      }
      // JZ lands here: start of 'else' if there is one, otherwise the end of the 'if'.
      emitWordAt(p1, code.length - p1);
      if (x.right.right !== null) {
        codeGen(x.right.right); // 'else' block
        emitWordAt(p2, code.length - p2);
      }
      break;
    case NodeType.nd_While:
      p1 = code.length; // Address of the loop condition
      codeGen(x.left);
      emitByte(Mnemonic.JZ); // Leave the loop when the condition is zero
      p2 = hole();
      codeGen(x.right); // Loop body
      emitByte(Mnemonic.JMP);
      // code.length is the address of the JMP operand about to be emitted.
      emitWord(p1 - code.length);
      // JZ lands on the first byte after the loop.
      emitWordAt(p2, code.length - p2);
      break;
    case NodeType.nd_Sequence:
      codeGen(x.left);
      codeGen(x.right);
      break;
    case NodeType.nd_Prtc:
      codeGen(x.left);
      emitByte(Mnemonic.PRTC);
      break;
    case NodeType.nd_Prti:
      codeGen(x.left);
      emitByte(Mnemonic.PRTI);
      break;
    case NodeType.nd_Prts:
      codeGen(x.left);
      emitByte(Mnemonic.PRTS);
      break;
    default:
      if (arrayContains(operators, x.nt)) {
        // Binary operator: both operands first, then the operator's opcode.
        codeGen(x.left);
        codeGen(x.right);
        emitByte(x.nt.mnemonic);
      } else if (arrayContains(unaryOps, x.nt)) {
        codeGen(x.left);
        emitByte(x.nt.mnemonic);
      } else {
        // x.nt may be missing or malformed here; never dereference it blindly.
        const found = x.nt && x.nt.name ? x.nt.name : String(x.nt);
        throw new Error(`Error in code generator! Found ${found}, expecting operator.`);
      }
  }
}
/**
 * Prints the generated program: a header line with the variable and string
 * counts, the string pool, a "--- Code ---" separator, then one line per
 * instruction in the form "<address> <mnemonic> [operand]".
 *
 * For jz / jmp the operand is printed in parentheses followed by the address
 * obtained by adding it to the address of the operand word itself (pc has
 * been moved past the opcode but not past the operand at that point).
 *
 * @throws {Error} If a byte in opcode position is not a listable opcode.
 */
function listCode() {
  console.log(`Datasize: ${varCount} Strings: ${stringCount}`);
  for (const text of stringPool) {
    // NOTE(review): each pool entry is wrapped in double quotes here. If the
    // entries already carry their own quotes from the AST file, the listing
    // shows them doubled - confirm against the expected listing format.
    console.log(`"${text}"`);
  }
  console.log("\n--- Code ---");

  // Opcodes followed by an operand word, with the text each one prints.
  // `base` is the address of the operand word.
  const operandFormats = new Map([
    [Mnemonic.FETCH, (operand) => `fetch [${operand}]`],
    [Mnemonic.STORE, (operand) => `store [${operand}]`],
    [Mnemonic.PUSH, (operand) => `push ${operand}`],
    [Mnemonic.JMP, (operand, base) => `jmp (${operand}) ${base + operand}`],
    [Mnemonic.JZ, (operand, base) => `jz (${operand}) ${base + operand}`],
  ]);
  // Opcodes that stand alone and are printed as their lower-cased name.
  const bareOpcodes = new Set([
    Mnemonic.ADD, Mnemonic.SUB, Mnemonic.MUL, Mnemonic.DIV, Mnemonic.MOD,
    Mnemonic.LT, Mnemonic.GT, Mnemonic.LE, Mnemonic.GE, Mnemonic.EQ, Mnemonic.NE,
    Mnemonic.AND, Mnemonic.OR, Mnemonic.NEG, Mnemonic.NOT,
    Mnemonic.PRTC, Mnemonic.PRTI, Mnemonic.PRTS, Mnemonic.HALT,
  ]);

  let pc = 0;
  while (pc < code.length) {
    const address = pc;
    process.stdout.write(`${String(address).padStart(4, ' ')} `);
    const opcodeValue = code[address];
    pc = address + 1;
    if (opcodeValue >= MnemonicByOrdinal.length) {
      throw new Error(`Unknown opcode value ${opcodeValue} @ ${address}`);
    }
    const op = MnemonicByOrdinal[opcodeValue];
    const format = operandFormats.get(op);
    if (format !== undefined) {
      const operand = getWord(pc);
      process.stdout.write(format(operand, pc));
      pc += WORD_SIZE;
    } else if (bareOpcodes.has(op)) {
      process.stdout.write(op.name.toLowerCase());
    } else {
      // Reached for an opcode with no listing form, e.g. NONE.
      throw new Error(`Unknown opcode ${op.name} (${opcodeValue}) @ ${address}`);
    }
    console.log(); // End the instruction's line
  }
}
/**
 * Hands out the lines of the pre-loaded AST text one at a time.
 *
 * @returns {string|null} The line at `currentLineIndex` (which is then
 *   advanced by one), or `null` once every line has been consumed.
 */
function getNextLine() {
  if (currentLineIndex < inputLines.length) {
    const line = inputLines[currentLineIndex];
    currentLineIndex += 1;
    return line;
  }
  return null;
}
/**
 * Reads one AST subtree from the input lines (pre-order, one node per line).
 *
 * Line layout: the command name occupies the first 15 columns; anything from
 * column 15 onward is the node's value. A line with a non-empty value is a
 * leaf. A line without one is an interior node whose left and right subtrees
 * follow on the next lines. A ";" line stands for an absent child.
 *
 * @returns {Node|null} The subtree, or `null` for a ";" line or end of input.
 * @throws {Error} If the command name is not registered in `strToNodes`.
 */
function loadAst() {
  const line = getNextLine();
  if (line === null) {
    return null;
  }

  // Only lines longer than the command column can carry a value.
  const hasValueColumn = line.length > 15;
  const command = hasValueColumn ? line.substring(0, 15).trim() : line.trim();
  const value = hasValueColumn ? line.substring(15).trim() : null;

  if (command === ";") {
    return null;
  }

  const nodeType = strToNodes.get(command);
  if (!nodeType) {
    throw new Error(`Command not found: '${command}' on line ${currentLineIndex}`);
  }

  if (value !== null && value !== "") {
    return Node.makeLeaf(nodeType, value);
  }

  // Interior node: the two children are read in order, left first.
  const left = loadAst();
  const right = loadAst();
  return Node.makeNode(nodeType, left, right);
}
// --- Initialization ---
/**
 * Registers every command name that may appear in the textual AST, mapping
 * it to its NodeType entry in `strToNodes`.
 */
function initialize() {
  const commandTable = [
    [";", NodeType.nd_None], // Special case for AST parsing termination
    ["Sequence", NodeType.nd_Sequence],
    ["Identifier", NodeType.nd_Ident],
    ["String", NodeType.nd_String],
    ["Integer", NodeType.nd_Integer],
    ["If", NodeType.nd_If],
    ["While", NodeType.nd_While],
    ["Prtc", NodeType.nd_Prtc],
    ["Prts", NodeType.nd_Prts],
    ["Prti", NodeType.nd_Prti],
    ["Assign", NodeType.nd_Assign],
    ["Negate", NodeType.nd_Negate],
    ["Not", NodeType.nd_Not],
    ["Multiply", NodeType.nd_Mul],
    ["Divide", NodeType.nd_Div],
    ["Mod", NodeType.nd_Mod],
    ["Add", NodeType.nd_Add],
    ["Subtract", NodeType.nd_Sub],
    ["Less", NodeType.nd_Lss],
    ["LessEqual", NodeType.nd_Leq],
    ["Greater", NodeType.nd_Gtr],
    ["GreaterEqual", NodeType.nd_Geq],
    ["Equal", NodeType.nd_Eql],
    ["NotEqual", NodeType.nd_Neq],
    ["And", NodeType.nd_And],
    ["Or", NodeType.nd_Or],
  ];
  for (const [command, nodeType] of commandTable) {
    strToNodes.set(command, nodeType);
  }
}
// --- Main Execution ---
/**
 * Command-line entry point: reads the AST file named by the first argument,
 * generates code for it, appends HALT and prints the listing.
 *
 * A missing argument, or any error while reading, parsing or generating, is
 * reported and also sets `process.exitCode` to 1, so shell pipelines and
 * build scripts can tell that no listing was produced.
 */
function main() {
  initialize();
  const args = process.argv.slice(2); // Drop the 'node' executable and script path
  if (args.length === 0) {
    console.log("Usage: node code_generator.js <ast_file>");
    process.exitCode = 1;
    return;
  }

  const filename = args[0];
  try {
    // The whole file is read up front; loadAst() then consumes it line by line.
    const fileContent = fs.readFileSync(filename, 'utf8');
    inputLines = fileContent.split(/\r?\n/); // Accept both LF and CRLF line endings
    currentLineIndex = 0;
    const ast = loadAst();
    if (ast) {
      codeGen(ast);
      emitByte(Mnemonic.HALT); // Terminate the generated program
      listCode();
    } else {
      console.log("No valid AST loaded.");
    }
  } catch (e) {
    console.error(`Error: ${e.message}`);
    process.exitCode = 1;
  }
}
// Execute the main function
// The guard runs main() only when this file is the script Node.js was
// started with, not when it is loaded from another module via require().
if (require.main === module) {
main();
}