375 lines
11 KiB
C
375 lines
11 KiB
C
#include <stdlib.h>
|
|
#include <stdio.h>
|
|
#include <string.h>
|
|
#include <stdarg.h>
|
|
#include <stdint.h>
|
|
#include <ctype.h>
|
|
|
|
typedef unsigned char uchar;
|
|
|
|
typedef enum {
|
|
nd_Ident, nd_String, nd_Integer, nd_Sequence, nd_If, nd_Prtc, nd_Prts, nd_Prti, nd_While,
|
|
nd_Assign, nd_Negate, nd_Not, nd_Mul, nd_Div, nd_Mod, nd_Add, nd_Sub, nd_Lss, nd_Leq,
|
|
nd_Gtr, nd_Geq, nd_Eql, nd_Neq, nd_And, nd_Or
|
|
} NodeType;
|
|
|
|
typedef enum { FETCH, STORE, PUSH, ADD, SUB, MUL, DIV, MOD, LT, GT, LE, GE, EQ, NE, AND,
|
|
OR, NEG, NOT, JMP, JZ, PRTC, PRTS, PRTI, HALT
|
|
} Code_t;
|
|
|
|
typedef uchar code;
|
|
|
|
typedef struct Tree {
|
|
NodeType node_type;
|
|
struct Tree *left;
|
|
struct Tree *right;
|
|
char *value;
|
|
} Tree;
|
|
|
|
#define da_dim(name, type) type *name = NULL; \
|
|
int _qy_ ## name ## _p = 0; \
|
|
int _qy_ ## name ## _max = 0
|
|
|
|
#define da_redim(name) do {if (_qy_ ## name ## _p >= _qy_ ## name ## _max) \
|
|
name = realloc(name, (_qy_ ## name ## _max += 32) * sizeof(name[0]));} while (0)
|
|
|
|
#define da_rewind(name) _qy_ ## name ## _p = 0
|
|
|
|
#define da_append(name, x) do {da_redim(name); name[_qy_ ## name ## _p++] = x;} while (0)
|
|
#define da_len(name) _qy_ ## name ## _p
|
|
#define da_add(name) do {da_redim(name); _qy_ ## name ## _p++;} while (0)
|
|
|
|
FILE *source_fp, *dest_fp;
|
|
static int here;
|
|
da_dim(object, code);
|
|
da_dim(globals, const char *);
|
|
da_dim(string_pool, const char *);
|
|
|
|
// dependency: Ordered by NodeType, must remain in same order as NodeType enum
|
|
struct {
|
|
char *enum_text;
|
|
NodeType node_type;
|
|
Code_t opcode;
|
|
} atr[] = {
|
|
{"Identifier" , nd_Ident, -1 },
|
|
{"String" , nd_String, -1 },
|
|
{"Integer" , nd_Integer, -1 },
|
|
{"Sequence" , nd_Sequence, -1 },
|
|
{"If" , nd_If, -1 },
|
|
{"Prtc" , nd_Prtc, -1 },
|
|
{"Prts" , nd_Prts, -1 },
|
|
{"Prti" , nd_Prti, -1 },
|
|
{"While" , nd_While, -1 },
|
|
{"Assign" , nd_Assign, -1 },
|
|
{"Negate" , nd_Negate, NEG},
|
|
{"Not" , nd_Not, NOT},
|
|
{"Multiply" , nd_Mul, MUL},
|
|
{"Divide" , nd_Div, DIV},
|
|
{"Mod" , nd_Mod, MOD},
|
|
{"Add" , nd_Add, ADD},
|
|
{"Subtract" , nd_Sub, SUB},
|
|
{"Less" , nd_Lss, LT },
|
|
{"LessEqual" , nd_Leq, LE },
|
|
{"Greater" , nd_Gtr, GT },
|
|
{"GreaterEqual", nd_Geq, GE },
|
|
{"Equal" , nd_Eql, EQ },
|
|
{"NotEqual" , nd_Neq, NE },
|
|
{"And" , nd_And, AND},
|
|
{"Or" , nd_Or, OR },
|
|
};
|
|
|
|
void error(const char *fmt, ... ) {
|
|
va_list ap;
|
|
char buf[1000];
|
|
|
|
va_start(ap, fmt);
|
|
vsprintf(buf, fmt, ap);
|
|
va_end(ap);
|
|
printf("error: %s\n", buf);
|
|
exit(1);
|
|
}
|
|
|
|
Code_t type_to_op(NodeType type) {
|
|
return atr[type].opcode;
|
|
}
|
|
|
|
Tree *make_node(NodeType node_type, Tree *left, Tree *right) {
|
|
Tree *t = calloc(sizeof(Tree), 1);
|
|
t->node_type = node_type;
|
|
t->left = left;
|
|
t->right = right;
|
|
return t;
|
|
}
|
|
|
|
Tree *make_leaf(NodeType node_type, char *value) {
|
|
Tree *t = calloc(sizeof(Tree), 1);
|
|
t->node_type = node_type;
|
|
t->value = strdup(value);
|
|
return t;
|
|
}
|
|
|
|
/*** Code generator ***/
|
|
|
|
void emit_byte(int c) {
|
|
da_append(object, (uchar)c);
|
|
++here;
|
|
}
|
|
|
|
void emit_int(int32_t n) {
|
|
union {
|
|
int32_t n;
|
|
unsigned char c[sizeof(int32_t)];
|
|
} x;
|
|
|
|
x.n = n;
|
|
|
|
for (size_t i = 0; i < sizeof(x.n); ++i) {
|
|
emit_byte(x.c[i]);
|
|
}
|
|
}
|
|
|
|
int hole() {
|
|
int t = here;
|
|
emit_int(0);
|
|
return t;
|
|
}
|
|
|
|
void fix(int src, int dst) {
|
|
*(int32_t *)(object + src) = dst-src;
|
|
}
|
|
|
|
int fetch_var_offset(const char *id) {
|
|
for (int i = 0; i < da_len(globals); ++i) {
|
|
if (strcmp(id, globals[i]) == 0)
|
|
return i;
|
|
}
|
|
da_add(globals);
|
|
int n = da_len(globals) - 1;
|
|
globals[n] = strdup(id);
|
|
return n;
|
|
}
|
|
|
|
int fetch_string_offset(const char *st) {
|
|
for (int i = 0; i < da_len(string_pool); ++i) {
|
|
if (strcmp(st, string_pool[i]) == 0)
|
|
return i;
|
|
}
|
|
da_add(string_pool);
|
|
int n = da_len(string_pool) - 1;
|
|
string_pool[n] = strdup(st);
|
|
return n;
|
|
}
|
|
|
|
void code_gen(Tree *x) {
|
|
int p1, p2, n;
|
|
|
|
if (x == NULL) return;
|
|
switch (x->node_type) {
|
|
case nd_Ident:
|
|
emit_byte(FETCH);
|
|
n = fetch_var_offset(x->value);
|
|
emit_int(n);
|
|
break;
|
|
case nd_Integer:
|
|
emit_byte(PUSH);
|
|
emit_int(atoi(x->value));
|
|
break;
|
|
case nd_String:
|
|
emit_byte(PUSH);
|
|
n = fetch_string_offset(x->value);
|
|
emit_int(n);
|
|
break;
|
|
case nd_Assign:
|
|
n = fetch_var_offset(x->left->value);
|
|
code_gen(x->right);
|
|
emit_byte(STORE);
|
|
emit_int(n);
|
|
break;
|
|
case nd_If:
|
|
code_gen(x->left); // if expr
|
|
emit_byte(JZ); // if false, jump
|
|
p1 = hole(); // make room for jump dest
|
|
code_gen(x->right->left); // if true statements
|
|
if (x->right->right != NULL) {
|
|
emit_byte(JMP);
|
|
p2 = hole();
|
|
}
|
|
fix(p1, here);
|
|
if (x->right->right != NULL) {
|
|
code_gen(x->right->right);
|
|
fix(p2, here);
|
|
}
|
|
break;
|
|
case nd_While:
|
|
p1 = here;
|
|
code_gen(x->left); // while expr
|
|
emit_byte(JZ); // if false, jump
|
|
p2 = hole(); // make room for jump dest
|
|
code_gen(x->right); // statements
|
|
emit_byte(JMP); // back to the top
|
|
fix(hole(), p1); // plug the top
|
|
fix(p2, here); // plug the 'if false, jump'
|
|
break;
|
|
case nd_Sequence:
|
|
code_gen(x->left);
|
|
code_gen(x->right);
|
|
break;
|
|
case nd_Prtc:
|
|
code_gen(x->left);
|
|
emit_byte(PRTC);
|
|
break;
|
|
case nd_Prti:
|
|
code_gen(x->left);
|
|
emit_byte(PRTI);
|
|
break;
|
|
case nd_Prts:
|
|
code_gen(x->left);
|
|
emit_byte(PRTS);
|
|
break;
|
|
case nd_Lss: case nd_Gtr: case nd_Leq: case nd_Geq: case nd_Eql: case nd_Neq:
|
|
case nd_And: case nd_Or: case nd_Sub: case nd_Add: case nd_Div: case nd_Mul:
|
|
case nd_Mod:
|
|
code_gen(x->left);
|
|
code_gen(x->right);
|
|
emit_byte(type_to_op(x->node_type));
|
|
break;
|
|
case nd_Negate: case nd_Not:
|
|
code_gen(x->left);
|
|
emit_byte(type_to_op(x->node_type));
|
|
break;
|
|
default:
|
|
error("error in code generator - found %d, expecting operator\n", x->node_type);
|
|
}
|
|
}
|
|
|
|
void code_finish() {
|
|
emit_byte(HALT);
|
|
}
|
|
|
|
void list_code() {
|
|
fprintf(dest_fp, "Datasize: %d Strings: %d\n", da_len(globals), da_len(string_pool));
|
|
for (int i = 0; i < da_len(string_pool); ++i)
|
|
fprintf(dest_fp, "%s\n", string_pool[i]);
|
|
|
|
code *pc = object;
|
|
|
|
again: fprintf(dest_fp, "%5d ", (int)(pc - object));
|
|
switch (*pc++) {
|
|
case FETCH: fprintf(dest_fp, "fetch [%d]\n", *(int32_t *)pc);
|
|
pc += sizeof(int32_t); goto again;
|
|
case STORE: fprintf(dest_fp, "store [%d]\n", *(int32_t *)pc);
|
|
pc += sizeof(int32_t); goto again;
|
|
case PUSH : fprintf(dest_fp, "push %d\n", *(int32_t *)pc);
|
|
pc += sizeof(int32_t); goto again;
|
|
case ADD : fprintf(dest_fp, "add\n"); goto again;
|
|
case SUB : fprintf(dest_fp, "sub\n"); goto again;
|
|
case MUL : fprintf(dest_fp, "mul\n"); goto again;
|
|
case DIV : fprintf(dest_fp, "div\n"); goto again;
|
|
case MOD : fprintf(dest_fp, "mod\n"); goto again;
|
|
case LT : fprintf(dest_fp, "lt\n"); goto again;
|
|
case GT : fprintf(dest_fp, "gt\n"); goto again;
|
|
case LE : fprintf(dest_fp, "le\n"); goto again;
|
|
case GE : fprintf(dest_fp, "ge\n"); goto again;
|
|
case EQ : fprintf(dest_fp, "eq\n"); goto again;
|
|
case NE : fprintf(dest_fp, "ne\n"); goto again;
|
|
case AND : fprintf(dest_fp, "and\n"); goto again;
|
|
case OR : fprintf(dest_fp, "or\n"); goto again;
|
|
case NOT : fprintf(dest_fp, "not\n"); goto again;
|
|
case NEG : fprintf(dest_fp, "neg\n"); goto again;
|
|
case JMP : fprintf(dest_fp, "jmp (%d) %d\n",
|
|
*(int32_t *)pc, (int32_t)(pc + *(int32_t *)pc - object));
|
|
pc += sizeof(int32_t); goto again;
|
|
case JZ : fprintf(dest_fp, "jz (%d) %d\n",
|
|
*(int32_t *)pc, (int32_t)(pc + *(int32_t *)pc - object));
|
|
pc += sizeof(int32_t); goto again;
|
|
case PRTC : fprintf(dest_fp, "prtc\n"); goto again;
|
|
case PRTI : fprintf(dest_fp, "prti\n"); goto again;
|
|
case PRTS : fprintf(dest_fp, "prts\n"); goto again;
|
|
case HALT : fprintf(dest_fp, "halt\n"); break;
|
|
default:error("listcode:Unknown opcode %d\n", *(pc - 1));
|
|
}
|
|
}
|
|
|
|
void init_io(FILE **fp, FILE *std, const char mode[], const char fn[]) {
|
|
if (fn[0] == '\0')
|
|
*fp = std;
|
|
else if ((*fp = fopen(fn, mode)) == NULL)
|
|
error(0, 0, "Can't open %s\n", fn);
|
|
}
|
|
|
|
NodeType get_enum_value(const char name[]) {
|
|
for (size_t i = 0; i < sizeof(atr) / sizeof(atr[0]); i++) {
|
|
if (strcmp(atr[i].enum_text, name) == 0) {
|
|
return atr[i].node_type;
|
|
}
|
|
}
|
|
error("Unknown token %s\n", name);
|
|
return -1;
|
|
}
|
|
|
|
char *read_line(int *len) {
|
|
static char *text = NULL;
|
|
static int textmax = 0;
|
|
|
|
for (*len = 0; ; (*len)++) {
|
|
int ch = fgetc(source_fp);
|
|
if (ch == EOF || ch == '\n') {
|
|
if (*len == 0)
|
|
return NULL;
|
|
break;
|
|
}
|
|
if (*len + 1 >= textmax) {
|
|
textmax = (textmax == 0 ? 128 : textmax * 2);
|
|
text = realloc(text, textmax);
|
|
}
|
|
text[*len] = ch;
|
|
}
|
|
text[*len] = '\0';
|
|
return text;
|
|
}
|
|
|
|
char *rtrim(char *text, int *len) { // remove trailing spaces
|
|
for (; *len > 0 && isspace(text[*len - 1]); --(*len))
|
|
;
|
|
|
|
text[*len] = '\0';
|
|
return text;
|
|
}
|
|
|
|
Tree *load_ast() {
|
|
int len;
|
|
char *yytext = read_line(&len);
|
|
yytext = rtrim(yytext, &len);
|
|
|
|
// get first token
|
|
char *tok = strtok(yytext, " ");
|
|
|
|
if (tok[0] == ';') {
|
|
return NULL;
|
|
}
|
|
NodeType node_type = get_enum_value(tok);
|
|
|
|
// if there is extra data, get it
|
|
char *p = tok + strlen(tok);
|
|
if (p != &yytext[len]) {
|
|
for (++p; isspace(*p); ++p)
|
|
;
|
|
return make_leaf(node_type, p);
|
|
}
|
|
|
|
Tree *left = load_ast();
|
|
Tree *right = load_ast();
|
|
return make_node(node_type, left, right);
|
|
}
|
|
|
|
int main(int argc, char *argv[]) {
|
|
init_io(&source_fp, stdin, "r", argc > 1 ? argv[1] : "");
|
|
init_io(&dest_fp, stdout, "wb", argc > 2 ? argv[2] : "");
|
|
|
|
code_gen(load_ast());
|
|
code_finish();
|
|
list_code();
|
|
|
|
return 0;
|
|
}
|