(notonline)--> -- -- demo\rosetta\Compiler\cgen.e -- ============================ -- -- The reusable part of cgen.exw -- without js -- (machine code!) include parse.e global sequence vars = {}, strings = {}, stringptrs = {} global integer chain = 0 global sequence code = {} function var_idx(sequence inode) if inode[1]!=tk_Identifier then ?9/0 end if string ident = inode[2] integer n = find(ident,vars) if n=0 then vars = append(vars,ident) n = length(vars) end if return n end function function string_idx(sequence inode) if inode[1]!=tk_String then ?9/0 end if string s = inode[2] integer n = find(s,strings) if n=0 then strings = append(strings,s) stringptrs = append(stringptrs,0) n = length(strings) end if return n end function function gen_size(object t) -- note: must be kept precisely in sync with gen_rec! -- (relentlessly tested via estsize/actsize) integer size = 0 if t!=NULL then integer n_type = t[1] string node_type = tkNames[n_type] switch n_type do case tk_Sequence: size += gen_size(t[2]) size += gen_size(t[3]) case tk_assign: size += gen_size(t[3])+6 case tk_Integer: size += 5 case tk_Identifier: size += 6 case tk_String: size += 5 case tk_while: -- emit: @@:<condition><topjmp(@f)><body><tailjmp(@b)>@@: size += gen_size(t[2])+3 integer body = gen_size(t[3]) integer stail = iff(size+body+2>128?5:2) integer stop = iff(body+stail >127?6:2) size += stop+body+stail case tk_lt: case tk_le: case tk_ne: case tk_eq: case tk_gt: case tk_ge: size += gen_size(t[2]) size += gen_size(t[3]) size += 10 case tk_and: case tk_or: size += gen_size(t[2]) size += gen_size(t[3]) size += 15 case tk_add: case tk_sub: size += gen_size(t[2]) size += gen_size(t[3]) size += 4 case tk_mul: size += gen_size(t[2]) size += gen_size(t[3]) size += 5 case tk_div: case tk_mod: size += gen_size(t[2]) size += gen_size(t[3]) size += 6 case tk_putc: case tk_Printi: case tk_Prints: size += gen_size(t[2]) size += 5 case tk_if: size += gen_size(t[2])+3 if t[3][1]!=tk_if then ?9/0 end if integer truesize = gen_size(t[3][2]) integer falsesize = gen_size(t[3][3]) integer elsejmp = iff(falsesize=0?0:iff(falsesize>127?5:2)) integer mainjmp = iff(truesize+elsejmp>127?6:2) size += mainjmp+truesize+elsejmp+falsesize case tk_not: size += gen_size(t[2]) size += 9 case tk_neg: size += gen_size(t[2]) size += 4 else: ?9/0 end switch end if return size end function procedure gen_rec(object t) -- the recursive part of code_gen if t!=NULL then integer initsize = length(code) integer estsize = gen_size(t) -- (test the gen_size function) integer n_type = t[1] string node_type = tkNames[n_type] switch n_type do case tk_Sequence: gen_rec(t[2]) gen_rec(t[3]) case tk_assign: integer n = var_idx(t[2]) gen_rec(t[3]) code &= {0o217,0o005,chain,1,n,0} -- pop [i] chain = length(code)-3 case tk_Integer: integer n = t[2] code &= 0o150&int_to_bytes(n) -- push imm32 case tk_while: -- emit: @@:<condition><topjmp(@f)><body><tailjmp(@b)>@@: integer looptop = length(code) gen_rec(t[2]) code &= {0o130, -- pop eax 0o205,0o300} -- test eax,eax integer bodysize = gen_size(t[3]) -- can we use short jumps? -- disclaimer: size calcs are not heavily tested; if in -- doubt reduce 128/7 by 8, and if that works -- then yep, you just found a boundary case. integer stail = iff(length(code)+bodysize+4-looptop>128?5:2) integer offset = bodysize+stail integer stop = iff(offset>127?6:2) if stop=2 then code &= {0o164,offset} -- jz (short) end else code &= {0o017,0o204}&int_to_bytes(offset) -- jz (long) end end if gen_rec(t[3]) offset = looptop-(length(code)+stail) if stail=2 then code &= 0o353&offset -- jmp looptop (short) else code &= 0o351&int_to_bytes(offset) -- jmp looptop (long) end if case tk_lt: case tk_le: case tk_gt: case tk_ge: case tk_ne: case tk_eq: gen_rec(t[2]) gen_rec(t[3]) integer xrm if n_type=tk_ne then xrm = 0o225 -- (#95) elsif n_type=tk_lt then xrm = 0o234 -- (#9C) elsif n_type=tk_ge then xrm = 0o235 -- (#9D) elsif n_type=tk_le then xrm = 0o236 -- (#9E) elsif n_type=tk_gt then xrm = 0o237 -- (#9F) else ?9/0 end if code &= { 0o061,0o300, -- xor eax,eax 0o132, -- pop edx 0o131, -- pop ecx 0o071,0o321, -- cmp ecx,edx 0o017,xrm,0o300, -- setcc al 0o120} -- push eax case tk_or: case tk_and: gen_rec(t[2]) gen_rec(t[3]) integer op = find(n_type,{tk_or,0,0,tk_and}) op *= 0o010 code &= { 0o130, -- pop eax 0o131, -- pop ecx 0o205,0o300, -- test eax,eax 0o017,0o225,0o300, -- setne al 0o205,0o311, -- test ecx,ecx 0o017,0o225,0o301, -- setne cl op,0o310, -- or/and al,cl 0o120} -- push eax case tk_add: case tk_sub: gen_rec(t[2]) gen_rec(t[3]) integer op = find(n_type,{tk_add,0,0,0,0,tk_sub}) op = 0o001 + (op-1)*0o010 code &= { 0o130, -- pop eax op,0o004,0o044} -- add/or/and/sub [esp],eax case tk_mul: gen_rec(t[2]) gen_rec(t[3]) code &= { 0o131, -- pop ecx 0o130, -- pop eax 0o367,0o341, -- mul ecx 0o120} -- push eax case tk_div: case tk_mod: gen_rec(t[2]) gen_rec(t[3]) integer push = 0o120+(n_type=tk_mod)*2 code &= { 0o131, -- pop ecx 0o130, -- pop eax 0o231, -- cdq (eax -> edx:eax) 0o367,0o371, -- idiv ecx push} -- push eax|edx case tk_Identifier: integer n = var_idx(t) code &= {0o377,0o065,chain,1,n,0} -- push [n] chain = length(code)-3 case tk_putc: case tk_Printi: case tk_Prints: gen_rec(t[2]) integer n = find(n_type,{tk_putc,tk_Printi,tk_Prints}) code &= {0o350,chain,3,n,0} -- call :printc/i/s chain = length(code)-3 case tk_String: integer n = string_idx(t) code &= {0o150,chain,2,n,0} -- push RawStringPtr(string) chain = length(code)-3 case tk_if: -- emit: <condition><mainjmp><truepart>[<elsejmp><falsepart>] gen_rec(t[2]) code &= {0o130, -- pop eax 0o205,0o300} -- test eax,eax if t[3][1]!=tk_if then ?9/0 end if integer truesize = gen_size(t[3][2]) integer falsesize = gen_size(t[3][3]) integer elsejmp = iff(falsesize=0?0:iff(falsesize>127?5:2)) integer offset = truesize+elsejmp integer mainjmp = iff(offset>127?6:2) if mainjmp=2 then code &= {0o164,offset} -- jz (short) else/end else code &= {0o017,0o204}&int_to_bytes(offset) -- jz (long) else/end end if gen_rec(t[3][2]) if falsesize!=0 then offset = falsesize if elsejmp=2 then code &= 0o353&offset -- jmp end if (short) else code &= 0o351&int_to_bytes(offset) -- jmp end if (long) end if gen_rec(t[3][3]) end if case tk_not: gen_rec(t[2]) code &= {0o132, -- pop edx 0o061,0o300, -- xor eax,eax 0o205,0o322, -- test edx,edx 0o017,0o224,0o300, -- setz al 0o120} -- push eax case tk_neg: gen_rec(t[2]) code &= {0o130, -- pop eax 0o367,0o330, -- neg eax 0o120} -- push eax else: error("error in code generator - found %d, expecting operator\n", {n_type}) end switch integer actsize = length(code) if initsize+estsize!=actsize then ?"9/0" end if -- (test gen_size) end if end procedure global procedure code_gen(object t) -- -- Generates proper machine code. -- -- Example: i=10; print "\n"; print i; print "\n" -- Result in vars, strings, chain, code (declared above) -- where vars is: {"i"}, -- strings is {"\n"}, -- code is { 0o150,#0A,#00,#00,#00, -- 1: push 10 -- 0o217,0o005,0,1,1,0 -- 6: pop [i] -- 0o150,8,2,1,0, -- 12: push ("\n") -- 0o350,13,3,3,0, -- 17: call :prints -- 0o377,0o065,18,1,1,0, -- 22: push [i] -- 0o350,24,3,2,0, -- 28: call :printi -- 0o150,29,2,1,0, -- 33: push ("\n") -- 0o350,34,3,3,0, -- 38: call :prints -- 0o303} -- 43: ret -- and chain is 39 (->34->29->24->18->13->8->0) -- The chain connects all places where we need an actual address before -- the code is executed, with the byte after the link differentiating -- between var(1), string(2), and builtin(3), and the byte after that -- determining the instance of the given type - not that any of them -- are actually limited to a byte in the above intermediate form, and -- of course the trailing 0 of each {link,type,id,0} is just there to -- reserve the space we will need. -- gen_rec(t) code = append(code,0o303) -- ret (0o303=#C3) end procedure include builtins/VM/puts1.e -- low-level console i/o routines function setbuiltins() atom printc,printi,prints #ilASM{ jmp :setbuiltins ::printc lea edi,[esp+4] mov esi,1 call :%puts1ediesi -- (edi=raw text, esi=length) ret 4 ::printi mov eax,[esp+4] push 0 -- no cr call :%putsint -- (nb limited to +/-9,999,999,999) ret 4 ::prints mov edi,[esp+4] mov esi,[edi-12] call :%puts1ediesi -- (edi=raw text, esi=length) ret 4 ::setbuiltins mov eax,:printc lea edi,[printc] call :%pStoreMint mov eax,:printi lea edi,[printi] call :%pStoreMint mov eax,:prints lea edi,[prints] call :%pStoreMint } return {printc,printi,prints} end function global constant builtin_names = {"printc","printi","prints"} global constant builtins = setbuiltins() global atom var_mem, code_mem function RawStringPtr(integer n) -- (based on IupRawStringPtr from pGUI.e) -- -- Returns a raw string pointer for s, somewhat like allocate_string(s), but using the existing memory. -- NOTE: The return is only valid as long as the value passed as the parameter remains in existence. -- atom res string s = strings[n] #ilASM{ mov eax,[s] lea edi,[res] shl eax,2 call :%pStoreMint } stringptrs[n] = res return res end function global procedure fixup() var_mem = allocate(length(vars)*4) mem_set(var_mem,0,length(vars)*4) code_mem = allocate(length(code)) poke(code_mem,code) while chain!=0 do integer this = chain chain = code[this] integer ftype = code[this+1] integer id = code[this+2] switch ftype do case 1: -- vars poke4(code_mem+this-1,var_mem+(id-1)*4) case 2: -- strings poke4(code_mem+this-1,RawStringPtr(id)) case 3: -- builtins poke4(code_mem+this-1,builtins[id]-(code_mem+this+3)) end switch end while end procedure