use super::*; use crate::backend::{LinkedProgram, SymTable, Symbol}; pub struct Encoder { pub data: Vec, pub sym_tab: SymTable, pub missing: Vec<(usize, Symbol)>, } pub fn compile(p: &Program) -> Result, CompilerMsg> { let mut encoder = Encoder::new(p.sym_count()); p.encode_data(&mut encoder.data, &mut encoder.sym_tab); for f in &p.funcs { let addr = encoder.data.len(); encoder.sym_tab.insert(f.sym, addr as u64); for instr in &f.instrs { compile_instr(&mut encoder, instr)?; } } for (pos, sym) in encoder.missing.drain(..) { let addr = encoder .sym_tab .get(sym) .ok_or(CompilerMsg::from(format!("missing symbol {sym:?}")))?; encoder.data[pos..pos + 4].copy_from_slice(&addr_offset(pos, addr)) } Ok(LinkedProgram { code: encoder.data, entry: p.entry.and_then(|e| encoder.sym_tab.get(e)), }) } type BInstr = crate::backend::Instr; fn compile_instr(encoder: &mut Encoder, instr: &BInstr) -> Result<(), CompilerMsg> { match instr { BInstr::Asm(asm) => { for i in &asm.instrs { encoder.asm(*i)?; } } _ => todo!(), } Ok(()) } impl Encoder { // assembly pub fn mov(&mut self, dst: RegMode, src: impl Into) -> Result<(), CompilerMsg> { let src = src.into(); let width = dst.width; if width == BitWidth::B16 { self.data.push(0x66); } let dst8 = dst.gt8(); let b64 = width == BitWidth::B64; let b8 = width == BitWidth::B8; let src8 = if let RegImm::Reg(src) = src { src.gt8() } else { false }; // special 64-bit / register 4-7 indicator if dst8 || src8 || b64 || (dst.gt4() && !dst.high) { self.data .push(0x40 | dst8 as u8 | ((b64 as u8) << 3) | ((src8 as u8) << 2)); } match src { RegImm::Reg(src) => { if dst.width != src.width { return Err("src and dst are not the same size".into()); } self.data.push(0x88 | !b8 as u8); let modrm = 0b11_000_000 | (src.base() << 3) | dst.base(); self.data.push(modrm); } RegImm::Imm(imm) => { if imm > width.max() { return Err("immediate cannot fit in register".into()); } self.data.push(0xb0 | ((!b8 as u8) << 3) | dst.base()); self.data.extend(&imm.to_le_bytes()[..width.bytes()]); } } Ok(()) } pub fn lea(&mut self, dst: RegMode, sym: Symbol) { self.data.extend([ 0x48 | ((dst.gt8() as u8) << 2), 0x8d, 0x05 | (dst.base() << 3), ]); self.sym_offset4(sym); } pub fn int(&mut self, code: u8) { self.data.extend([0xcd, code]) } pub fn syscall(&mut self) { self.data.extend([0x0f, 0x05]) } pub fn call(&mut self, sym: Symbol) { self.data.push(0xe8); self.sym_offset4(sym); } pub fn ret(&mut self) { self.data.push(0xc3); } pub fn push(&mut self, reg: Reg) { if reg.gt8() { self.data.push(0x41); } self.data.push(0x50 | reg.base()); } pub fn pop(&mut self, reg: Reg) { if reg.gt8() { self.data.push(0x41); } self.data.push(0x58 | reg.base()); } /// inserts a 32 bit offset from a symbol pub fn sym_offset4(&mut self, sym: Symbol) { let Some(addr) = self.sym_tab.get(sym) else { let pos = self.data.len(); self.data.extend([0; 4]); self.missing.push((pos, sym)); return; }; self.data.extend(addr_offset(self.data.len(), addr)); } pub fn asm(&mut self, instr: Instr) -> Result<(), CompilerMsg> { match instr { Instr::Mov { dst, src } => self.mov(dst, src)?, Instr::Int(code) => self.int(code), Instr::Syscall => self.syscall(), Instr::Lea { dst, sym } => self.lea(dst, sym), Instr::Call(sym) => self.call(sym), Instr::Ret => self.ret(), Instr::Push(reg) => self.push(reg), Instr::Pop(reg) => self.pop(reg), } Ok(()) } } /// assumes the next instruction is directly after fn addr_offset(pos: usize, addr: u64) -> [u8; 4] { let pos = (pos + 4) as i32; let offset = addr as i32 - pos; offset.to_le_bytes() } impl Encoder { pub fn new(sym_count: usize) -> Self { Self { data: Default::default(), sym_tab: SymTable::new(sym_count), missing: Default::default(), } } }