arch refactor + backend ir start

This commit is contained in:
2026-06-06 21:00:39 -04:00
parent 0ac7c5cc02
commit 4587f687b9
22 changed files with 547 additions and 661 deletions
+141
View File
@@ -0,0 +1,141 @@
use super::*;
use crate::backend::{Addr, LinkedProgram, SymTable, Symbol};
/// Mutable state used while turning a program into raw machine-code bytes.
pub struct Encoder {
/// Bytes emitted so far; `encode_program` returns this buffer as the
/// linked program's `code`.
pub data: Vec<u8>,
/// Symbol-to-address table. `encode_program` records each function's
/// starting offset in `data` here before encoding its instructions.
pub sym_tab: SymTable,
/// Pending fixups: each entry is the offset in `data` of a 4-byte
/// placeholder together with the symbol that was not yet in `sym_tab`
/// when the placeholder was written (see `lea`).
pub missing: Vec<(usize, Symbol)>,
}
/// Encodes a whole x86_64 backend program into a flat byte image.
///
/// Steps, in order:
/// 1. `p.encode_data` is given the output buffer and the symbol table
///    (presumably it writes the program's data and registers its symbols —
///    confirm against `Program::encode_data`).
/// 2. Every function is encoded back to back; its starting offset in the
///    buffer is recorded in the symbol table under `f.sym` first.
/// 3. Each placeholder queued in `Encoder::missing` is patched with the
///    displacement to its symbol, which by now should be defined.
///
/// # Errors
///
/// Returns a `CompilerMsg` if any instruction fails to encode, or if a queued
/// fixup refers to a symbol that is still absent from the symbol table
/// (`"unknown symbol ..."`).
pub fn encode_program(p: &Program<X86_64>) -> Result<LinkedProgram, CompilerMsg> {
    let mut encoder = Encoder::new(p.sym_count());
    p.encode_data(&mut encoder.data, &mut encoder.sym_tab);

    for f in &p.funcs {
        // The function's address is wherever the next byte will land.
        let addr = encoder.data.len();
        encoder.sym_tab.insert(f.sym, addr as u64);
        for instr in &f.instrs {
            compile_instr(&mut encoder, instr)?;
        }
    }

    // Resolve forward references now that every function has an address.
    for (pos, sym) in encoder.missing.drain(..) {
        let addr = encoder
            .sym_tab
            .get(sym)
            // Lazy: only format/allocate the message when the lookup fails.
            .ok_or_else(|| CompilerMsg::from(format!("unknown symbol {sym:?}")))?;
        encoder.data[pos..pos + 4].copy_from_slice(&addr_offset(pos, addr));
    }

    Ok(LinkedProgram {
        code: encoder.data,
        // `None` when the program has no entry symbol or it was never defined.
        entry: p.entry.and_then(|e| encoder.sym_tab.get(e)),
    })
}
/// Backend IR instruction type, instantiated for the x86_64 target.
type BInstr = crate::backend::Instr<X86_64>;

/// Lowers a single backend instruction into bytes appended to `encoder`.
///
/// Inline assembly blocks are encoded instruction by instruction, stopping at
/// the first one that fails. `Copy` is not implemented yet and panics via
/// `todo!()`.
fn compile_instr(encoder: &mut Encoder, instr: &BInstr) -> Result<(), CompilerMsg> {
    // Pick out the assembly block; every other variant is still a stub.
    let block = match instr {
        BInstr::Copy { .. } => todo!(),
        BInstr::Asm(asm) => asm,
    };
    for op in &block.instrs {
        encoder.asm(*op)?;
    }
    Ok(())
}
impl Encoder {
    // assembly

    /// Appends the encoding of `mov dst, src`, where `src` is either a
    /// register or an immediate.
    ///
    /// Operands are validated before anything is written, so a rejected call
    /// leaves `self.data` exactly as it was.
    ///
    /// # Errors
    ///
    /// * `"src and dst are not the same size"` when a register source has a
    ///   different width than `dst`.
    /// * `"immediate cannot fit in register"` when an immediate source exceeds
    ///   `dst.width.max()`.
    pub fn mov(&mut self, dst: RegMode, src: impl Into<RegImm>) -> Result<(), CompilerMsg> {
        let src = src.into();
        let width = dst.width;

        // Validate first: bailing out after the prefix bytes were pushed would
        // leave a dangling, half-encoded instruction in the buffer.
        match src {
            RegImm::Reg(src) if dst.width != src.width => {
                return Err("src and dst are not the same size".into());
            }
            RegImm::Imm(imm) if imm > width.max() => {
                return Err("immediate cannot fit in register".into());
            }
            RegImm::Reg(_) | RegImm::Imm(_) => {}
        }

        // Operand-size override prefix selects 16-bit operands; it must come
        // before any REX byte.
        if width == BitWidth::B16 {
            self.data.push(0x66);
        }

        let dst8 = dst.gt8();
        let b64 = width == BitWidth::B64;
        let b8 = width == BitWidth::B8;
        let src8 = if let RegImm::Reg(src) = src {
            src.gt8()
        } else {
            false
        };

        // special 64-bit / register 4-7 indicator
        // REX layout is 0100WRXB: B (bit 0) extends `dst`, R (bit 2) extends
        // `src`, W (bit 3) selects 64-bit operands.
        // NOTE(review): the "force REX" test only inspects `dst`; if `src` can
        // name a register that also needs a bare REX byte, it presumably has
        // to be checked here too — confirm against `RegMode`.
        if dst8 || src8 || b64 || (dst.gt4() && !dst.high) {
            self.data
                .push(0x40 | dst8 as u8 | ((b64 as u8) << 3) | ((src8 as u8) << 2));
        }

        match src {
            RegImm::Reg(src) => {
                // Opcode 0x88 for 8-bit operands, 0x89 otherwise.
                self.data.push(0x88 | !b8 as u8);
                // Register-direct ModRM: reg field = src, r/m field = dst.
                let modrm = 0b11_000_000 | (src.base() << 3) | dst.base();
                self.data.push(modrm);
            }
            RegImm::Imm(imm) => {
                // Opcode 0xb0+reg for 8-bit operands, 0xb8+reg otherwise,
                // followed by the immediate truncated to the operand width.
                self.data.push(0xb0 | ((!b8 as u8) << 3) | dst.base());
                self.data.extend(&imm.to_le_bytes()[..width.bytes()]);
            }
        }
        Ok(())
    }

    /// Appends `lea dst, [rip + disp32]` addressing `sym`.
    ///
    /// The REX.W bit is always set, so the instruction is emitted in its
    /// 64-bit form regardless of `dst.width`. If `sym` is already in the
    /// symbol table the displacement is written immediately; otherwise four
    /// zero bytes are written and the position is queued in `self.missing`
    /// for later patching.
    pub fn lea(&mut self, dst: RegMode, sym: Symbol) {
        self.data.extend([
            // REX.W, plus REX.R when `dst` needs the register extension bit.
            0x48 | ((dst.gt8() as u8) << 2),
            0x8d,
            // ModRM with mod=00, r/m=101 (disp32 follows), reg = dst.
            0x05 | (dst.base() << 3),
        ]);
        let Some(addr) = self.sym_tab.get(sym) else {
            // Unknown so far: leave a placeholder and remember where it is.
            let pos = self.data.len();
            self.data.extend([0; 4]);
            self.missing.push((pos, sym));
            return;
        };
        self.data.extend(addr_offset(self.data.len(), addr));
    }

    /// Appends `int code` (opcode `0xcd` followed by the interrupt number).
    pub fn int(&mut self, code: u8) {
        self.data.extend([0xcd, code])
    }

    /// Appends `syscall` (bytes `0x0f 0x05`).
    pub fn syscall(&mut self) {
        self.data.extend([0x0f, 0x05])
    }

    /// Encodes one assembly instruction by dispatching to the matching
    /// emitter above.
    ///
    /// # Errors
    ///
    /// Propagates the error from `mov`; the other instructions cannot fail.
    pub fn asm(&mut self, instr: Instr) -> Result<(), CompilerMsg> {
        match instr {
            Instr::Mov { dst, src } => self.mov(dst, src)?,
            Instr::Int { code } => self.int(code),
            Instr::Syscall => self.syscall(),
            Instr::Lea { dst, sym } => self.lea(dst, sym),
        }
        Ok(())
    }
}
/// Computes the little-endian 32-bit displacement from the end of a 4-byte
/// field located at `pos` to the absolute address `addr`.
///
/// Assumes the next instruction starts directly after the 4-byte field, i.e.
/// the displacement is relative to `pos + 4`.
///
/// # Panics
///
/// Panics if the displacement does not fit in a signed 32-bit integer. The
/// arithmetic is done in a wider type so an out-of-range target is reported
/// instead of being silently truncated into a wrong displacement.
fn addr_offset(pos: usize, addr: Addr) -> [u8; 4] {
    // Widen before subtracting so neither operand can wrap or overflow.
    let next = pos as i128 + 4;
    let delta = addr as i128 - next;
    assert!(
        (i128::from(i32::MIN)..=i128::from(i32::MAX)).contains(&delta),
        "relative displacement {delta} does not fit in 32 bits"
    );
    (delta as i32).to_le_bytes()
}
impl Encoder {
    /// Builds an encoder with no emitted bytes and no pending fixups.
    ///
    /// `sym_count` is passed straight to `SymTable::new`.
    pub fn new(sym_count: usize) -> Self {
        let sym_tab = SymTable::new(sym_count);
        Self {
            data: Vec::new(),
            sym_tab,
            missing: Vec::new(),
        }
    }
}