174 lines
4.8 KiB
Rust
174 lines
4.8 KiB
Rust
use super::*;
|
|
use crate::backend::{LinkedProgram, SymTable, Symbol};
|
|
|
|
pub struct Encoder {
|
|
pub data: Vec<u8>,
|
|
pub sym_tab: SymTable<u64>,
|
|
pub missing: Vec<(usize, Symbol)>,
|
|
}
|
|
|
|
pub fn compile(p: &Program<X86_64>) -> Result<LinkedProgram<u64>, CompilerMsg> {
|
|
let mut encoder = Encoder::new(p.sym_count());
|
|
|
|
p.encode_data(&mut encoder.data, &mut encoder.sym_tab);
|
|
|
|
for f in &p.funcs {
|
|
let addr = encoder.data.len();
|
|
encoder.sym_tab.insert(f.sym, addr as u64);
|
|
for instr in &f.instrs {
|
|
compile_instr(&mut encoder, instr)?;
|
|
}
|
|
}
|
|
|
|
for (pos, sym) in encoder.missing.drain(..) {
|
|
let addr = encoder
|
|
.sym_tab
|
|
.get(sym)
|
|
.ok_or(CompilerMsg::from(format!("missing symbol {sym:?}")))?;
|
|
encoder.data[pos..pos + 4].copy_from_slice(&addr_offset(pos, addr))
|
|
}
|
|
|
|
Ok(LinkedProgram {
|
|
code: encoder.data,
|
|
entry: p.entry.and_then(|e| encoder.sym_tab.get(e)),
|
|
})
|
|
}
|
|
|
|
type BInstr = crate::backend::Instr<X86_64>;
|
|
fn compile_instr(encoder: &mut Encoder, instr: &BInstr) -> Result<(), CompilerMsg> {
|
|
match instr {
|
|
BInstr::Asm(asm) => {
|
|
for i in &asm.instrs {
|
|
encoder.asm(*i)?;
|
|
}
|
|
}
|
|
_ => todo!(),
|
|
}
|
|
Ok(())
|
|
}
|
|
|
|
impl Encoder {
|
|
// assembly
|
|
|
|
pub fn mov(&mut self, dst: RegMode, src: impl Into<RegImm>) -> Result<(), CompilerMsg> {
|
|
let src = src.into();
|
|
let width = dst.width;
|
|
if width == BitWidth::B16 {
|
|
self.data.push(0x66);
|
|
}
|
|
let dst8 = dst.gt8();
|
|
let b64 = width == BitWidth::B64;
|
|
let b8 = width == BitWidth::B8;
|
|
let src8 = if let RegImm::Reg(src) = src {
|
|
src.gt8()
|
|
} else {
|
|
false
|
|
};
|
|
// special 64-bit / register 4-7 indicator
|
|
if dst8 || src8 || b64 || (dst.gt4() && !dst.high) {
|
|
self.data
|
|
.push(0x40 | dst8 as u8 | ((b64 as u8) << 3) | ((src8 as u8) << 2));
|
|
}
|
|
match src {
|
|
RegImm::Reg(src) => {
|
|
if dst.width != src.width {
|
|
return Err("src and dst are not the same size".into());
|
|
}
|
|
self.data.push(0x88 | !b8 as u8);
|
|
let modrm = 0b11_000_000 | (src.base() << 3) | dst.base();
|
|
self.data.push(modrm);
|
|
}
|
|
RegImm::Imm(imm) => {
|
|
if imm > width.max() {
|
|
return Err("immediate cannot fit in register".into());
|
|
}
|
|
self.data.push(0xb0 | ((!b8 as u8) << 3) | dst.base());
|
|
self.data.extend(&imm.to_le_bytes()[..width.bytes()]);
|
|
}
|
|
}
|
|
Ok(())
|
|
}
|
|
|
|
pub fn lea(&mut self, dst: RegMode, sym: Symbol) {
|
|
self.data.extend([
|
|
0x48 | ((dst.gt8() as u8) << 2),
|
|
0x8d,
|
|
0x05 | (dst.base() << 3),
|
|
]);
|
|
self.sym_offset4(sym);
|
|
}
|
|
|
|
pub fn int(&mut self, code: u8) {
|
|
self.data.extend([0xcd, code])
|
|
}
|
|
|
|
pub fn syscall(&mut self) {
|
|
self.data.extend([0x0f, 0x05])
|
|
}
|
|
|
|
pub fn call(&mut self, sym: Symbol) {
|
|
self.data.push(0xe8);
|
|
self.sym_offset4(sym);
|
|
}
|
|
|
|
pub fn ret(&mut self) {
|
|
self.data.push(0xc3);
|
|
}
|
|
|
|
pub fn push(&mut self, reg: Reg) {
|
|
if reg.gt8() {
|
|
self.data.push(0x41);
|
|
}
|
|
self.data.push(0x50 | reg.base());
|
|
}
|
|
|
|
pub fn pop(&mut self, reg: Reg) {
|
|
if reg.gt8() {
|
|
self.data.push(0x41);
|
|
}
|
|
self.data.push(0x58 | reg.base());
|
|
}
|
|
|
|
/// inserts a 32 bit offset from a symbol
|
|
pub fn sym_offset4(&mut self, sym: Symbol) {
|
|
let Some(addr) = self.sym_tab.get(sym) else {
|
|
let pos = self.data.len();
|
|
self.data.extend([0; 4]);
|
|
self.missing.push((pos, sym));
|
|
return;
|
|
};
|
|
self.data.extend(addr_offset(self.data.len(), addr));
|
|
}
|
|
|
|
pub fn asm(&mut self, instr: Instr) -> Result<(), CompilerMsg> {
|
|
match instr {
|
|
Instr::Mov { dst, src } => self.mov(dst, src)?,
|
|
Instr::Int(code) => self.int(code),
|
|
Instr::Syscall => self.syscall(),
|
|
Instr::Lea { dst, sym } => self.lea(dst, sym),
|
|
Instr::Call(sym) => self.call(sym),
|
|
Instr::Ret => self.ret(),
|
|
Instr::Push(reg) => self.push(reg),
|
|
Instr::Pop(reg) => self.pop(reg),
|
|
}
|
|
Ok(())
|
|
}
|
|
}
|
|
|
|
/// Computes the little-endian 32-bit displacement that reaches `addr` from
/// the end of a 4-byte field located at `pos`.
///
/// assumes the next instruction is directly after
///
/// The subtraction is done modulo 2^32, so the result is the exact
/// displacement whenever the true distance fits in an `i32`, even if `addr`
/// or `pos + 4` on their own do not. A distance that does not fit is
/// truncated to its low 32 bits.
fn addr_offset(pos: usize, addr: u64) -> [u8; 4] {
    // Truncating each operand before a wrapping subtraction yields the low
    // 32 bits of the true difference and can never trip an overflow check.
    let next = (pos as u32).wrapping_add(4);
    (addr as u32).wrapping_sub(next).to_le_bytes()
}
|
|
|
|
impl Encoder {
|
|
pub fn new(sym_count: usize) -> Self {
|
|
Self {
|
|
data: Default::default(),
|
|
sym_tab: SymTable::new(sym_count),
|
|
missing: Default::default(),
|
|
}
|
|
}
|
|
}
|