223 lines
6.3 KiB
Rust
223 lines
6.3 KiB
Rust
use std::collections::HashMap;
|
|
|
|
use super::*;
|
|
use crate::backend::{LibImport, LinkedProgram, SymImport, SymTable, Symbol};
|
|
use util::*;
|
|
|
|
pub struct Encoder<'a> {
|
|
pub data: Vec<u8>,
|
|
pub sym_tab: SymTable<u64>,
|
|
pub missing: Vec<(usize, Symbol)>,
|
|
pub sym_refs: HashMap<Symbol, Vec<usize>>,
|
|
pub program: &'a Program<X86_64>,
|
|
}
|
|
|
|
pub fn compile(p: &Program<X86_64>) -> Result<LinkedProgram<u64>, CompilerMsg> {
|
|
let mut encoder = Encoder::new(p);
|
|
|
|
p.encode_data(&mut encoder.data, &mut encoder.sym_tab);
|
|
|
|
for f in &p.funcs {
|
|
let addr = encoder.data.len();
|
|
encoder.sym_tab.insert(f.sym, addr as u64);
|
|
for instr in &f.instrs {
|
|
compile_instr(&mut encoder, instr)?;
|
|
}
|
|
}
|
|
|
|
for (pos, sym) in encoder.missing.drain(..) {
|
|
let addr = encoder
|
|
.sym_tab
|
|
.get(sym)
|
|
.ok_or(CompilerMsg::from(format!("missing symbol {sym:?}")))?;
|
|
encoder.data[pos..pos + 4].copy_from_slice(&addr_offset(pos, addr))
|
|
}
|
|
|
|
let imports = p
|
|
.external
|
|
.iter()
|
|
.map(|e| LibImport {
|
|
name: e.file.clone(),
|
|
syms: e
|
|
.syms
|
|
.iter()
|
|
.map(|&s| SymImport {
|
|
name: p.sym_info(s).name.clone(),
|
|
usages: encoder.sym_refs.entry(s).or_default().clone(),
|
|
})
|
|
.collect(),
|
|
})
|
|
.collect();
|
|
|
|
Ok(LinkedProgram {
|
|
code: encoder.data,
|
|
entry: p.entry.and_then(|e| encoder.sym_tab.get(e)),
|
|
imports,
|
|
})
|
|
}
|
|
|
|
type BInstr = crate::backend::Instr<X86_64>;
|
|
fn compile_instr(encoder: &mut Encoder, instr: &BInstr) -> Result<(), CompilerMsg> {
|
|
match instr {
|
|
BInstr::Asm(asm) => {
|
|
for i in &asm.instrs {
|
|
encoder.asm(*i)?;
|
|
}
|
|
}
|
|
_ => todo!(),
|
|
}
|
|
Ok(())
|
|
}
|
|
|
|
impl Encoder<'_> {
|
|
// assembly
|
|
|
|
pub fn movi(&mut self, dst: RegWH, imm: u64) -> Result<(), CompilerMsg> {
|
|
if dst.widthh == WidthH::B16 {
|
|
self.data.push(0x66);
|
|
}
|
|
if dst.requires_rex() {
|
|
self.data.push(rex(dst.widthh, 0, 0, dst));
|
|
}
|
|
if imm > dst.widthh.max() {
|
|
return Err("immediate cannot fit in register".into());
|
|
}
|
|
let opcode = 0xb0 | ((dst.widthh.gt8() as u8) << 3);
|
|
self.data.push(opcode | dst.base());
|
|
self.data.extend(&imm.to_le_bytes()[..dst.widthh.bytes()]);
|
|
Ok(())
|
|
}
|
|
|
|
pub fn movr(&mut self, dst: RegH, src: RegH, width: Width) {
|
|
if width == Width::B16 {
|
|
self.data.push(0x66);
|
|
}
|
|
if src.requires_rex(width) || dst.requires_rex(width) {
|
|
self.data.push(rex(width, src, 0, dst));
|
|
}
|
|
self.data.push(0x88 | width.gt8() as u8);
|
|
self.data.push(modrm_regs(src, dst));
|
|
}
|
|
|
|
pub fn movm(&mut self, reg: RegWH, offset: u32, val: u32) {
|
|
self.data.extend([rex(1, reg, 0, 0), 0xc7]);
|
|
self.modrm_regdisp(reg, offset);
|
|
self.data.extend(val.to_le_bytes());
|
|
}
|
|
|
|
pub fn lea(&mut self, dst: RegWH, sym: Symbol) {
|
|
self.data
|
|
.extend([rex(1, dst, 0, 0), 0x8d, modrm_disp32(dst)]);
|
|
self.sym_offset4(sym);
|
|
}
|
|
|
|
pub fn int(&mut self, code: u8) {
|
|
self.data.extend([0xcd, code])
|
|
}
|
|
|
|
pub fn syscall(&mut self) {
|
|
self.data.extend([0x0f, 0x05])
|
|
}
|
|
|
|
pub fn call(&mut self, sym: Symbol) {
|
|
self.data.push(0xe8);
|
|
self.sym_offset4(sym);
|
|
}
|
|
|
|
pub fn callm(&mut self, sym: Symbol) {
|
|
self.data.extend([0xff, 0x15]);
|
|
self.sym_offset4(sym);
|
|
}
|
|
|
|
pub fn ret(&mut self) {
|
|
self.data.push(0xc3);
|
|
}
|
|
|
|
pub fn pushr(&mut self, reg: Reg) {
|
|
if reg.gt8() {
|
|
self.data.push(0x41);
|
|
}
|
|
self.data.push(0x50 | reg.base());
|
|
}
|
|
|
|
pub fn pushi(&mut self, imm: u32) {
|
|
const U8: u32 = 2 << 8;
|
|
if let 0..U8 = imm {
|
|
self.data.push(0x6a);
|
|
self.data.push(imm as u8);
|
|
} else {
|
|
self.data.push(0x68);
|
|
self.data.extend(imm.to_le_bytes());
|
|
}
|
|
}
|
|
|
|
pub fn pop(&mut self, reg: Reg) {
|
|
if reg.gt8() {
|
|
self.data.push(0x41);
|
|
}
|
|
self.data.push(0x58 | reg.base());
|
|
}
|
|
|
|
/// inserts a 32 bit offset from a symbol
|
|
pub fn sym_offset4(&mut self, sym: Symbol) {
|
|
let Some(addr) = self.sym_tab.get(sym) else {
|
|
let pos = self.data.len();
|
|
self.data.extend([0; 4]);
|
|
if self.program.sym_info(sym).external {
|
|
self.sym_refs.entry(sym).or_default().push(pos);
|
|
} else {
|
|
self.missing.push((pos, sym));
|
|
}
|
|
return;
|
|
};
|
|
self.data.extend(addr_offset(self.data.len(), addr));
|
|
}
|
|
|
|
pub fn asm(&mut self, instr: Instr) -> Result<(), CompilerMsg> {
|
|
match instr {
|
|
Instr::Movr { dst, src, width } => self.movr(dst, src, width),
|
|
Instr::Movi { dst, imm } => self.movi(dst, imm)?,
|
|
Instr::Movm { reg, offset, val } => self.movm(reg, offset, val),
|
|
Instr::Int(code) => self.int(code),
|
|
Instr::Syscall => self.syscall(),
|
|
Instr::Lea { dst, sym } => self.lea(dst, sym),
|
|
Instr::Call(sym) => self.call(sym),
|
|
Instr::Callm(sym) => self.callm(sym),
|
|
Instr::Ret => self.ret(),
|
|
Instr::Pushr(reg) => self.pushr(reg),
|
|
Instr::Pushi(imm) => self.pushi(imm),
|
|
Instr::Pop(reg) => self.pop(reg),
|
|
Instr::Sub => self.data.extend([0x48, 0x83, 0xec, 0x28]),
|
|
}
|
|
Ok(())
|
|
}
|
|
|
|
pub fn modrm_regdisp(&mut self, reg: impl Into<Reg>, disp: u32) {
|
|
let reg = reg.into();
|
|
let disp8 = disp < u8::MAX as u32;
|
|
let mod_ = if disp8 { 0b01 } else { 0b10 };
|
|
self.data.push(modrm(mod_, 0, reg.base()));
|
|
if reg == rsp.reg {
|
|
// SIB
|
|
self.data.push(0x24);
|
|
}
|
|
if disp8 {
|
|
self.data.push(disp as u8);
|
|
} else {
|
|
self.data.extend(disp.to_le_bytes());
|
|
}
|
|
}
|
|
}
|
|
|
|
impl<'a> Encoder<'a> {
|
|
pub fn new(program: &'a Program<X86_64>) -> Self {
|
|
Self {
|
|
data: Default::default(),
|
|
sym_tab: SymTable::new(program.sym_count()),
|
|
missing: Default::default(),
|
|
sym_refs: Default::default(),
|
|
program,
|
|
}
|
|
}
|
|
}
|