use crate::{ backend::{ program::{Addr, SymTable}, symbol::Symbol, }, io::CompilerMsg, }; pub struct Asm { pub instrs: Vec, } pub enum Instr { Mov { dst: RegMode, src: RegImm }, Int { code: u8 }, Lea { dst: RegMode, sym: Symbol }, } pub enum RegImm { Reg(RegMode), Imm(u64), } pub struct Reg(u8); pub struct RegMode { reg: Reg, width: BitWidth, high: bool, } impl super::super::program::Instr for Instr { fn encode( &self, data: &mut Vec, syms: &mut SymTable, ) -> Result, CompilerMsg> { match self { Instr::Mov { dst, src } => { let width = dst.width; if width == BitWidth::B16 { data.push(0x66); } let dst8 = dst.gt8(); let b64 = width == BitWidth::B64; let b8 = width == BitWidth::B8; let src8 = if let RegImm::Reg(src) = src { src.gt8() } else { false }; // special 64-bit / register 4-7 indicator if dst8 || src8 || b64 || (dst.gt4() && !dst.high) { data.push(0x40 | dst8 as u8 | ((b64 as u8) << 3) | ((src8 as u8) << 2)); } match src { RegImm::Reg(src) => { if dst.width != src.width { return Err("src and dst are not the same size".into()); } data.push(0x88 | !b8 as u8); let modrm = 0b11_000_000 | (src.base() << 3) | dst.base(); data.push(modrm); } &RegImm::Imm(imm) => { if imm > width.max() { return Err("immediate cannot fit in register".into()); } data.push(0xb0 | ((!b8 as u8) << 3) | dst.base()); data.extend(&imm.to_le_bytes()[..width.bytes()]); } } } Instr::Int { code } => data.extend([0xcd, *code]), Instr::Lea { dst, sym } => { data.extend([ 0x48 | ((dst.gt8() as u8) << 2), 0x8d, 0x05 | (dst.base() << 3), ]); let Some(addr) = syms.get(*sym) else { let pos = data.len(); data.extend([0; 4]); return Ok(Some((*sym, pos))); }; data.extend(addr_offset(data.len(), addr)); } } Ok(None) } fn insert_sym(&self, data: &mut Vec, pos: usize, addr: Addr) { match self { Self::Lea { .. } => data[pos..pos + 4].copy_from_slice(&addr_offset(pos, addr)), _ => panic!("unkown symbol insertion"), } } } /// assumes the next instruction is directly after fn addr_offset(pos: usize, addr: Addr) -> [u8; 4] { let pos = (pos + 4) as i32; let offset = addr.val() as i32 - pos; offset.to_le_bytes() } impl RegMode { pub fn base(&self) -> u8 { self.reg.0 & 0b111 } /// checks if register is not one of the first 8 (0-7) pub fn gt8(&self) -> bool { self.reg.0 >= 0b1000 } pub fn gt4(&self) -> bool { self.reg.0 >= 0b0100 } } macro_rules! def_regs { ($($val:literal : $B64:ident $B32:ident $B16:ident $B8:ident $($B8H:ident=$hval:expr)?,)*) => { #[allow(non_upper_case_globals)] pub mod reg { use super::{RegMode, BitWidth, Reg}; $( pub const $B64: RegMode = RegMode { reg: Reg($val), width: BitWidth::B64, high: false }; pub const $B32: RegMode = RegMode { reg: Reg($val), width: BitWidth::B32, high: false }; pub const $B16: RegMode = RegMode { reg: Reg($val), width: BitWidth::B16, high: false }; pub const $B8 : RegMode = RegMode { reg: Reg($val), width: BitWidth::B8, high: false }; $( pub const $B8H: RegMode = RegMode { reg: $hval.reg, width: BitWidth::B8, high: true }; )? )* } impl RegMode { pub fn parse(s: &str) -> Option { Some(match s.to_lowercase().as_str() { $( stringify!($B64) => reg::$B64, stringify!($B32) => reg::$B32, stringify!($B16) => reg::$B16, stringify!($B8 ) => reg::$B8, $( stringify!($B8H) => reg::$B8H, )? )* _ => return None, }) } } }; } def_regs! { 0b0000 : rax eax ax al ah=spl, 0b0001 : rcx ecx cx cl ch=bpl, 0b0010 : rdx edx dx dl dh=sil, 0b0011 : rbx ebx bx bl bh=dil, 0b0100 : rsp esp sp spl, 0b0101 : rbp ebp bp bpl, 0b0110 : rsi esi si sil, 0b0111 : rdi edi di dil, 0b1000 : r8 r8d r8w r8b, 0b1001 : r9 r9d r9w r9b, 0b1010 : r10 r10d r10w r10b, 0b1011 : r11 r11d r11w r11b, 0b1100 : r12 r12d r12w r12b, 0b1101 : r13 r13d r13w r13b, 0b1110 : r14 r14d r14w r14b, 0b1111 : r15 r15d r15w r15b, } #[derive(Clone, Copy, PartialEq)] pub enum BitWidth { B64, B32, B16, B8, } impl BitWidth { pub const fn max(&self) -> u64 { match self { Self::B64 => u64::MAX, Self::B32 => u32::MAX as u64, Self::B16 => u16::MAX as u64, Self::B8 => u8::MAX as u64, } } pub const fn bytes(&self) -> usize { match self { Self::B64 => 8, Self::B32 => 4, Self::B16 => 2, Self::B8 => 1, } } } pub mod instr { use super::*; pub fn mov(dst: RegMode, src: impl Into) -> Instr { Instr::Mov { dst, src: src.into(), } } pub fn lea(dst: RegMode, sym: Symbol) -> Instr { Instr::Lea { dst, sym } } pub fn int(code: u8) -> Instr { Instr::Int { code } } } impl From for RegImm { fn from(value: RegMode) -> Self { Self::Reg(value) } } impl From for RegImm { fn from(value: u64) -> Self { Self::Imm(value) } } #[cfg(test)] mod test { use crate::backend::program::Instr as _; use super::*; use instr::*; use reg::*; fn eq(expected: impl AsRef<[u8]>, got: Instr) { let expected = expected.as_ref(); let mut res = Vec::new(); if let Err(e) = got.encode(&mut res, &mut SymTable::new(0)) { panic!("expected {expected:x?}, failed to compile: {}", e.msg); } assert_eq!(expected, &res[..], "expected {expected:x?}, got {res:x?}"); } #[test] fn reg_reg() { // used objdump on some nasm compiled assembly eq([0x48, 0x89, 0xd8], mov(rax, rbx)); eq([0x89, 0xd8], mov(eax, ebx)); eq([0x66, 0x89, 0xd8], mov(ax, bx)); eq([0x88, 0xd8], mov(al, bl)); eq([0x88, 0xfc], mov(ah, bh)); eq([0x88, 0xf8], mov(al, bh)); eq([0x88, 0xdc], mov(ah, bl)); eq([0x40, 0x88, 0xe7], mov(dil, spl)); eq([0x4d, 0x89, 0xc8], mov(r8, r9)); eq([0x45, 0x89, 0xc8], mov(r8d, r9d)); eq([0x66, 0x45, 0x89, 0xc8], mov(r8w, r9w)); eq([0x45, 0x88, 0xc8], mov(r8b, r9b)); eq([0x49, 0x89, 0xc0], mov(r8, rax)); eq([0x4c, 0x89, 0xc0], mov(rax, r8)); eq([0x4d, 0x89, 0xd1], mov(r9, r10)); eq([0x4d, 0x89, 0xe0], mov(r8, r12)); } #[test] fn reg_imm() { eq( [0x49, 0xbf, 0xf0, 0xde, 0xbc, 0x9a, 0x78, 0x56, 0x34, 0x12], mov(r15, 0x123456789abcdef0), ); eq( [0x49, 0xb8, 0xf0, 0xde, 0xbc, 0x9a, 0x78, 0x56, 0x34, 0x12], mov(r8, 0x123456789abcdef0), ); eq( [0x49, 0xb9, 0xf0, 0xde, 0xbc, 0x9a, 0x78, 0x56, 0x34, 0x12], mov(r9, 0x123456789abcdef0), ); eq([0x41, 0xb9, 0x78, 0x56, 0x34, 0x12], mov(r9d, 0x12345678)); eq([0x66, 0x41, 0xb9, 0x34, 0x12], mov(r9w, 0x1234)); eq([0x41, 0xb1, 0x12], mov(r9b, 0x12)); eq([0x41, 0xb0, 0x12], mov(r8b, 0x12)); eq([0x41, 0xb7, 0x12], mov(r15b, 0x12)); eq( [0x48, 0xb8, 0xf0, 0xde, 0xbc, 0x9a, 0x78, 0x56, 0x34, 0x12], mov(rax, 0x123456789abcdef0), ); eq( [0x48, 0xbb, 0xf0, 0xde, 0xbc, 0x9a, 0x78, 0x56, 0x34, 0x12], mov(rbx, 0x123456789abcdef0), ); eq( [0x48, 0xbf, 0xf0, 0xde, 0xbc, 0x9a, 0x78, 0x56, 0x34, 0x12], mov(rdi, 0x123456789abcdef0), ); eq([0xbb, 0x78, 0x56, 0x34, 0x12], mov(ebx, 0x12345678)); eq([0x66, 0xbb, 0x34, 0x12], mov(bx, 0x1234)); eq([0xb3, 0x12], mov(bl, 0x12)); eq([0xb7, 0x12], mov(bh, 0x12)); eq([0xb4, 0x12], mov(ah, 0x12)); eq([0x40, 0xb7, 0x12], mov(dil, 0x12)); } }