diff --git a/src/arch/x86_64/encode.rs b/src/arch/x86_64/encode.rs index 1f44a7c..1ce1c97 100644 --- a/src/arch/x86_64/encode.rs +++ b/src/arch/x86_64/encode.rs @@ -23,10 +23,9 @@ impl Code { if dst.incompatible(&src) { return Err("incompatible registers due to rex".into()); } - let width = dst.width(); - self.prefix16(width); - self.rex(width, src, 0, dst); - self.bytes.push(0x88 | width.not8()); + self.prefix16(dst); + self.rex(dst, src, 0, dst); + self.bytes.push(0x88 | dst.not8()); self.modrm(src, dst); } RegImmMem::Imm(src) => { @@ -56,7 +55,7 @@ impl Code { if dst.high() && src.reg.gt8() { return Err("registers incompatible (REX)".into()); } - self.prefix32(&src)?; + self.prefix32(src)?; self.prefix16(dst); self.rex(dst, dst, 0, src); self.bytes.push(0x8a | dst.not8()); @@ -71,7 +70,7 @@ impl Code { if src.high() && dst.reg.gt8() { return Err("registers incompatible (REX)".into()); } - self.prefix32(&dst)?; + self.prefix32(dst)?; self.prefix16(src); self.rex(dst, src, 0, dst); self.bytes.push(0x88 | src.not8()); @@ -90,7 +89,7 @@ impl Code { if src_width > dst.width { return Err("source cannot fit in destination".into()); } - self.prefix32(&dst)?; + self.prefix32(dst)?; self.prefix16(encode_width); self.rex(dst, 0, 0, dst); self.bytes.push(0xc6 | encode_width.not8()); @@ -172,39 +171,42 @@ impl Code { self.bytes.push(0xc3); } - fn add_sub(&mut self, dst: Reg, src: impl Into, ext: u8) -> ERes { + fn add_sub(&mut self, dst: impl RegMem_, src: impl Into, ext: u8) -> ERes { let mut src = src.into(); - let mut width = src.width_signed()?; + let mut imm_width = src.width_signed()?; let dst_width = dst.width().min(Width::B32); - self.prefix16(dst_width); - self.rex(dst, 0, 0, dst); - - if width > dst_width { - width = src.width_unsigned()?; - if dst.width() == Width::B64 || width > dst_width { + if imm_width > dst_width { + imm_width = src.width_unsigned()?; + if dst.width() == Width::B64 || imm_width > dst_width { return Err("immediate overflow".into()); } src = src.reinterpret(dst_width); - width = src.width_signed()?; + imm_width = src.width_signed()?; } - - if dst.width() == Width::B8 { - self.bytes.push(0x80); - } else if width == Width::B8 { - self.bytes.push(0x83); + let code = if dst.width() == Width::B8 { + 0x80 + } else if imm_width == Width::B8 { + 0x83 } else { - self.bytes.push(0x81); - width = dst_width; - } + imm_width = dst_width; + 0x81 + }; + self.prefix32(dst)?; + self.prefix16(dst_width); + self.rex(dst, 0, 0, dst); + self.bytes.push(code); self.modrm(ext, dst); - self.imm(src, width); + self.imm(src, imm_width); Ok(()) } - pub fn add(&mut self, dst: Reg, src: impl Into) -> ERes { - self.add_sub(dst, src, 0) + pub fn add(&mut self, dst: impl Into, src: impl Into) -> ERes { + match dst.into() { + RegMem::Reg(dst) => self.add_sub(dst, src, 0), + RegMem::Mem(dst) => self.add_sub(dst, src, 0), + } } pub fn sub(&mut self, dst: Reg, src: impl Into) -> ERes { @@ -217,7 +219,10 @@ impl Code { } } - fn prefix32(&mut self, mem: &Mem) -> Result<(), CompilerMsg> { + fn prefix32(&mut self, mem: impl MaybeMem) -> Result<(), CompilerMsg> { + let Some(mem) = mem.mem() else { + return Ok(()); + }; match mem.reg.width() { Width::B8 | Width::B16 => return Err("invalid register width".into()), Width::B32 => self.bytes.push(0x67), diff --git a/src/arch/x86_64/mod.rs b/src/arch/x86_64/mod.rs index 109e0f3..3346743 100644 --- a/src/arch/x86_64/mod.rs +++ b/src/arch/x86_64/mod.rs @@ -1,6 +1,5 @@ mod compile; mod encode; -mod reg; #[cfg(test)] mod test; mod types; @@ -14,7 +13,6 @@ use crate::{ pub use compile::*; pub use encode::*; -pub use reg::*; pub use types::*; use util::*; diff --git a/src/arch/x86_64/test/asm/mod.rs b/src/arch/x86_64/test/asm/mod.rs index 9cd7a02..21217a3 100644 --- a/src/arch/x86_64/test/asm/mod.rs +++ b/src/arch/x86_64/test/asm/mod.rs @@ -46,6 +46,12 @@ fn add_sub() { } } + for dst in mems() { + for src in imms() { + eq(c, format!("add {dst}, {src}"), |c| c.add(dst, src)) + } + } + for dst in regs() { for src in imms() { eq(c, format!("sub {dst}, {src}"), |c| c.sub(dst, src)) diff --git a/src/arch/x86_64/types/arg.rs b/src/arch/x86_64/types/arg.rs new file mode 100644 index 0000000..24e2cef --- /dev/null +++ b/src/arch/x86_64/types/arg.rs @@ -0,0 +1,241 @@ +use super::*; +use crate::backend::Symbol; + +#[derive(Clone, Copy)] +pub enum RegImmMem { + Reg(Reg), + Imm(Imm), + Mem(Mem), +} + +#[derive(Clone, Copy)] +pub enum RegMem { + Reg(Reg), + Mem(Mem), +} + +pub trait RegMem_: RexBit + RexW + ModRMRM + Copy + MaybeMem { + fn width(&self) -> Width; +} + +pub trait MaybeMem { + fn mem(&self) -> Option; +} + +impl RegMem_ for Reg { + fn width(&self) -> Width { + self.width() + } +} + +impl MaybeMem for Reg { + fn mem(&self) -> Option { + None + } +} + +impl RegMem_ for Mem { + fn width(&self) -> Width { + self.width + } +} + +impl MaybeMem for Mem { + fn mem(&self) -> Option { + Some(*self) + } +} + +// fromrot +impl From for RegImmMem { + fn from(value: Reg) -> Self { + Self::Reg(value) + } +} + +impl From for RegMem { + fn from(value: Reg) -> Self { + Self::Reg(value) + } +} + +impl From for RegImmMem { + fn from(value: Mem) -> Self { + Self::Mem(value) + } +} + +impl From for RegMem { + fn from(value: Mem) -> Self { + Self::Mem(value) + } +} + +impl From for RegImmMem { + fn from(value: u64) -> Self { + Self::Imm(value.into()) + } +} + +impl From for RegImmMem { + fn from(value: i64) -> Self { + Self::Imm(value.into()) + } +} + +impl From for RegImmMem { + fn from(value: i32) -> Self { + Self::Imm(value.into()) + } +} + +impl From for RegImmMem { + fn from(value: i128) -> Self { + Self::Imm(value.into()) + } +} + +pub trait ModRMRM { + fn rm(&self) -> u8; + fn addr(&self) -> EffAddr; +} + +pub enum EffAddr { + Mem0, + Mem8(i8), + Mem32(i32), + Sym(Symbol), + None, +} + +impl ModRMRM for Reg { + fn rm(&self) -> u8 { + self.base() + } + fn addr(&self) -> EffAddr { + EffAddr::None + } +} + +impl ModRMRM for Mem { + fn rm(&self) -> u8 { + self.reg.base() + } + fn addr(&self) -> EffAddr { + const I8_MIN: i32 = i8::MIN as i32; + const I8_MAX: i32 = i8::MAX as i32; + let disp = self.disp; + match disp { + 0 => { + if self.reg.base() == 0b101 { + EffAddr::Mem8(0) + } else { + EffAddr::Mem0 + } + } + I8_MIN..=I8_MAX => EffAddr::Mem8(disp as i8), + _ => EffAddr::Mem32(disp), + } + } +} + +impl ModRMRM for i32 { + fn rm(&self) -> u8 { + 0b101 + } + fn addr(&self) -> EffAddr { + EffAddr::Mem32(*self) + } +} + +impl ModRMRM for Symbol { + fn rm(&self) -> u8 { + 0b101 + } + + fn addr(&self) -> EffAddr { + EffAddr::Sym(*self) + } +} + +impl ModRMReg for u8 { + fn val(&self) -> u8 { + *self + } +} + +impl ModRMReg for Reg { + fn val(&self) -> u8 { + self.base() + } +} + +pub trait ModRMReg { + fn val(&self) -> u8; +} + +#[inline(always)] +pub fn rex(w: impl RexW, r: impl RexBit, x: u8, b: impl RexBit) -> u8 { + 0b0100_0000 | bit(w.rexw(), 3) | bit(r.rex(), 2) | bit(x.rex(), 1) | bit(b.rex(), 0) +} + +#[inline(always)] +fn bit(val: bool, pos: u8) -> u8 { + (val as u8) << pos +} + +pub trait RexBit: Sized { + fn rex(&self) -> bool; + fn req(&self) -> bool { + false + } +} + +impl RexBit for u8 { + fn rex(&self) -> bool { + *self != 0 + } +} + +impl RexBit for Reg { + fn rex(&self) -> bool { + self.gt8() + } + fn req(&self) -> bool { + self.gt4() && (self.width() == Width::B8) && !self.high() + } +} + +impl RexBit for Mem { + fn rex(&self) -> bool { + self.reg.rex() + } +} + +pub trait RexW { + fn rexw(&self) -> bool; +} + +impl RexW for Width { + fn rexw(&self) -> bool { + *self == Width::B64 + } +} + +impl RexW for Reg { + fn rexw(&self) -> bool { + self.width().rexw() + } +} + +impl RexW for u8 { + fn rexw(&self) -> bool { + *self == 1 + } +} + +impl RexW for Mem { + fn rexw(&self) -> bool { + self.width.rexw() + } +} diff --git a/src/arch/x86_64/types.rs b/src/arch/x86_64/types/imm.rs similarity index 56% rename from src/arch/x86_64/types.rs rename to src/arch/x86_64/types/imm.rs index 3f592b6..7e8edd3 100644 --- a/src/arch/x86_64/types.rs +++ b/src/arch/x86_64/types/imm.rs @@ -1,34 +1,10 @@ +use super::Width; +use crate::io::CompilerMsg; use std::num::TryFromIntError; -use super::*; - -#[derive(Clone, Copy)] -pub struct Mem { - pub reg: Reg, - pub disp: i32, - pub width: Width, -} - -#[derive(Clone, Copy)] -pub enum RegImmMem { - Reg(Reg), - Imm(Imm), - Mem(Mem), -} - -#[derive(Clone, Copy)] -pub enum RegMem { - Reg(Reg), - Mem(Mem), -} - #[derive(Clone, Copy, PartialEq, PartialOrd)] pub struct Imm(pub i128); -pub fn mem(reg: Reg, disp: i32, width: Width) -> Mem { - Mem { reg, disp, width } -} - impl Imm { pub fn overflow_msg() -> CompilerMsg { "immediate overflow".into() @@ -72,68 +48,6 @@ impl TryFrom for u8 { } } -impl std::fmt::Display for Mem { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - let Mem { reg, disp, width } = *self; - let size = match width { - Width::B8 => "BYTE", - Width::B16 => "WORD", - Width::B32 => "DWORD", - Width::B64 => "QWORD", - }; - write!(f, "{size} [{reg} {}]", signed_hex(disp as i128, true)) - } -} - -// fromrot -impl From for RegImmMem { - fn from(value: Reg) -> Self { - Self::Reg(value) - } -} - -impl From for RegMem { - fn from(value: Reg) -> Self { - Self::Reg(value) - } -} - -impl From for RegImmMem { - fn from(value: Mem) -> Self { - Self::Mem(value) - } -} - -impl From for RegMem { - fn from(value: Mem) -> Self { - Self::Mem(value) - } -} - -impl From for RegImmMem { - fn from(value: u64) -> Self { - Self::Imm(value.into()) - } -} - -impl From for RegImmMem { - fn from(value: i64) -> Self { - Self::Imm(value.into()) - } -} - -impl From for RegImmMem { - fn from(value: i32) -> Self { - Self::Imm(value.into()) - } -} - -impl From for RegImmMem { - fn from(value: i128) -> Self { - Self::Imm(value.into()) - } -} - impl From for Imm { fn from(value: u64) -> Self { Self(value as i128) diff --git a/src/arch/x86_64/types/mem.rs b/src/arch/x86_64/types/mem.rs new file mode 100644 index 0000000..f0628b0 --- /dev/null +++ b/src/arch/x86_64/types/mem.rs @@ -0,0 +1,27 @@ +use crate::arch::x86_64::util::signed_hex; + +use super::*; + +#[derive(Clone, Copy)] +pub struct Mem { + pub reg: Reg, + pub disp: i32, + pub width: Width, +} + +pub fn mem(reg: Reg, disp: i32, width: Width) -> Mem { + Mem { reg, disp, width } +} + +impl std::fmt::Display for Mem { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + let Mem { reg, disp, width } = *self; + let size = match width { + Width::B8 => "BYTE", + Width::B16 => "WORD", + Width::B32 => "DWORD", + Width::B64 => "QWORD", + }; + write!(f, "{size} [{reg} {}]", signed_hex(disp as i128, true)) + } +} diff --git a/src/arch/x86_64/types/mod.rs b/src/arch/x86_64/types/mod.rs new file mode 100644 index 0000000..3bb1aa0 --- /dev/null +++ b/src/arch/x86_64/types/mod.rs @@ -0,0 +1,11 @@ +mod arg; +mod imm; +mod mem; +mod reg; +mod width; + +pub use arg::*; +pub use imm::*; +pub use mem::*; +pub use reg::*; +pub use width::*; diff --git a/src/arch/x86_64/reg.rs b/src/arch/x86_64/types/reg.rs similarity index 95% rename from src/arch/x86_64/reg.rs rename to src/arch/x86_64/types/reg.rs index 1f4d306..7650f09 100644 --- a/src/arch/x86_64/reg.rs +++ b/src/arch/x86_64/types/reg.rs @@ -1,3 +1,5 @@ +use super::Width; + #[derive(Clone, Copy, PartialEq)] pub struct Reg { val: u8, @@ -5,15 +7,6 @@ pub struct Reg { width: Width, } -#[derive(Debug, Clone, Copy, PartialEq, PartialOrd)] -#[repr(u8)] -pub enum Width { - B8 = 0, - B16 = 1, - B32 = 2, - B64 = 3, -} - def_regs! { 0b0000 : rax eax ax al, 0b0001 : rcx ecx cx cl !_, @@ -189,9 +182,3 @@ macro_rules! def_regs { use def_regs; use crate::arch::x86_64::Imm; - -impl From for Width { - fn from(value: Reg) -> Self { - value.width - } -} diff --git a/src/arch/x86_64/types/width.rs b/src/arch/x86_64/types/width.rs new file mode 100644 index 0000000..0241cc9 --- /dev/null +++ b/src/arch/x86_64/types/width.rs @@ -0,0 +1,22 @@ +use super::*; + +#[derive(Debug, Clone, Copy, PartialEq, PartialOrd)] +#[repr(u8)] +pub enum Width { + B8 = 0, + B16 = 1, + B32 = 2, + B64 = 3, +} + +impl From for Width { + fn from(value: Reg) -> Self { + value.width() + } +} + +impl From for Width { + fn from(value: Mem) -> Self { + value.width + } +} diff --git a/src/arch/x86_64/util.rs b/src/arch/x86_64/util.rs index 156e227..c5c06c7 100644 --- a/src/arch/x86_64/util.rs +++ b/src/arch/x86_64/util.rs @@ -1,152 +1,3 @@ -use crate::backend::Symbol; - -use super::*; - -pub trait ModRMRM { - fn rm(&self) -> u8; - fn addr(&self) -> EffAddr; -} - -pub enum EffAddr { - Mem0, - Mem8(i8), - Mem32(i32), - Sym(Symbol), - None, -} - -impl ModRMRM for Reg { - fn rm(&self) -> u8 { - self.base() - } - fn addr(&self) -> EffAddr { - EffAddr::None - } -} - -impl ModRMRM for Mem { - fn rm(&self) -> u8 { - self.reg.base() - } - fn addr(&self) -> EffAddr { - const I8_MIN: i32 = i8::MIN as i32; - const I8_MAX: i32 = i8::MAX as i32; - let disp = self.disp; - match disp { - 0 => { - if self.reg.base() == 0b101 { - EffAddr::Mem8(0) - } else { - EffAddr::Mem0 - } - } - I8_MIN..=I8_MAX => EffAddr::Mem8(disp as i8), - _ => EffAddr::Mem32(disp), - } - } -} - -impl ModRMRM for i32 { - fn rm(&self) -> u8 { - 0b101 - } - fn addr(&self) -> EffAddr { - EffAddr::Mem32(*self) - } -} - -impl ModRMRM for Symbol { - fn rm(&self) -> u8 { - 0b101 - } - - fn addr(&self) -> EffAddr { - EffAddr::Sym(*self) - } -} - -impl ModRMReg for u8 { - fn val(&self) -> u8 { - *self - } -} - -impl ModRMReg for Reg { - fn val(&self) -> u8 { - self.base() - } -} - -pub trait ModRMReg { - fn val(&self) -> u8; -} - -#[inline(always)] -pub fn rex(w: impl RexW, r: impl RexBit, x: u8, b: impl RexBit) -> u8 { - 0b0100_0000 | bit(w.rexw(), 3) | bit(r.rex(), 2) | bit(x.rex(), 1) | bit(b.rex(), 0) -} - -#[inline(always)] -fn bit(val: bool, pos: u8) -> u8 { - (val as u8) << pos -} - -pub trait RexBit: Sized { - fn rex(&self) -> bool; - fn req(&self) -> bool { - false - } -} - -impl RexBit for u8 { - fn rex(&self) -> bool { - *self != 0 - } -} - -impl RexBit for Reg { - fn rex(&self) -> bool { - self.gt8() - } - fn req(&self) -> bool { - self.gt4() && (self.width() == Width::B8) && !self.high() - } -} - -impl RexBit for Mem { - fn rex(&self) -> bool { - self.reg.rex() - } -} - -pub trait RexW { - fn rexw(&self) -> bool; -} - -impl RexW for Width { - fn rexw(&self) -> bool { - *self == Width::B64 - } -} - -impl RexW for Reg { - fn rexw(&self) -> bool { - self.width().rexw() - } -} - -impl RexW for u8 { - fn rexw(&self) -> bool { - *self == 1 - } -} - -impl RexW for Mem { - fn rexw(&self) -> bool { - self.width.rexw() - } -} - /// assumes the next instruction is directly after pub fn addr_offset(pos: usize, addr: u64) -> [u8; 4] { let pos = (pos + 4) as i32; diff --git a/test/nasm_cache/mov b/test/nasm_cache/mov index 9e836a3..ed0bb4f 100644 Binary files a/test/nasm_cache/mov and b/test/nasm_cache/mov differ