From 7280f7b0718b54d6946e0c6f7b4eb56d19cf16cb Mon Sep 17 00:00:00 2001 From: Shadow Cat Date: Fri, 12 Jun 2026 17:08:42 -0400 Subject: [PATCH] give immediates a sign & fix stuff --- src/arch/x86_64/encode.rs | 125 ++++++++----------------- src/arch/x86_64/mod.rs | 3 + src/arch/x86_64/reg.rs | 37 ++------ src/arch/x86_64/test/nasm.rs | 73 ++++++++------- src/arch/x86_64/types.rs | 162 +++++++++++++++++++++++++++++++++ src/arch/x86_64/util.rs | 20 ++++ src/parser/nodes/asm/x86_64.rs | 12 ++- 7 files changed, 282 insertions(+), 150 deletions(-) create mode 100644 src/arch/x86_64/types.rs diff --git a/src/arch/x86_64/encode.rs b/src/arch/x86_64/encode.rs index 684a298..2bcdea5 100644 --- a/src/arch/x86_64/encode.rs +++ b/src/arch/x86_64/encode.rs @@ -1,7 +1,5 @@ -use crate::backend::Symbol; - use super::*; -use util::*; +use crate::backend::Symbol; type ERes = Result<(), CompilerMsg>; @@ -12,30 +10,6 @@ pub struct Code { pub(super) missing: Vec<(usize, Symbol)>, } -#[derive(Clone, Copy)] -pub struct Mem { - pub reg: Reg, - pub disp: i32, - pub width: Width, -} - -#[derive(Clone, Copy)] -pub enum RegImmMem { - Reg(Reg), - Imm(u64), - Mem(Mem), -} - -#[derive(Clone, Copy)] -pub enum RegMem { - Reg(Reg), - Mem(Mem), -} - -pub fn mem(reg: Reg, disp: i32, width: Width) -> Mem { - Mem { reg, disp, width } -} - impl Code { pub fn mov(&mut self, dst: impl Into, src: impl Into) -> ERes { let dst = dst.into(); @@ -58,42 +32,56 @@ impl Code { self.bytes.push(modrm_regs(src, dst)); } RegImmMem::Imm(src) => { - let src_width = Width::fit(src); + let src_width = src.width_unsigned()?; if src_width > dst.width() { return Err("immediate cannot fit in register".into()); } self.prefix16(dst); - if src_width <= Width::B32 { - dst.lower64(); + if dst.width() == Width::B64 && src_width <= Width::B32 && src.0 < 0 { + self.bytes + .extend([rex(dst.width(), 0, 0, dst), 0xc7, 0xc0 | dst.base()]); + self.imm(src, Width::B32); + } else { + if src_width <= Width::B32 { + dst = dst.lower64(); + } + if dst.requires_rex() { + self.bytes.push(rex(dst.width(), 0, 0, dst)); + } + let opcode = 0xb0 | ((dst.width().gt8() as u8) << 3); + self.bytes.push(opcode | dst.base()); + self.imm(src, dst.width()); } - if dst.requires_rex() { - self.bytes.push(rex(dst.width(), 0, 0, dst)); - } - let opcode = 0xb0 | ((dst.width().gt8() as u8) << 3); - self.bytes.push(opcode | dst.base()); - self.bytes.extend(&src.to_le_bytes()[..dst.width().bytes()]); } RegImmMem::Mem(src) => todo!(), }, RegMem::Mem(dst) => match src { RegImmMem::Reg(src) => todo!(), RegImmMem::Imm(src) => { - let src_width = Width::fit(src); + let encode_width = dst.width.min(Width::B32); + let src_width = if dst.width == Width::B64 { + src.width_signed() + } else { + src.width_unsigned() + }?; if src_width == Width::B64 { return Err("cannot move 64 bit immediate into memory".into()); } + if src_width > dst.width { + return Err("source cannot fit in destination".into()); + } match dst.reg.width() { Width::B8 | Width::B16 => return Err("invalid register width".into()), Width::B32 => self.bytes.push(0x67), Width::B64 => (), } - self.prefix16(src_width); - if dst.reg.requires_mem_rex() { - self.bytes.push(rex(src_width, 0, 0, dst.reg)); + self.prefix16(encode_width); + if dst.reg.requires_mem_rex() || dst.width == Width::B64 { + self.bytes.push(rex(dst.width, 0, 0, dst.reg)); } - self.bytes.push(0xc6 | (src_width != Width::B8) as u8); + self.bytes.push(0xc6 | (encode_width != Width::B8) as u8); self.modrm_regdisp(dst.reg, dst.disp); - self.bytes.extend(&src.to_le_bytes()[..src_width.bytes()]); + self.imm(src, encode_width); } RegImmMem::Mem(_) => return Err("cannot move memory to memory".into()), }, @@ -113,14 +101,14 @@ impl Code { Width::B16 => {} _ => return Err("register must be 64 or 16 bit".into()), }, - RegImmMem::Imm(imm) => match Width::fit(imm) { + RegImmMem::Imm(imm) => match imm.width_unsigned()? { Width::B8 => { self.bytes.push(0x6a); - self.bytes.push(imm as u8); + self.bytes.push(imm.0 as u8); } Width::B16 | Width::B32 => { self.bytes.push(0x68); - self.bytes.extend((imm as u32).to_le_bytes()); + self.bytes.extend((imm.0 as u32).to_le_bytes()); } Width::B64 => return Err("immediate must be 32 bit or less".into()), }, @@ -215,6 +203,10 @@ impl Code { self.missing .extend(other.missing.iter().map(|&(p, s)| (pos + p, s))); } + + fn imm(&mut self, imm: Imm, width: Width) { + self.bytes.extend(&imm.0.to_le_bytes()[..width.bytes()]); + } } pub fn encode(f: impl FnOnce(&mut Code) -> Result<(), CompilerMsg>) -> Result { @@ -222,46 +214,3 @@ pub fn encode(f: impl FnOnce(&mut Code) -> Result<(), CompilerMsg>) -> Result for RegImmMem { - fn from(value: Reg) -> Self { - Self::Reg(value) - } -} - -impl From for RegMem { - fn from(value: Reg) -> Self { - Self::Reg(value) - } -} - -impl From for RegImmMem { - fn from(value: Mem) -> Self { - Self::Mem(value) - } -} - -impl From for RegMem { - fn from(value: Mem) -> Self { - Self::Mem(value) - } -} - -impl From for RegImmMem { - fn from(value: u64) -> Self { - Self::Imm(value) - } -} - -impl From for RegImmMem { - fn from(value: i64) -> Self { - Self::Imm(value as u64) - } -} - -impl From for RegImmMem { - fn from(value: i32) -> Self { - Self::Imm(value as u64) - } -} diff --git a/src/arch/x86_64/mod.rs b/src/arch/x86_64/mod.rs index aeb42e9..1fd20c2 100644 --- a/src/arch/x86_64/mod.rs +++ b/src/arch/x86_64/mod.rs @@ -2,6 +2,7 @@ mod compile; mod encode; mod reg; mod test; +mod types; mod util; use crate::{ @@ -14,6 +15,8 @@ pub use compile::*; pub use encode::*; pub use reg::*; pub use test::bin::run as bin_test; +pub use types::*; +use util::*; pub struct X86_64; diff --git a/src/arch/x86_64/reg.rs b/src/arch/x86_64/reg.rs index 5ae18a8..3217857 100644 --- a/src/arch/x86_64/reg.rs +++ b/src/arch/x86_64/reg.rs @@ -52,8 +52,10 @@ impl Reg { } /// if self has 64 bit width, changes width to 32 bit - pub fn lower64(&mut self) { - self.width.lower64() + pub fn lower64(&self) -> Self { + let mut new = *self; + new.width = new.width.min(Width::B32); + new } pub fn requires_rex(&self) -> bool { @@ -76,7 +78,7 @@ impl Reg { } impl Width { - pub const fn max(&self) -> u64 { + pub const fn max_val(&self) -> u64 { match self { Self::B64 => u64::MAX, Self::B32 => u32::MAX as u64, @@ -85,10 +87,8 @@ impl Width { } } - pub fn lower64(&mut self) { - if matches!(self, Width::B64) { - *self = Width::B32; - } + pub fn min(self, other: Self) -> Self { + if self <= other { self } else { other } } pub const fn bytes(&self) -> usize { @@ -100,27 +100,6 @@ impl Width { } } - pub const fn fit(val: u64) -> Self { - const B8: u64 = 1 << 8; - const B16: u64 = 1 << 16; - const B32: u64 = 1 << 32; - match val { - ..B8 => Self::B8, - B8..B16 => Self::B16, - B16..B32 => Self::B32, - B32.. => Self::B64, - } - } - - pub const fn fiti(val: u64) -> Self { - match val { - ..0x80 => Self::B8, - 0x80..0x8000 => Self::B16, - 0x8000..0x8000_0000 => Self::B32, - 0x8000_0000.. => Self::B64, - } - } - /// greater than 8 bits pub const fn gt8(&self) -> bool { !matches!(self, Self::B8) @@ -205,6 +184,8 @@ macro_rules! def_regs { use def_regs; +use crate::arch::x86_64::Imm; + impl From for Width { fn from(value: Reg) -> Self { value.width diff --git a/src/arch/x86_64/test/nasm.rs b/src/arch/x86_64/test/nasm.rs index e718c59..49456bc 100644 --- a/src/arch/x86_64/test/nasm.rs +++ b/src/arch/x86_64/test/nasm.rs @@ -11,40 +11,29 @@ const DISPS: &[i32] = &[ i32::MAX, ]; -const IMMS: &[u64] = &[ +const IMMS: &[i128] = &[ 0x0, - u8::MAX as u64, - u8::MAX as u64 + 1, - u16::MAX as u64, - u16::MAX as u64 + 1, - u32::MAX as u64, - u32::MAX as u64 + 1, - // nasm likes to think u64::MAX is -1i32 for some reason - i64::MAX as u64, + i8::MIN as i128, + i8::MAX as i128, + i16::MIN as i128, + i16::MAX as i128, + i32::MIN as i128, + i32::MAX as i128, + i64::MIN as i128, + i64::MAX as i128, + u8::MAX as i128, + u8::MAX as i128 + 1, + u16::MAX as i128, + u16::MAX as i128 + 1, + u32::MAX as i128, + u32::MAX as i128 + 1, + i64::MAX as i128, ]; +const WIDTHS: &[Width] = &[Width::B8, Width::B16, Width::B32, Width::B64]; + #[test] fn mov() { - for ® in Reg::IMPORTANT { - for &disp in DISPS { - for &imm in IMMS { - let width = Width::fit(imm); - let size = match width { - Width::B8 => "BYTE", - Width::B16 => "WORD", - Width::B32 => "DWORD", - Width::B64 => "QWORD", - }; - let ddisp = (disp as i64).abs(); - let sign = if disp < 0 { '-' } else { '+' }; - eq!( - format!("mov {size} [{reg}{sign}0x{ddisp:x}], 0x{imm:x}"), - mov(mem(reg, disp, width), imm) - ); - } - } - } - for &r1 in Reg::IMPORTANT { for &r2 in Reg::IMPORTANT { eq!(format!("mov {r1}, {r2}"), mov(r1, r2)); @@ -53,9 +42,29 @@ fn mov() { for &r1 in Reg::IMPORTANT { for &imm in IMMS { - eq!(format!("mov {r1}, 0x{imm:x}"), mov(r1, imm)); + eq!(format!("mov {r1}, {imm}"), mov(r1, imm)); } } + + for ® in Reg::IMPORTANT { + for &disp in DISPS { + for &imm in IMMS { + for &width in WIDTHS { + let mem = mem(reg, disp, width); + eq!(format!("mov {mem}, {imm}"), mov(mem, imm)); + } + } + } + } +} + +fn u_to_i(u: u64) -> i64 { + match u { + 0..0x100 => u as i8 as i64, + 0x100..0x10000 => u as i16 as i64, + 0x10000..0x100000000 => u as i32 as i64, + 0x100000000.. => u as i64, + } } macro_rules! eq { @@ -65,8 +74,8 @@ macro_rules! eq { let mut code = Code::default(); let res = code.$instr $args; match (expected, res) { - (Ok(_), Err(e)) => { - panic!("{asm}: failed to compile: {}", e.msg); + (Ok(expected), Err(e)) => { + panic!("{asm}: failed to compile: {}\nexpected: {expected:x?}", e.msg); } (Err(e), Ok(_)) => { let res = &code.bytes[..]; diff --git a/src/arch/x86_64/types.rs b/src/arch/x86_64/types.rs new file mode 100644 index 0000000..664cdb6 --- /dev/null +++ b/src/arch/x86_64/types.rs @@ -0,0 +1,162 @@ +use std::num::TryFromIntError; + +use super::*; + +#[derive(Clone, Copy)] +pub struct Mem { + pub reg: Reg, + pub disp: i32, + pub width: Width, +} + +#[derive(Clone, Copy)] +pub enum RegImmMem { + Reg(Reg), + Imm(Imm), + Mem(Mem), +} + +#[derive(Clone, Copy)] +pub enum RegMem { + Reg(Reg), + Mem(Mem), +} + +#[derive(Clone, Copy, PartialEq, PartialOrd)] +pub struct Imm(pub i128); + +pub fn mem(reg: Reg, disp: i32, width: Width) -> Mem { + Mem { reg, disp, width } +} + +impl Imm { + pub fn overflow_msg() -> CompilerMsg { + "immediate overflow".into() + } + + pub fn width_signed(&self) -> Result { + Ok(match self.0 { + -0x80..=0x7f => Width::B8, + -0x8000..=0x7fff => Width::B16, + -0x8000_0000..=0x7fff_ffff => Width::B32, + -0x8000_0000_0000_0000..=0x7fff_ffff_ffff_ffff => Width::B64, + _ => return Err(Self::overflow_msg()), + }) + } + + pub fn width_unsigned(&self) -> Result { + Ok(match self.0 { + -0xff..=0xff => Width::B8, + -0xffff..=0xffff => Width::B16, + -0xffff_ffff..=0xffff_ffff => Width::B32, + -0xffff_ffff_ffff_ffff..=0xffff_ffff_ffff_ffff => Width::B64, + _ => return Err(Self::overflow_msg()), + }) + } +} + +impl TryFrom for u8 { + type Error = TryFromIntError; + + fn try_from(value: Imm) -> Result { + value.0.try_into() + } +} + +impl std::fmt::Display for Mem { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + let Mem { reg, disp, width } = *self; + let size = match width { + Width::B8 => "BYTE", + Width::B16 => "WORD", + Width::B32 => "DWORD", + Width::B64 => "QWORD", + }; + write!(f, "{size} [{reg} {}]", signed_hex(disp as i128, true)) + } +} + +// fromrot +impl From for RegImmMem { + fn from(value: Reg) -> Self { + Self::Reg(value) + } +} + +impl From for RegMem { + fn from(value: Reg) -> Self { + Self::Reg(value) + } +} + +impl From for RegImmMem { + fn from(value: Mem) -> Self { + Self::Mem(value) + } +} + +impl From for RegMem { + fn from(value: Mem) -> Self { + Self::Mem(value) + } +} + +impl From for RegImmMem { + fn from(value: u64) -> Self { + Self::Imm(value.into()) + } +} + +impl From for RegImmMem { + fn from(value: i64) -> Self { + Self::Imm(value.into()) + } +} + +impl From for RegImmMem { + fn from(value: i32) -> Self { + Self::Imm(value.into()) + } +} + +impl From for RegImmMem { + fn from(value: i128) -> Self { + Self::Imm(value.into()) + } +} + +impl From for Imm { + fn from(value: u64) -> Self { + Self(value as i128) + } +} + +impl From for Imm { + fn from(value: i64) -> Self { + Self(value as i128) + } +} + +impl From for Imm { + fn from(value: i32) -> Self { + Self(value as i128) + } +} + +impl From for Imm { + fn from(value: i128) -> Self { + Self(value) + } +} + +impl std::fmt::Display for Imm { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + self.0.fmt(f) + } +} + +impl std::fmt::Debug for Imm { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + self.0.fmt(f) + } +} diff --git a/src/arch/x86_64/util.rs b/src/arch/x86_64/util.rs index 5a14e59..e0194a3 100644 --- a/src/arch/x86_64/util.rs +++ b/src/arch/x86_64/util.rs @@ -59,3 +59,23 @@ impl RexBit for Width { self == Width::B64 } } + +pub struct SignedHex { + pub val: i128, + pub op: bool, +} +pub fn signed_hex(val: i128, op: bool) -> SignedHex { + SignedHex { val, op } +} +impl std::fmt::Display for SignedHex { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + let dsp = self.val.abs(); + let sign = match (self.op, self.val < 0) { + (true, true) => "- ", + (true, false) => "+ ", + (false, true) => "-", + (false, false) => "", + }; + write!(f, "{sign}0x{dsp:x}") + } +} diff --git a/src/parser/nodes/asm/x86_64.rs b/src/parser/nodes/asm/x86_64.rs index d67a97f..6bef902 100644 --- a/src/parser/nodes/asm/x86_64.rs +++ b/src/parser/nodes/asm/x86_64.rs @@ -47,13 +47,21 @@ impl Node for Code { } } -pub fn parse_imm(mut s: &str, span: Span) -> Result { +pub fn parse_imm(mut s: &str, span: Span) -> Result { let mut radix = 10; + let mut mult = 1; + if s.starts_with('-') { + mult = -1; + s = &s[1..]; + } if s.starts_with("0x") { radix = 16; s = &s[2..]; } - u64::from_str_radix(s, radix).map_err(|_| CompilerMsg::from(("invalid immediate", span))) + let abs = u64::from_str_radix(s, radix) + .map_err(|_| CompilerMsg::from(("invalid immediate", span)))?; + let val = (abs as i128) * mult; + Ok(Imm(val)) } pub fn parse_rmi(ctx: &mut crate::parser::ParseCtx) -> Result {