From 91f5db6950fc3262e978d09f738d7a567a723a8e Mon Sep 17 00:00:00 2001 From: Shadow Cat Date: Thu, 11 Jun 2026 17:39:44 -0400 Subject: [PATCH] idea (doesn't compile) --- src/arch/x86_64/asm.rs | 181 ++++++++++++++++++++++++------------ src/arch/x86_64/encode.rs | 74 ++++++++------- src/arch/x86_64/mod.rs | 1 + src/arch/x86_64/reg.rs | 101 ++++++++++---------- src/arch/x86_64/reg2.rs | 107 +++++++++++++++++++++ src/arch/x86_64/test/bin.rs | 12 +-- src/arch/x86_64/util.rs | 68 ++++++-------- 7 files changed, 349 insertions(+), 195 deletions(-) create mode 100644 src/arch/x86_64/reg2.rs diff --git a/src/arch/x86_64/asm.rs b/src/arch/x86_64/asm.rs index fdb3db6..0c0ef51 100644 --- a/src/arch/x86_64/asm.rs +++ b/src/arch/x86_64/asm.rs @@ -1,7 +1,4 @@ -use crate::{ - arch::x86_64::{Reg, RegH, RegWH, Width, WidthH}, - backend::Symbol, -}; +use crate::{arch::x86_64::*, backend::Symbol}; pub struct Asm { pub instrs: Vec, @@ -9,46 +6,151 @@ pub struct Asm { #[derive(Clone, Copy)] pub enum Instr { - Movr { dst: RegH, src: RegH, width: Width }, - Movi { dst: RegWH, imm: u64 }, - // TODO: horrible - Movm { reg: RegWH, offset: u32, val: u32 }, - Int(u8), + Mov(Mov), Call(Symbol), - Callm(Symbol), + CallM(Symbol), Ret, + Int(u8), Syscall, Lea { dst: RegWH, sym: Symbol }, - Pushr(Reg), - Pushi(u32), + Push(Push), Pop(Reg), Sub, } +#[derive(Clone, Copy)] +pub enum Mov { + RR { dst: RegH, src: RegH, width: Width }, + RI { dst: RegWH, src: u64 }, + RM { dst: RegWH, src: Mem }, + MI { dst: Mem, src: u32 }, + MR { dst: Mem, src: RegWH }, +} + +#[derive(Clone, Copy)] +pub enum Push { + Reg(Reg, Width64), + Mem(Mem), + Imm(u32), +} + #[derive(Clone, Copy)] pub struct Mem { - reg: Reg, - disp: u32, + pub reg: Reg64, + pub disp: u32, } #[derive(Clone, Copy)] pub enum RegImmMem { Reg(RegWH), Imm(u64), + Mem(Mem), } #[derive(Clone, Copy)] -pub enum RegImm { - Reg(Reg), - Imm(u64), +pub enum RegMem { + Reg(RegWH), + Mem(Mem), } +mod fns { + use crate::io::CompilerMsg; + + pub fn mem(reg: RegWH, disp: u32) -> Result { + Ok(Mem { + reg: Reg64 { + reg: reg.reg, + width: reg.width.try_into().map_err(|_| "width must be 32 or 64")?, + }, + disp, + }) + } + + use super::*; + pub fn mov(dst: impl Into, src: impl Into) -> Result { + let dst = dst.into(); + let src = src.into(); + Ok(Instr::Mov(match dst { + RegMem::Reg(dst) => match src { + RegImmMem::Reg(src) => { + if src.width != dst.width { + return Err("src and dst are not same width".into()); + } + Mov::RR { + dst: dst.regh, + src: src.regh, + width: dst.width, + } + } + RegImmMem::Imm(src) => Mov::RI { dst, src }, + RegImmMem::Mem(src) => Mov::RM { src, dst }, + }, + RegMem::Mem(dst) => match src { + RegImmMem::Reg(src) => Mov::MR { dst, src }, + RegImmMem::Imm(src) => { + if src > u32::MAX as u64 { + return Err("cannot move 64 bit immediate into memory".into()); + } + Mov::MI { + dst, + src: src as u32, + } + } + RegImmMem::Mem(_) => return Err("cannot move memory to memory".into()), + }, + })) + } + + pub fn lea(dst: RegWH, sym: Symbol) -> Instr { + Instr::Lea { dst, sym } + } + + pub fn push(reg: impl Into) -> Result { + Ok(Instr::Push(match reg.into() { + RegImmMem::Reg(reg) => match reg.width { + Width::B64 => Push::Reg64(reg.reg), + Width::B16 => Push::Reg16(reg.reg), + _ => return Err("register must be 64 or 16 bit".into()), + }, + RegImmMem::Imm(imm) => match imm.try_into() { + Ok(imm) => Push::Imm(imm), + Err(_) => return Err("immediate must be 32 bit".into()), + }, + RegImmMem::Mem(mem) => Push::Mem(mem), + })) + } + + pub fn pop(reg: RegWH) -> Instr { + assert!(reg.width == Width::B64); + Instr::Pop(reg.reg) + } +} + +// fromrot impl From for RegImmMem { fn from(value: RegWH) -> Self { Self::Reg(value) } } +impl From for RegMem { + fn from(value: RegWH) -> Self { + Self::Reg(value) + } +} + +impl From for RegImmMem { + fn from(value: Mem) -> Self { + Self::Mem(value) + } +} + +impl From for RegMem { + fn from(value: Mem) -> Self { + Self::Mem(value) + } +} + impl From for RegImmMem { fn from(value: u64) -> Self { Self::Imm(value) @@ -67,49 +169,4 @@ impl From for RegImmMem { } } -mod fns { - use crate::io::CompilerMsg; - - use super::*; - pub fn mov(dst: RegWH, src: impl Into) -> Result { - Ok(match src.into() { - RegImmMem::Reg(src) => { - if src.widthh.width() != dst.widthh.width() { - return Err("src and dst are not same width".into()); - } - Instr::Movr { - dst: dst.into(), - src: src.into(), - width: dst.widthh.into(), - } - } - RegImmMem::Imm(imm) => Instr::Movi { dst, imm }, - }) - } - - pub fn lea(dst: RegWH, sym: Symbol) -> Instr { - Instr::Lea { dst, sym } - } - - pub fn push(reg: impl Into) -> Result { - Ok(match reg.into() { - RegImmMem::Reg(reg) => { - if reg.widthh != WidthH::B64 { - return Err("register must be 64 bit".into()); - } - Instr::Pushr(reg.reg) - } - RegImmMem::Imm(imm) => match imm.try_into() { - Ok(imm) => Instr::Pushi(imm), - Err(_) => return Err("immediate must be 32 bit".into()), - }, - }) - } - - pub fn pop(reg: RegWH) -> Instr { - assert!(reg.widthh == WidthH::B64); - Instr::Pop(reg.reg) - } -} - pub use fns::*; diff --git a/src/arch/x86_64/encode.rs b/src/arch/x86_64/encode.rs index 923e157..da26489 100644 --- a/src/arch/x86_64/encode.rs +++ b/src/arch/x86_64/encode.rs @@ -72,23 +72,7 @@ fn compile_instr(encoder: &mut Encoder, instr: &BInstr) -> Result<(), CompilerMs impl Encoder<'_> { // assembly - pub fn movi(&mut self, dst: RegWH, imm: u64) -> Result<(), CompilerMsg> { - if dst.widthh == WidthH::B16 { - self.data.push(0x66); - } - if dst.requires_rex() { - self.data.push(rex(dst.widthh, 0, 0, dst)); - } - if imm > dst.widthh.max() { - return Err("immediate cannot fit in register".into()); - } - let opcode = 0xb0 | ((dst.widthh.gt8() as u8) << 3); - self.data.push(opcode | dst.base()); - self.data.extend(&imm.to_le_bytes()[..dst.widthh.bytes()]); - Ok(()) - } - - pub fn movr(&mut self, dst: RegH, src: RegH, width: Width) { + pub fn mov_rr(&mut self, dst: RegH, src: RegH, width: Width) { if width == Width::B16 { self.data.push(0x66); } @@ -99,10 +83,30 @@ impl Encoder<'_> { self.data.push(modrm_regs(src, dst)); } - pub fn movm(&mut self, reg: RegWH, offset: u32, val: u32) { - self.data.extend([rex(1, reg, 0, 0), 0xc7]); - self.modrm_regdisp(reg, offset); - self.data.extend(val.to_le_bytes()); + pub fn mov_ri(&mut self, dst: RegWH, src: u64) -> Result<(), CompilerMsg> { + if dst.width == Width::B16 { + self.data.push(0x66); + } + if dst.requires_rex() { + self.data.push(rex(dst.width, 0, 0, dst)); + } + if src > dst.width.max() { + return Err("immediate cannot fit in register".into()); + } + let opcode = 0xb0 | ((dst.width.gt8() as u8) << 3); + self.data.push(opcode | dst.base()); + self.data.extend(&src.to_le_bytes()[..dst.width.bytes()]); + Ok(()) + } + + pub fn mov_rm(&mut self, dst: RegWH, src: Mem) {} + + pub fn mov_mr(&mut self, dst: Mem, src: RegWH) {} + + pub fn mov_mi(&mut self, dst: Mem, src: u32) { + self.data.extend([rex(1, dst.reg, 0, 0), 0xc7]); + self.modrm_regdisp(dst.reg, dst.disp); + self.data.extend(src.to_le_bytes()); } pub fn lea(&mut self, dst: RegWH, sym: Symbol) { @@ -119,12 +123,12 @@ impl Encoder<'_> { self.data.extend([0x0f, 0x05]) } - pub fn call(&mut self, sym: Symbol) { + pub fn call_i(&mut self, sym: Symbol) { self.data.push(0xe8); self.sym_offset4(sym); } - pub fn callm(&mut self, sym: Symbol) { + pub fn call_m(&mut self, sym: Symbol) { self.data.extend([0xff, 0x15]); self.sym_offset4(sym); } @@ -133,14 +137,14 @@ impl Encoder<'_> { self.data.push(0xc3); } - pub fn pushr(&mut self, reg: Reg) { + pub fn push_r(&mut self, reg: Reg, width: Width64) { if reg.gt8() { self.data.push(0x41); } self.data.push(0x50 | reg.base()); } - pub fn pushi(&mut self, imm: u32) { + pub fn push_i(&mut self, imm: u32) { const U8: u32 = 2 << 8; if let 0..U8 = imm { self.data.push(0x6a); @@ -175,17 +179,23 @@ impl Encoder<'_> { pub fn asm(&mut self, instr: Instr) -> Result<(), CompilerMsg> { match instr { - Instr::Movr { dst, src, width } => self.movr(dst, src, width), - Instr::Movi { dst, imm } => self.movi(dst, imm)?, - Instr::Movm { reg, offset, val } => self.movm(reg, offset, val), + Instr::Mov(v) => match v { + Mov::RR { dst, src, width } => self.mov_rr(dst, src, width), + Mov::RI { dst, src } => self.mov_ri(dst, src)?, + Mov::RM { dst, src } => self.mov_rm(dst, src), + Mov::MI { dst, src } => self.mov_mi(dst, src), + Mov::MR { dst, src } => self.mov_mr(dst, src), + }, Instr::Int(code) => self.int(code), Instr::Syscall => self.syscall(), Instr::Lea { dst, sym } => self.lea(dst, sym), - Instr::Call(sym) => self.call(sym), - Instr::Callm(sym) => self.callm(sym), + Instr::Call(sym) => self.call_i(sym), + Instr::CallM(sym) => self.call_m(sym), Instr::Ret => self.ret(), - Instr::Pushr(reg) => self.pushr(reg), - Instr::Pushi(imm) => self.pushi(imm), + Instr::Push(v) => match v { + Push::Reg(reg, width) => self.push_r(reg, width), + Push::Imm(imm) => self.push_i(imm), + }, Instr::Pop(reg) => self.pop(reg), Instr::Sub => self.data.extend([0x48, 0x83, 0xec, 0x28]), } diff --git a/src/arch/x86_64/mod.rs b/src/arch/x86_64/mod.rs index 23a74e5..bbe06fa 100644 --- a/src/arch/x86_64/mod.rs +++ b/src/arch/x86_64/mod.rs @@ -1,6 +1,7 @@ mod asm; mod encode; mod reg; +mod reg2; mod test; mod util; diff --git a/src/arch/x86_64/reg.rs b/src/arch/x86_64/reg.rs index 64e0c05..82b6608 100644 --- a/src/arch/x86_64/reg.rs +++ b/src/arch/x86_64/reg.rs @@ -9,8 +9,14 @@ pub struct RegH { #[derive(Clone, Copy)] pub struct RegWH { + pub regh: RegH, + pub width: Width, +} + +#[derive(Clone, Copy)] +pub struct Reg64 { pub reg: Reg, - pub widthh: WidthH, + pub width: Width64, } #[derive(Debug, Clone, Copy, PartialEq)] @@ -21,13 +27,10 @@ pub enum Width { B8, } -/// width that also specifies if high for 8 bit #[derive(Debug, Clone, Copy, PartialEq)] -pub enum WidthH { +pub enum Width64 { B64, B32, - B16, - B8 { high: bool }, } def_regs! { RegWH; @@ -64,21 +67,29 @@ impl Reg { } } -impl RegWH { - pub fn requires_rex(&self) -> bool { - self.gt8() - || self.widthh == WidthH::B64 - || (self.gt4() && self.widthh == WidthH::B8 { high: false }) - } -} - impl RegH { pub fn requires_rex(&self, width: Width) -> bool { self.gt8() || width == Width::B64 || (self.gt4() && width == Width::B8 && !self.high) } } -impl WidthH { +impl RegWH { + const fn new(val: u8, width: Width, high: bool) -> Self { + Self { + regh: RegH { + reg: Reg(val), + high, + }, + width, + } + } + + pub fn requires_rex(&self) -> bool { + self.regh.requires_rex(self.width) + } +} + +impl Width { pub const fn max(&self) -> u64 { match self { Self::B64 => u64::MAX, @@ -95,22 +106,6 @@ impl WidthH { Self::B8 { .. } => 1, } } - /// greater than 8 bits - pub const fn gt8(&self) -> bool { - !matches!(self, Self::B8 { .. }) - } - - pub const fn width(&self) -> Width { - match self { - WidthH::B64 => Width::B64, - WidthH::B32 => Width::B32, - WidthH::B16 => Width::B16, - WidthH::B8 { .. } => Width::B8, - } - } -} - -impl Width { /// greater than 8 bits pub const fn gt8(&self) -> bool { !matches!(self, Self::B8) @@ -129,26 +124,26 @@ impl From for Reg { } } -impl From for RegH { - fn from(value: RegWH) -> Self { - Self { - reg: value.reg, - high: if let WidthH::B8 { high } = value.widthh { - high - } else { - false - }, - } +impl From for Reg { + fn from(value: Reg64) -> Self { + value.reg } } -impl From for Width { - fn from(value: WidthH) -> Self { +impl From for RegH { + fn from(value: RegWH) -> Self { + value.regh + } +} + +impl TryFrom for Width64 { + type Error = (); + + fn try_from(value: Width) -> Result { match value { - WidthH::B64 => Self::B64, - WidthH::B32 => Self::B32, - WidthH::B16 => Self::B16, - WidthH::B8 { .. } => Self::B8, + Width::B64 => Ok(Self::B64), + Width::B32 => Ok(Self::B32), + _ => Err(()), } } } @@ -157,16 +152,16 @@ macro_rules! def_regs { ($Struct: ident; $($val:literal : $B64:ident $B32:ident $B16:ident $B8:ident $($B8H:ident=$hval:expr)?,)*) => { $( #[allow(non_upper_case_globals)] - pub const $B64: $Struct = $Struct { reg: Reg($val), widthh: WidthH::B64 }; + pub const $B64: $Struct = $Struct::new($val, Width::B64, false); #[allow(non_upper_case_globals)] - pub const $B32: $Struct = $Struct { reg: Reg($val), widthh: WidthH::B32 }; + pub const $B32: $Struct = $Struct::new($val, Width::B32, false); #[allow(non_upper_case_globals)] - pub const $B16: $Struct = $Struct { reg: Reg($val), widthh: WidthH::B16 }; + pub const $B16: $Struct = $Struct::new($val, Width::B16, false); #[allow(non_upper_case_globals)] - pub const $B8 : $Struct = $Struct { reg: Reg($val), widthh: WidthH::B8 { high: false } }; + pub const $B8 : $Struct = $Struct::new($val, Width::B8 , false); $( #[allow(non_upper_case_globals)] - pub const $B8H: $Struct = $Struct { reg: $hval.reg, widthh: WidthH::B8 { high: true } }; + pub const $B8H: $Struct = $Struct::new($hval.regh.reg.0, Width::B8, true); )? )* impl $Struct { @@ -191,10 +186,10 @@ macro_rules! def_regs { use def_regs; impl std::ops::Deref for RegWH { - type Target = Reg; + type Target = RegH; fn deref(&self) -> &Self::Target { - &self.reg + &self.regh } } diff --git a/src/arch/x86_64/reg2.rs b/src/arch/x86_64/reg2.rs new file mode 100644 index 0000000..b490387 --- /dev/null +++ b/src/arch/x86_64/reg2.rs @@ -0,0 +1,107 @@ +#[derive(Clone, Copy, PartialEq)] +pub struct R8(u8, std::marker::PhantomData); +#[derive(Clone, Copy, PartialEq)] +pub struct R16(u8); +#[derive(Clone, Copy, PartialEq)] +pub struct R32(u8); +#[derive(Clone, Copy, PartialEq)] +pub struct R64(u8); + +pub struct Rex; +pub struct NoRex; +pub struct OptionalRex; + +pub trait MatchRex { + const REX: bool; +} + +impl MatchRex for Rex { + const REX: bool = true; +} +impl MatchRex for OptionalRex { + const REX: bool = true; +} +impl MatchRex for NoRex { + const REX: bool = false; +} +impl MatchRex for OptionalRex { + const REX: bool = false; +} +impl MatchRex for OptionalRex { + const REX: bool = false; +} + +pub enum AsmReg { + R8(R8), + R8Rex(R8), + R8NoRex(R8), + R16(R16), + R32(R32), + R64(R64), +} + +def_regs! { + 0b0000 : rax eax ax =al, + 0b0001 : rcx ecx cx =cl, + 0b0010 : rdx edx dx =dl, + 0b0011 : rbx ebx bx =bl, + + 0b0100 : rsp esp sp rex=spl norex=ah, + 0b0101 : rbp ebp bp rex=bpl norex=ch, + 0b0110 : rsi esi si rex=sil norex=dh, + 0b0111 : rdi edi di rex=dil norex=bh, + + 0b1000 : r8 r8d r8w rex=r8b, + 0b1001 : r9 r9d r9w rex=r9b, + 0b1010 : r10 r10d r10w rex=r10b, + 0b1011 : r11 r11d r11w rex=r11b, + 0b1100 : r12 r12d r12w rex=r12b, + 0b1101 : r13 r13d r13w rex=r13b, + 0b1110 : r14 r14d r14w rex=r14b, + 0b1111 : r15 r15d r15w rex=r15b, +} + +macro_rules! def_regs { + ($($val:literal : $B64:ident $B32:ident $B16:ident $(=$B8:ident)? $(rex=$B8Rex:ident)? $(norex=$B8NoRex:ident)?,)*) => { + $( + #[allow(non_upper_case_globals)] + pub const $B64: R64 = R64($val); + #[allow(non_upper_case_globals)] + pub const $B32: R32 = R32($val); + #[allow(non_upper_case_globals)] + pub const $B16: R16 = R16($val); + + $( + #[allow(non_upper_case_globals)] + pub const $B8: R8 = R8($val, std::marker::PhantomData); + )* + + $( + #[allow(non_upper_case_globals)] + pub const $B8Rex: R8 = R8($val, std::marker::PhantomData); + )? + + $( + #[allow(non_upper_case_globals)] + pub const $B8NoRex: R8 = R8($val, std::marker::PhantomData); + )? + + )* + impl AsmReg { + pub fn parse(s: &str) -> Option { + Some(match s.to_lowercase().as_str() { + $( + stringify!($B64) => Self::R64($B64), + stringify!($B32) => Self::R32($B32), + stringify!($B16) => Self::R16($B16), + $( stringify!($B8 ) => Self::R8($B8), )? + $( stringify!($B8Rex) => Self::R8Rex($B8Rex), )? + $( stringify!($B8NoRex) => Self::R8NoRex($B8NoRex), )? + )* + _ => return None, + }) + } + } + }; +} +use def_regs; diff --git a/src/arch/x86_64/test/bin.rs b/src/arch/x86_64/test/bin.rs index 38ebacf..3e65894 100644 --- a/src/arch/x86_64/test/bin.rs +++ b/src/arch/x86_64/test/bin.rs @@ -91,21 +91,17 @@ fn windows() -> Result<(), CompilerMsg> { Instr::Sub, // stdout mov(ecx, -11)?, - Instr::Callm(get_std_handle), + Instr::CallM(get_std_handle), // write mov(rcx, rax)?, lea(rdx, text_sym), mov(r8d, text.len() as u64)?, lea(r9, written), - Instr::Movm { - reg: rsp, - offset: 0x20, - val: 0, - }, - Instr::Callm(write_file), + mov(mem(rsp, 0x20)?, 0)?, + Instr::CallM(write_file), // exit mov(ecx, 39)?, - Instr::Callm(exit_process), + Instr::CallM(exit_process), ], })], ); diff --git a/src/arch/x86_64/util.rs b/src/arch/x86_64/util.rs index 2be5599..aaf2da3 100644 --- a/src/arch/x86_64/util.rs +++ b/src/arch/x86_64/util.rs @@ -1,45 +1,5 @@ use super::*; -pub trait RexBit { - fn val(self) -> bool; -} - -impl RexBit for u8 { - fn val(self) -> bool { - self != 0 - } -} - -impl RexBit for bool { - fn val(self) -> bool { - self - } -} - -impl RexBit for RegH { - fn val(self) -> bool { - self.gt8() - } -} - -impl RexBit for RegWH { - fn val(self) -> bool { - self.gt8() - } -} - -impl RexBit for WidthH { - fn val(self) -> bool { - self == WidthH::B64 - } -} - -impl RexBit for Width { - fn val(self) -> bool { - self == Width::B64 - } -} - #[inline(always)] pub fn modrm_regs(reg: impl Into, reg_rm: impl Into) -> u8 { modrm(0b11, reg.into().base(), reg_rm.into().base()) @@ -71,3 +31,31 @@ pub fn addr_offset(pos: usize, addr: u64) -> [u8; 4] { let offset = addr as i32 - pos; offset.to_le_bytes() } + +pub trait RexBit { + fn val(self) -> bool; +} + +impl RexBit for u8 { + fn val(self) -> bool { + self != 0 + } +} + +impl RexBit for bool { + fn val(self) -> bool { + self + } +} + +impl> RexBit for R { + fn val(self) -> bool { + self.into().gt8() + } +} + +impl RexBit for Width { + fn val(self) -> bool { + self == Width::B64 + } +}