diff --git a/src/arch/x86_64/asm.rs b/src/arch/x86_64/asm.rs deleted file mode 100644 index 0c0ef51..0000000 --- a/src/arch/x86_64/asm.rs +++ /dev/null @@ -1,172 +0,0 @@ -use crate::{arch::x86_64::*, backend::Symbol}; - -pub struct Asm { - pub instrs: Vec, -} - -#[derive(Clone, Copy)] -pub enum Instr { - Mov(Mov), - Call(Symbol), - CallM(Symbol), - Ret, - Int(u8), - Syscall, - Lea { dst: RegWH, sym: Symbol }, - Push(Push), - Pop(Reg), - Sub, -} - -#[derive(Clone, Copy)] -pub enum Mov { - RR { dst: RegH, src: RegH, width: Width }, - RI { dst: RegWH, src: u64 }, - RM { dst: RegWH, src: Mem }, - MI { dst: Mem, src: u32 }, - MR { dst: Mem, src: RegWH }, -} - -#[derive(Clone, Copy)] -pub enum Push { - Reg(Reg, Width64), - Mem(Mem), - Imm(u32), -} - -#[derive(Clone, Copy)] -pub struct Mem { - pub reg: Reg64, - pub disp: u32, -} - -#[derive(Clone, Copy)] -pub enum RegImmMem { - Reg(RegWH), - Imm(u64), - Mem(Mem), -} - -#[derive(Clone, Copy)] -pub enum RegMem { - Reg(RegWH), - Mem(Mem), -} - -mod fns { - use crate::io::CompilerMsg; - - pub fn mem(reg: RegWH, disp: u32) -> Result { - Ok(Mem { - reg: Reg64 { - reg: reg.reg, - width: reg.width.try_into().map_err(|_| "width must be 32 or 64")?, - }, - disp, - }) - } - - use super::*; - pub fn mov(dst: impl Into, src: impl Into) -> Result { - let dst = dst.into(); - let src = src.into(); - Ok(Instr::Mov(match dst { - RegMem::Reg(dst) => match src { - RegImmMem::Reg(src) => { - if src.width != dst.width { - return Err("src and dst are not same width".into()); - } - Mov::RR { - dst: dst.regh, - src: src.regh, - width: dst.width, - } - } - RegImmMem::Imm(src) => Mov::RI { dst, src }, - RegImmMem::Mem(src) => Mov::RM { src, dst }, - }, - RegMem::Mem(dst) => match src { - RegImmMem::Reg(src) => Mov::MR { dst, src }, - RegImmMem::Imm(src) => { - if src > u32::MAX as u64 { - return Err("cannot move 64 bit immediate into memory".into()); - } - Mov::MI { - dst, - src: src as u32, - } - } - RegImmMem::Mem(_) => return Err("cannot move 
memory to memory".into()), - }, - })) - } - - pub fn lea(dst: RegWH, sym: Symbol) -> Instr { - Instr::Lea { dst, sym } - } - - pub fn push(reg: impl Into) -> Result { - Ok(Instr::Push(match reg.into() { - RegImmMem::Reg(reg) => match reg.width { - Width::B64 => Push::Reg64(reg.reg), - Width::B16 => Push::Reg16(reg.reg), - _ => return Err("register must be 64 or 16 bit".into()), - }, - RegImmMem::Imm(imm) => match imm.try_into() { - Ok(imm) => Push::Imm(imm), - Err(_) => return Err("immediate must be 32 bit".into()), - }, - RegImmMem::Mem(mem) => Push::Mem(mem), - })) - } - - pub fn pop(reg: RegWH) -> Instr { - assert!(reg.width == Width::B64); - Instr::Pop(reg.reg) - } -} - -// fromrot -impl From for RegImmMem { - fn from(value: RegWH) -> Self { - Self::Reg(value) - } -} - -impl From for RegMem { - fn from(value: RegWH) -> Self { - Self::Reg(value) - } -} - -impl From for RegImmMem { - fn from(value: Mem) -> Self { - Self::Mem(value) - } -} - -impl From for RegMem { - fn from(value: Mem) -> Self { - Self::Mem(value) - } -} - -impl From for RegImmMem { - fn from(value: u64) -> Self { - Self::Imm(value) - } -} - -impl From for RegImmMem { - fn from(value: i64) -> Self { - Self::Imm(value as u64) - } -} - -impl From for RegImmMem { - fn from(value: i32) -> Self { - Self::Imm(value as u32 as u64) - } -} - -pub use fns::*; diff --git a/src/arch/x86_64/compile.rs b/src/arch/x86_64/compile.rs new file mode 100644 index 0000000..53f2b0e --- /dev/null +++ b/src/arch/x86_64/compile.rs @@ -0,0 +1,83 @@ +use std::collections::HashMap; + +use super::*; +use crate::backend::{LibImport, LinkedProgram, SymImport, SymTable, Symbol}; +use util::*; + +pub struct Encoder<'a> { + pub code: Code, + pub sym_tab: SymTable, + pub sym_refs: HashMap>, + pub program: &'a Program, +} + +pub fn compile(p: &Program) -> Result, CompilerMsg> { + let mut encoder = Encoder::new(p); + + p.encode_data(&mut encoder.code.bytes, &mut encoder.sym_tab); + + for f in &p.funcs { + let addr = 
encoder.code.bytes.len(); + encoder.sym_tab.insert(f.sym, addr as u64); + for instr in &f.instrs { + encoder.compile_instr(instr)?; + } + } + + for (pos, sym) in encoder.code.missing.drain(..) { + let info = encoder.program.sym_info(sym); + if info.external { + encoder.sym_refs.entry(sym).or_default().push(pos); + } else { + // internal symbol: patch the 4-byte offset now; the error message is only built when the lookup fails + let addr = encoder + .sym_tab + .get(sym) + .ok_or_else(|| CompilerMsg::from(format!("missing symbol {}", info.name)))?; + encoder.code.bytes[pos..pos + 4].copy_from_slice(&addr_offset(pos, addr)) + } + } + + let imports = p + .external + .iter() + .map(|e| LibImport { + name: e.file.clone(), + syms: e + .syms + .iter() + .map(|&s| SymImport { + name: p.sym_info(s).name.clone(), + usages: encoder.sym_refs.entry(s).or_default().clone(), + }) + .collect(), + }) + .collect(); + + Ok(LinkedProgram { + code: encoder.code.bytes, + entry: p.entry.and_then(|e| encoder.sym_tab.get(e)), + imports, + }) +} + +type BInstr = crate::backend::Instr; +impl<'a> Encoder<'a> { + fn compile_instr(&mut self, instr: &BInstr) -> Result<(), CompilerMsg> { + match instr { + BInstr::Asm(asm) => { + self.code.extend(asm); + } + _ => todo!(), + } + Ok(()) + } + + pub fn new(program: &'a Program) -> Self { + Self { + code: Code::default(), + sym_tab: SymTable::new(program.sym_count()), + sym_refs: Default::default(), + program, + } + } +} diff --git a/src/arch/x86_64/encode.rs b/src/arch/x86_64/encode.rs index da26489..435729c 100644 --- a/src/arch/x86_64/encode.rs +++ b/src/arch/x86_64/encode.rs @@ -1,232 +1,249 @@ -use std::collections::HashMap; +use crate::backend::Symbol; use super::*; -use crate::backend::{LibImport, LinkedProgram, SymImport, SymTable, Symbol}; use util::*; -pub struct Encoder<'a> { - pub data: Vec, - pub sym_tab: SymTable, - pub missing: Vec<(usize, Symbol)>, - pub sym_refs: HashMap>, - pub program: &'a Program, +type ERes = Result<(), CompilerMsg>; + +/// machine code +#[derive(Default)] +pub struct Code { + pub(super) bytes: Vec, + pub(super) missing:
Vec<(usize, Symbol)>, } -pub fn compile(p: &Program) -> Result, CompilerMsg> { - let mut encoder = Encoder::new(p); - - p.encode_data(&mut encoder.data, &mut encoder.sym_tab); - - for f in &p.funcs { - let addr = encoder.data.len(); - encoder.sym_tab.insert(f.sym, addr as u64); - for instr in &f.instrs { - compile_instr(&mut encoder, instr)?; - } - } - - for (pos, sym) in encoder.missing.drain(..) { - let addr = encoder - .sym_tab - .get(sym) - .ok_or(CompilerMsg::from(format!("missing symbol {sym:?}")))?; - encoder.data[pos..pos + 4].copy_from_slice(&addr_offset(pos, addr)) - } - - let imports = p - .external - .iter() - .map(|e| LibImport { - name: e.file.clone(), - syms: e - .syms - .iter() - .map(|&s| SymImport { - name: p.sym_info(s).name.clone(), - usages: encoder.sym_refs.entry(s).or_default().clone(), - }) - .collect(), - }) - .collect(); - - Ok(LinkedProgram { - code: encoder.data, - entry: p.entry.and_then(|e| encoder.sym_tab.get(e)), - imports, - }) +#[derive(Clone, Copy)] +pub struct Mem { + pub reg: Reg, + pub disp: u32, } -type BInstr = crate::backend::Instr; -fn compile_instr(encoder: &mut Encoder, instr: &BInstr) -> Result<(), CompilerMsg> { - match instr { - BInstr::Asm(asm) => { - for i in &asm.instrs { - encoder.asm(*i)?; - } - } - _ => todo!(), - } - Ok(()) +#[derive(Clone, Copy)] +pub enum RegImmMem { + Reg(Reg), + Imm(u64), + Mem(Mem), } -impl Encoder<'_> { - // assembly +#[derive(Clone, Copy)] +pub enum RegMem { + Reg(Reg), + Mem(Mem), +} - pub fn mov_rr(&mut self, dst: RegH, src: RegH, width: Width) { - if width == Width::B16 { - self.data.push(0x66); - } - if src.requires_rex(width) || dst.requires_rex(width) { - self.data.push(rex(width, src, 0, dst)); - } - self.data.push(0x88 | width.gt8() as u8); - self.data.push(modrm_regs(src, dst)); - } +pub fn mem(reg: Reg, disp: u32) -> Mem { + Mem { reg, disp } +} - pub fn mov_ri(&mut self, dst: RegWH, src: u64) -> Result<(), CompilerMsg> { - if dst.width == Width::B16 { - self.data.push(0x66); 
+impl Code { + /// Encodes `mov dst, src` for the operand combinations implemented so far. + /// + /// Returns an error for mismatched register widths, REX-incompatible register pairs, + /// immediates that do not fit, and memory-to-memory moves; the register/memory forms are still `todo!()`. + pub fn mov(&mut self, dst: impl Into, src: impl Into) -> ERes { + let dst = dst.into(); + let src = src.into(); + match dst { + RegMem::Reg(dst) => match src { + RegImmMem::Reg(src) => { + if dst.width() != src.width() { + return Err("src and dst are not same width".into()); + } + if dst.incompatible(&src) { + return Err("incompatible registers due to rex".into()); + } + let width = dst.width(); + self.prefix16(width); + if src.requires_rex() || dst.requires_rex() { + self.bytes.push(rex(width, src, 0, dst)); + } + self.bytes.push(0x88 | width.gt8() as u8); + self.bytes.push(modrm_regs(src, dst)); + } + RegImmMem::Imm(src) => { + // validate before emitting so a rejected mov leaves no stray prefix bytes behind + if src > dst.width().max() { + return Err("immediate cannot fit in register".into()); + } + self.prefix16(dst); + if dst.requires_rex() { + self.bytes.push(rex(dst.width(), 0, 0, dst)); + } + let opcode = 0xb0 | ((dst.width().gt8() as u8) << 3); + self.bytes.push(opcode | dst.base()); + self.bytes.extend(&src.to_le_bytes()[..dst.width().bytes()]); + } + RegImmMem::Mem(src) => todo!(), + }, + RegMem::Mem(dst) => match src { + RegImmMem::Reg(src) => todo!(), + RegImmMem::Imm(src) => { + if src > u32::MAX as u64 { + return Err("cannot move 64 bit immediate into memory".into()); + } + + // the base register sits in ModRM.rm, so it is extended by REX.B (last slot), not REX.R + self.bytes.extend([rex(1, 0, 0, dst.reg), 0xc7]); + self.modrm_regdisp(dst.reg, dst.disp); + // C7 /0 takes a 32-bit immediate: emit 4 bytes, not the 8 of the u64 (range checked above) + self.bytes.extend((src as u32).to_le_bytes()); + } + RegImmMem::Mem(_) => return Err("cannot move memory to memory".into()), + }, } - if dst.requires_rex() { - self.data.push(rex(dst.width, 0, 0, dst)); - } - if src > dst.width.max() { - return Err("immediate cannot fit in register".into()); - } - let opcode = 0xb0 | ((dst.width.gt8() as u8) << 3); - self.data.push(opcode | dst.base()); - self.data.extend(&src.to_le_bytes()[..dst.width.bytes()]); Ok(()) } - pub fn mov_rm(&mut self, dst: RegWH, src: Mem) {} - - pub fn mov_mr(&mut self, dst: Mem, src: RegWH) {} - - pub fn mov_mi(&mut self, dst: Mem, src: u32) { - self.data.extend([rex(1, dst.reg, 0, 0), 0xc7]); -
self.modrm_regdisp(dst.reg, dst.disp); - self.data.extend(src.to_le_bytes()); + /// Encodes `push` of a 64- or 16-bit register or of an immediate. + /// + /// Returns an error for 32/8-bit registers and for immediates wider than 32 bits; memory operands are still `todo!()`. + pub fn push(&mut self, reg: impl Into) -> ERes { + match reg.into() { + RegImmMem::Reg(reg) => match reg.width() { + Width::B64 | Width::B16 => { + // same layout as `pop`: 0x66 selects the 16-bit form, 0x41 reaches r8-r15 + self.prefix16(reg); + if reg.gt8() { + self.bytes.push(0x41); + } + self.bytes.push(0x50 | reg.base()); + } + _ => return Err("register must be 64 or 16 bit".into()), + }, + RegImmMem::Imm(imm) => match imm.try_into() { + Ok(imm) => { + // 6A ib sign-extends its byte, so only 0..=127 survives the short form + const IMM8_END: u32 = 1 << 7; + if let 0..IMM8_END = imm { + self.bytes.push(0x6a); + self.bytes.push(imm as u8); + } else { + self.bytes.push(0x68); + self.bytes.extend(imm.to_le_bytes()); + } + } + Err(_) => return Err("immediate must be 32 bit".into()), + }, + RegImmMem::Mem(mem) => todo!(), + } + Ok(()) } - pub fn lea(&mut self, dst: RegWH, sym: Symbol) { - self.data + pub fn pop(&mut self, reg: Reg) -> ERes { + match reg.width() { + Width::B64 | Width::B16 => (), + _ => return Err("register must be 64 or 16 bit".into()), + } + self.prefix16(reg); + if reg.gt8() { + self.bytes.push(0x41); + } + self.bytes.push(0x58 | reg.base()); + Ok(()) + } + + pub fn lea(&mut self, dst: Reg, sym: Symbol) { + self.bytes .extend([rex(1, dst, 0, 0), 0x8d, modrm_disp32(dst)]); self.sym_offset4(sym); } pub fn int(&mut self, code: u8) { - self.data.extend([0xcd, code]) + self.bytes.extend([0xcd, code]) } pub fn syscall(&mut self) { - self.data.extend([0x0f, 0x05]) + self.bytes.extend([0x0f, 0x05]) } - pub fn call_i(&mut self, sym: Symbol) { - self.data.push(0xe8); + pub fn call(&mut self, sym: Symbol) { + self.bytes.push(0xe8); self.sym_offset4(sym); } - pub fn call_m(&mut self, sym: Symbol) { - self.data.extend([0xff, 0x15]); + pub fn call_mem(&mut self, sym: Symbol) { + self.bytes.extend([0xff, 0x15]); self.sym_offset4(sym); } pub fn ret(&mut self) { - self.data.push(0xc3); + self.bytes.push(0xc3); } - pub fn push_r(&mut self, reg: Reg, width: Width64) { - if reg.gt8() { - self.data.push(0x41); + pub fn sub(&mut self) { + // sub rsp, 40 (REX.W 83 /5 ib) +
self.bytes.extend([0x48, 0x83, 0xec, 0x28]); + } + + fn prefix16(&mut self, width: impl Into) { + if width.into() == Width::B16 { + self.bytes.push(0x66); } - self.data.push(0x50 | reg.base()); } - pub fn push_i(&mut self, imm: u32) { - const U8: u32 = 2 << 8; - if let 0..U8 = imm { - self.data.push(0x6a); - self.data.push(imm as u8); + /// Emits the ModRM byte (reg field 0), a SIB byte when the base needs one, and the displacement for `[reg + disp]`. + fn modrm_regdisp(&mut self, reg: Reg, disp: u32) { + // disp8 is sign-extended by the CPU, so only 0..=127 may use the one-byte form + let disp8 = disp <= i8::MAX as u32; + let mod_ = if disp8 { 0b01 } else { 0b10 }; + self.bytes.push(modrm(mod_, 0, reg.base())); + // rm == 0b100 (rsp and r12) always announces a SIB byte + if reg.base() == rsp.base() { + // SIB + self.bytes.push(0x24); + } + if disp8 { + self.bytes.push(disp as u8); } else { - self.data.push(0x68); - self.data.extend(imm.to_le_bytes()); + self.bytes.extend(disp.to_le_bytes()); } } - pub fn pop(&mut self, reg: Reg) { - if reg.gt8() { - self.data.push(0x41); - } - self.data.push(0x58 | reg.base()); - } - /// inserts a 32 bit offset from a symbol - pub fn sym_offset4(&mut self, sym: Symbol) { - let Some(addr) = self.sym_tab.get(sym) else { - let pos = self.data.len(); - self.data.extend([0; 4]); - if self.program.sym_info(sym).external { - self.sym_refs.entry(sym).or_default().push(pos); - } else { - self.missing.push((pos, sym)); - } - return; - }; - self.data.extend(addr_offset(self.data.len(), addr)); + fn sym_offset4(&mut self, sym: Symbol) { + let pos = self.bytes.len(); + self.bytes.extend([0; 4]); + self.missing.push((pos, sym)); } - pub fn asm(&mut self, instr: Instr) -> Result<(), CompilerMsg> { - match instr { - Instr::Mov(v) => match v { - Mov::RR { dst, src, width } => self.mov_rr(dst, src, width), - Mov::RI { dst, src } => self.mov_ri(dst, src)?, - Mov::RM { dst, src } => self.mov_rm(dst, src), - Mov::MI { dst, src } => self.mov_mi(dst, src), - Mov::MR { dst, src } => self.mov_mr(dst, src), - }, - Instr::Int(code) => self.int(code), - Instr::Syscall => self.syscall(), - Instr::Lea { dst, sym } => self.lea(dst, sym), - Instr::Call(sym) => self.call_i(sym), - Instr::CallM(sym) => self.call_m(sym), -
Instr::Ret => self.ret(), - Instr::Push(v) => match v { - Push::Reg(reg, width) => self.push_r(reg, width), - Push::Imm(imm) => self.push_i(imm), - }, - Instr::Pop(reg) => self.pop(reg), - Instr::Sub => self.data.extend([0x48, 0x83, 0xec, 0x28]), - } - Ok(()) - } - - pub fn modrm_regdisp(&mut self, reg: impl Into, disp: u32) { - let reg = reg.into(); - let disp8 = disp < u8::MAX as u32; - let mod_ = if disp8 { 0b01 } else { 0b10 }; - self.data.push(modrm(mod_, 0, reg.base())); - if reg == rsp.reg { - // SIB - self.data.push(0x24); - } - if disp8 { - self.data.push(disp as u8); - } else { - self.data.extend(disp.to_le_bytes()); - } + pub fn extend(&mut self, other: &Code) { + let pos = self.bytes.len(); + self.bytes.extend(&other.bytes); + self.missing + .extend(other.missing.iter().map(|&(p, s)| (pos + p, s))); } } -impl<'a> Encoder<'a> { - pub fn new(program: &'a Program) -> Self { - Self { - data: Default::default(), - sym_tab: SymTable::new(program.sym_count()), - missing: Default::default(), - sym_refs: Default::default(), - program, - } +pub fn encode(f: impl FnOnce(&mut Code) -> Result<(), CompilerMsg>) -> Result { + let mut code = Code::default(); + f(&mut code)?; + Ok(code) +} + +// fromrot +impl From for RegImmMem { + fn from(value: Reg) -> Self { + Self::Reg(value) + } +} + +impl From for RegMem { + fn from(value: Reg) -> Self { + Self::Reg(value) + } +} + +impl From for RegImmMem { + fn from(value: Mem) -> Self { + Self::Mem(value) + } +} + +impl From for RegMem { + fn from(value: Mem) -> Self { + Self::Mem(value) + } +} + +impl From for RegImmMem { + fn from(value: u64) -> Self { + Self::Imm(value) + } +} + +impl From for RegImmMem { + fn from(value: i64) -> Self { + Self::Imm(value as u64) + } +} + +impl From for RegImmMem { + fn from(value: i32) -> Self { + Self::Imm(value as u32 as u64) } } diff --git a/src/arch/x86_64/mod.rs b/src/arch/x86_64/mod.rs index bbe06fa..aeb42e9 100644 --- a/src/arch/x86_64/mod.rs +++ b/src/arch/x86_64/mod.rs @@ -1,7 +1,6 
@@ -mod asm; +mod compile; mod encode; mod reg; -mod reg2; mod test; mod util; @@ -11,7 +10,7 @@ use crate::{ io::CompilerMsg, }; -pub use asm::*; +pub use compile::*; pub use encode::*; pub use reg::*; pub use test::bin::run as bin_test; @@ -20,7 +19,7 @@ pub struct X86_64; impl Arch for X86_64 { const NAME: &str = "x86_64"; - type Asm = Asm; + type Asm = Code; type Addr = u64; fn compile(p: &Program) -> Result, CompilerMsg> { compile(p) diff --git a/src/arch/x86_64/reg.rs b/src/arch/x86_64/reg.rs index 82b6608..e345396 100644 --- a/src/arch/x86_64/reg.rs +++ b/src/arch/x86_64/reg.rs @@ -1,22 +1,8 @@ #[derive(Clone, Copy, PartialEq)] -pub struct Reg(u8); - -#[derive(Clone, Copy)] -pub struct RegH { - pub reg: Reg, - pub high: bool, -} - -#[derive(Clone, Copy)] -pub struct RegWH { - pub regh: RegH, - pub width: Width, -} - -#[derive(Clone, Copy)] -pub struct Reg64 { - pub reg: Reg, - pub width: Width64, +pub struct Reg { + val: u8, + high: bool, + width: Width, } #[derive(Debug, Clone, Copy, PartialEq)] @@ -27,22 +13,16 @@ pub enum Width { B8, } -#[derive(Debug, Clone, Copy, PartialEq)] -pub enum Width64 { - B64, - B32, -} +def_regs! { Reg; + 0b0000 : rax eax ax al, + 0b0001 : rcx ecx cx cl, + 0b0010 : rdx edx dx dl, + 0b0011 : rbx ebx bx bl, -def_regs! { RegWH; - 0b0000 : rax eax ax al ah=spl, - 0b0001 : rcx ecx cx cl ch=bpl, - 0b0010 : rdx edx dx dl dh=sil, - 0b0011 : rbx ebx bx bl bh=dil, - - 0b0100 : rsp esp sp spl, - 0b0101 : rbp ebp bp bpl, - 0b0110 : rsi esi si sil, - 0b0111 : rdi edi di dil, + 0b0100 : rsp esp sp spl norex=ah, + 0b0101 : rbp ebp bp bpl norex=ch, + 0b0110 : rsi esi si sil norex=dh, + 0b0111 : rdi edi di dil norex=bh, 0b1000 : r8 r8d r8w r8b, 0b1001 : r9 r9d r9w r9b, @@ -56,36 +36,35 @@ def_regs! 
{ RegWH; impl Reg { pub fn base(&self) -> u8 { - self.0 & 0b111 + self.val & 0b111 } /// checks if register is not one of the first 8 (0-7) pub fn gt8(&self) -> bool { - self.0 >= 0b1000 + self.val >= 0b1000 } pub fn gt4(&self) -> bool { - self.0 >= 0b0100 + self.val >= 0b0100 } -} - -impl RegH { - pub fn requires_rex(&self, width: Width) -> bool { - self.gt8() || width == Width::B64 || (self.gt4() && width == Width::B8 && !self.high) + pub fn high(&self) -> bool { + self.high } -} -impl RegWH { - const fn new(val: u8, width: Width, high: bool) -> Self { - Self { - regh: RegH { - reg: Reg(val), - high, - }, - width, - } + pub fn width(&self) -> Width { + self.width } pub fn requires_rex(&self) -> bool { - self.regh.requires_rex(self.width) + self.gt8() + || self.width == Width::B64 + || (self.gt4() && self.width == Width::B8 && !self.high) + } + + pub fn incompatible(&self, other: &Reg) -> bool { + (self.requires_rex() && other.high) || (self.high && other.requires_rex()) + } + + const fn new(val: u8, width: Width, high: bool) -> Self { + Self { val, high, width } } } @@ -112,44 +91,8 @@ impl Width { } } -impl From for Reg { - fn from(value: RegWH) -> Self { - value.reg - } -} - -impl From for Reg { - fn from(value: RegH) -> Self { - value.reg - } -} - -impl From for Reg { - fn from(value: Reg64) -> Self { - value.reg - } -} - -impl From for RegH { - fn from(value: RegWH) -> Self { - value.regh - } -} - -impl TryFrom for Width64 { - type Error = (); - - fn try_from(value: Width) -> Result { - match value { - Width::B64 => Ok(Self::B64), - Width::B32 => Ok(Self::B32), - _ => Err(()), - } - } -} - macro_rules! 
def_regs { - ($Struct: ident; $($val:literal : $B64:ident $B32:ident $B16:ident $B8:ident $($B8H:ident=$hval:expr)?,)*) => { + ($Struct: ident; $($val:literal : $B64:ident $B32:ident $B16:ident $B8:ident $(norex=$B8H:ident)?,)*) => { $( #[allow(non_upper_case_globals)] pub const $B64: $Struct = $Struct::new($val, Width::B64, false); @@ -161,7 +104,7 @@ macro_rules! def_regs { pub const $B8 : $Struct = $Struct::new($val, Width::B8 , false); $( #[allow(non_upper_case_globals)] - pub const $B8H: $Struct = $Struct::new($hval.regh.reg.0, Width::B8, true); + pub const $B8H: $Struct = $Struct::new($val, Width::B8, true); )? )* impl $Struct { @@ -185,18 +128,8 @@ macro_rules! def_regs { use def_regs; -impl std::ops::Deref for RegWH { - type Target = RegH; - - fn deref(&self) -> &Self::Target { - &self.regh - } -} - -impl std::ops::Deref for RegH { - type Target = Reg; - - fn deref(&self) -> &Self::Target { - &self.reg +impl From for Width { + fn from(value: Reg) -> Self { + value.width } } diff --git a/src/arch/x86_64/reg2.rs b/src/arch/x86_64/reg2.rs deleted file mode 100644 index b490387..0000000 --- a/src/arch/x86_64/reg2.rs +++ /dev/null @@ -1,107 +0,0 @@ -#[derive(Clone, Copy, PartialEq)] -pub struct R8(u8, std::marker::PhantomData); -#[derive(Clone, Copy, PartialEq)] -pub struct R16(u8); -#[derive(Clone, Copy, PartialEq)] -pub struct R32(u8); -#[derive(Clone, Copy, PartialEq)] -pub struct R64(u8); - -pub struct Rex; -pub struct NoRex; -pub struct OptionalRex; - -pub trait MatchRex { - const REX: bool; -} - -impl MatchRex for Rex { - const REX: bool = true; -} -impl MatchRex for OptionalRex { - const REX: bool = true; -} -impl MatchRex for NoRex { - const REX: bool = false; -} -impl MatchRex for OptionalRex { - const REX: bool = false; -} -impl MatchRex for OptionalRex { - const REX: bool = false; -} - -pub enum AsmReg { - R8(R8), - R8Rex(R8), - R8NoRex(R8), - R16(R16), - R32(R32), - R64(R64), -} - -def_regs! 
{ - 0b0000 : rax eax ax =al, - 0b0001 : rcx ecx cx =cl, - 0b0010 : rdx edx dx =dl, - 0b0011 : rbx ebx bx =bl, - - 0b0100 : rsp esp sp rex=spl norex=ah, - 0b0101 : rbp ebp bp rex=bpl norex=ch, - 0b0110 : rsi esi si rex=sil norex=dh, - 0b0111 : rdi edi di rex=dil norex=bh, - - 0b1000 : r8 r8d r8w rex=r8b, - 0b1001 : r9 r9d r9w rex=r9b, - 0b1010 : r10 r10d r10w rex=r10b, - 0b1011 : r11 r11d r11w rex=r11b, - 0b1100 : r12 r12d r12w rex=r12b, - 0b1101 : r13 r13d r13w rex=r13b, - 0b1110 : r14 r14d r14w rex=r14b, - 0b1111 : r15 r15d r15w rex=r15b, -} - -macro_rules! def_regs { - ($($val:literal : $B64:ident $B32:ident $B16:ident $(=$B8:ident)? $(rex=$B8Rex:ident)? $(norex=$B8NoRex:ident)?,)*) => { - $( - #[allow(non_upper_case_globals)] - pub const $B64: R64 = R64($val); - #[allow(non_upper_case_globals)] - pub const $B32: R32 = R32($val); - #[allow(non_upper_case_globals)] - pub const $B16: R16 = R16($val); - - $( - #[allow(non_upper_case_globals)] - pub const $B8: R8 = R8($val, std::marker::PhantomData); - )* - - $( - #[allow(non_upper_case_globals)] - pub const $B8Rex: R8 = R8($val, std::marker::PhantomData); - )? - - $( - #[allow(non_upper_case_globals)] - pub const $B8NoRex: R8 = R8($val, std::marker::PhantomData); - )? - - )* - impl AsmReg { - pub fn parse(s: &str) -> Option { - Some(match s.to_lowercase().as_str() { - $( - stringify!($B64) => Self::R64($B64), - stringify!($B32) => Self::R32($B32), - stringify!($B16) => Self::R16($B16), - $( stringify!($B8 ) => Self::R8($B8), )? - $( stringify!($B8Rex) => Self::R8Rex($B8Rex), )? - $( stringify!($B8NoRex) => Self::R8NoRex($B8NoRex), )? 
- )* - _ => return None, - }) - } - } - }; -} -use def_regs; diff --git a/src/arch/x86_64/test/bin.rs b/src/arch/x86_64/test/bin.rs index 3e65894..72141ce 100644 --- a/src/arch/x86_64/test/bin.rs +++ b/src/arch/x86_64/test/bin.rs @@ -18,34 +18,32 @@ fn linux() -> Result<(), CompilerMsg> { let text_sym2 = program.ro_data("hello_jp", text2); let hello2 = program.func( "hello2", - [BInstr::Asm(Asm { - instrs: vec![ - mov(ax, 1)?, - mov(di, 1)?, - lea(rsi, text_sym2), - mov(dx, text2.len() as u64)?, - Instr::Syscall, - Instr::Ret, - ], - })], + [BInstr::Asm(encode(|c| { + c.mov(ax, 1)?; + c.mov(di, 1)?; + c.lea(rsi, text_sym2); + c.mov(dx, text2.len() as u64)?; + c.syscall(); + c.ret(); + Ok(()) + })?)], ); let entry = program.func( "main", - [BInstr::Asm(Asm { - instrs: vec![ - mov(di, 39)?, - push(rdi)?, - mov(ax, 1)?, - mov(di, 1)?, - lea(rsi, text_sym), - mov(dx, text.len() as u64)?, - Instr::Syscall, - Instr::Call(hello2), - mov(ax, 0x3c)?, - pop(rdi), - Instr::Syscall, - ], - })], + [BInstr::Asm(encode(|c| { + c.mov(rdi, 39)?; + c.push(rdi)?; + c.mov(ax, 1)?; + c.mov(di, 1)?; + c.lea(rsi, text_sym); + c.mov(dx, text.len() as u64)?; + c.syscall(); + c.call(hello2); + c.mov(ax, 0x3c)?; + c.pop(rdi)?; + c.syscall(); + Ok(()) + })?)], ); program.entry = Some(entry); let linked = program.compile().expect("failed to compile"); @@ -86,24 +84,23 @@ fn windows() -> Result<(), CompilerMsg> { let written = program.ro_data("written", [0; 4]); let entry = program.func( "main", - [BInstr::Asm(Asm { - instrs: vec![ - Instr::Sub, - // stdout - mov(ecx, -11)?, - Instr::CallM(get_std_handle), - // write - mov(rcx, rax)?, - lea(rdx, text_sym), - mov(r8d, text.len() as u64)?, - lea(r9, written), - mov(mem(rsp, 0x20)?, 0)?, - Instr::CallM(write_file), - // exit - mov(ecx, 39)?, - Instr::CallM(exit_process), - ], - })], + [BInstr::Asm(encode(|c| { + c.sub(); + // stdout + c.mov(ecx, -11)?; + c.call_mem(get_std_handle); + // write + c.mov(rcx, rax)?; + c.lea(rdx, text_sym); + 
c.mov(r8d, text.len() as u64)?; + c.lea(r9, written); + c.mov(mem(rsp, 0x20), 0)?; + c.call_mem(write_file); + // exit + c.mov(ecx, 39)?; + c.call_mem(exit_process); + Ok(()) + })?)], ); program.entry = Some(entry); let linked = program.compile().expect("failed to compile"); diff --git a/src/arch/x86_64/test/reg.rs b/src/arch/x86_64/test/reg.rs index 5cfaa4f..abbaae5 100644 --- a/src/arch/x86_64/test/reg.rs +++ b/src/arch/x86_64/test/reg.rs @@ -1,86 +1,82 @@ use super::*; - -fn eq(expected: impl AsRef<[u8]>, asm: Result) { - let expected = expected.as_ref(); - let program = Program::default(); - let mut encoder = Encoder::new(&program); - let asm = match asm { - Ok(v) => v, - Err(e) => { - panic!("expected {expected:x?}, failed to compile: {}", e.msg); +macro_rules! eq { + ($expected:expr, $instr:ident $args:tt $(,)?) => { + let expected = $expected.as_ref(); + let mut code = Code::default(); + let res = code.$instr $args; + let asm = stringify!($instr $args); + if let Err(e) = res { + panic!("{asm}: failed to compile: {}", e.msg); } + let res = &code.bytes[..]; + assert_eq!(expected, res, "{asm}: expected {expected:x?}, got {res:x?}"); }; - if let Err(e) = encoder.asm(asm) { - panic!("expected {expected:x?}, failed to compile: {}", e.msg); - } - let res = encoder.data; - assert_eq!(expected, &res[..], "expected {expected:x?}, got {res:x?}"); } #[test] fn mov_reg_reg() { // used objdump on some nasm compiled assembly - eq([0x48, 0x89, 0xd8], mov(rax, rbx)); - eq([0x89, 0xd8], mov(eax, ebx)); - eq([0x66, 0x89, 0xd8], mov(ax, bx)); - eq([0x88, 0xd8], mov(al, bl)); - eq([0x88, 0xfc], mov(ah, bh)); + eq!([0x48, 0x89, 0xd8], mov(rax, rbx)); + eq!([0x89, 0xd8], mov(eax, ebx)); + eq!([0x66, 0x89, 0xd8], mov(ax, bx)); + eq!([0x88, 0xd8], mov(al, bl)); + eq!([0x88, 0xfc], mov(ah, bh)); - eq([0x88, 0xf8], mov(al, bh)); - eq([0x88, 0xdc], mov(ah, bl)); - eq([0x40, 0x88, 0xe7], mov(dil, spl)); + eq!([0x88, 0xf8], mov(al, bh)); + eq!([0x88, 0xdc], mov(ah, bl)); + eq!([0x40, 
0x88, 0xe7], mov(dil, spl)); - eq([0x4d, 0x89, 0xc8], mov(r8, r9)); - eq([0x45, 0x89, 0xc8], mov(r8d, r9d)); - eq([0x66, 0x45, 0x89, 0xc8], mov(r8w, r9w)); - eq([0x45, 0x88, 0xc8], mov(r8b, r9b)); + eq!([0x4d, 0x89, 0xc8], mov(r8, r9)); + eq!([0x45, 0x89, 0xc8], mov(r8d, r9d)); + eq!([0x66, 0x45, 0x89, 0xc8], mov(r8w, r9w)); + eq!([0x45, 0x88, 0xc8], mov(r8b, r9b)); - eq([0x49, 0x89, 0xc0], mov(r8, rax)); - eq([0x4c, 0x89, 0xc0], mov(rax, r8)); - eq([0x4d, 0x89, 0xd1], mov(r9, r10)); + eq!([0x49, 0x89, 0xc0], mov(r8, rax)); + eq!([0x4c, 0x89, 0xc0], mov(rax, r8)); + eq!([0x4d, 0x89, 0xd1], mov(r9, r10)); - eq([0x4d, 0x89, 0xe0], mov(r8, r12)); + eq!([0x4d, 0x89, 0xe0], mov(r8, r12)); - eq([0x89, 0xe0], mov(eax, esp)); - eq([0x89, 0xc4], mov(esp, eax)); + eq!([0x89, 0xe0], mov(eax, esp)); + eq!([0x89, 0xc4], mov(esp, eax)); } #[test] fn mov_reg_imm() { - eq( + eq!( [0x49, 0xbf, 0xf0, 0xde, 0xbc, 0x9a, 0x78, 0x56, 0x34, 0x12], mov(r15, 0x123456789abcdef0u64), ); - eq( + eq!( [0x49, 0xb8, 0xf0, 0xde, 0xbc, 0x9a, 0x78, 0x56, 0x34, 0x12], mov(r8, 0x123456789abcdef0u64), ); - eq( + eq!( [0x49, 0xb9, 0xf0, 0xde, 0xbc, 0x9a, 0x78, 0x56, 0x34, 0x12], mov(r9, 0x123456789abcdef0u64), ); - eq([0x41, 0xb9, 0x78, 0x56, 0x34, 0x12], mov(r9d, 0x12345678)); - eq([0x66, 0x41, 0xb9, 0x34, 0x12], mov(r9w, 0x1234)); - eq([0x41, 0xb1, 0x12], mov(r9b, 0x12)); - eq([0x41, 0xb0, 0x12], mov(r8b, 0x12)); - eq([0x41, 0xb7, 0x12], mov(r15b, 0x12)); + eq!([0x41, 0xb9, 0x78, 0x56, 0x34, 0x12], mov(r9d, 0x12345678)); + eq!([0x66, 0x41, 0xb9, 0x34, 0x12], mov(r9w, 0x1234)); + eq!([0x41, 0xb1, 0x12], mov(r9b, 0x12)); + eq!([0x41, 0xb0, 0x12], mov(r8b, 0x12)); + eq!([0x41, 0xb7, 0x12], mov(r15b, 0x12)); - eq( + eq!( [0x48, 0xb8, 0xf0, 0xde, 0xbc, 0x9a, 0x78, 0x56, 0x34, 0x12], mov(rax, 0x123456789abcdef0u64), ); - eq( + eq!( [0x48, 0xbb, 0xf0, 0xde, 0xbc, 0x9a, 0x78, 0x56, 0x34, 0x12], mov(rbx, 0x123456789abcdef0u64), ); - eq( + eq!( [0x48, 0xbf, 0xf0, 0xde, 0xbc, 0x9a, 0x78, 0x56, 0x34, 0x12], 
mov(rdi, 0x123456789abcdef0u64), ); - eq([0xbb, 0x78, 0x56, 0x34, 0x12], mov(ebx, 0x12345678)); - eq([0x66, 0xbb, 0x34, 0x12], mov(bx, 0x1234)); - eq([0xb3, 0x12], mov(bl, 0x12)); - eq([0xb7, 0x12], mov(bh, 0x12)); - eq([0xb4, 0x12], mov(ah, 0x12)); - eq([0x40, 0xb7, 0x12], mov(dil, 0x12)); + eq!([0xbb, 0x78, 0x56, 0x34, 0x12], mov(ebx, 0x12345678)); + eq!([0x66, 0xbb, 0x34, 0x12], mov(bx, 0x1234)); + eq!([0xb3, 0x12], mov(bl, 0x12)); + eq!([0xb7, 0x12], mov(bh, 0x12)); + eq!([0xb4, 0x12], mov(ah, 0x12)); + eq!([0x40, 0xb7, 0x12], mov(dil, 0x12)); } diff --git a/src/parser/nodes/asm/mod.rs b/src/parser/nodes/asm/mod.rs index 772e954..24b0a81 100644 --- a/src/parser/nodes/asm/mod.rs +++ b/src/parser/nodes/asm/mod.rs @@ -1,12 +1,12 @@ use crate::{ - arch::x86_64::Asm, + arch::x86_64::Code, parser::{Node, cursor::Token}, }; pub mod x86_64; pub enum AsmBlock { - X86_64(Asm), + X86_64(Code), } impl Node for AsmBlock { diff --git a/src/parser/nodes/asm/x86_64.rs b/src/parser/nodes/asm/x86_64.rs index f0b6cce..d67a97f 100644 --- a/src/parser/nodes/asm/x86_64.rs +++ b/src/parser/nodes/asm/x86_64.rs @@ -7,9 +7,9 @@ use crate::{ }, }; -impl Node for Asm { +impl Node for Code { fn parse(ctx: &mut crate::parser::ParseCtx) -> Result { - let mut instrs = Vec::new(); + let mut c = Code::default(); while let Some(Token::Ident(next)) = ctx.peek() { match next.as_str() { "mov" => { @@ -17,7 +17,7 @@ impl Node for Asm { let dst = parse_reg(ctx)?; ctx.expect(Token::Comma)?; let src = parse_rmi(ctx)?; - instrs.push(mov(dst, src)?); + c.mov(dst, src)?; } "int" => { ctx.next(); @@ -27,7 +27,7 @@ impl Node for Asm { let code = parse_imm(&num, ctx.span)? 
.try_into() .map_err(|_| CompilerMsg::from("Immediate must be a u8"))?; - instrs.push(Instr::Int(code)); + c.int(code); } _ => { let msg = format!("Unknown instruction {next}"); @@ -39,7 +39,7 @@ impl Node for Asm { } } } - Ok(Self { instrs }) + Ok(c) } fn fmt(&self, f: &mut std::fmt::Formatter, ctx: crate::parser::DisplayCtx) -> std::fmt::Result { @@ -60,17 +60,17 @@ pub fn parse_rmi(ctx: &mut crate::parser::ParseCtx) -> Result RegImmMem::Reg(RegWH::parse(ident).ok_or_else(err)?), + Token::Ident(ident) => RegImmMem::Reg(Reg::parse(ident).ok_or_else(err)?), Token::Lit(LitTy::Number(num)) => RegImmMem::Imm(parse_imm(num, ctx.span)?), _ => return Err(err()), }) } -pub fn parse_reg(ctx: &mut crate::parser::ParseCtx) -> Result { +pub fn parse_reg(ctx: &mut crate::parser::ParseCtx) -> Result { let next = ctx.expect_next()?; let err = || CompilerMsg::unexpected_token(&next, ctx.span, "a register"); let Token::Ident(next) = &next else { return Err(err()); }; - RegWH::parse(next).ok_or_else(err) + Reg::parse(next).ok_or_else(err) } diff --git a/x86_64_test b/x86_64_test index ae8f616..5ce75c5 100755 Binary files a/x86_64_test and b/x86_64_test differ