diff --git a/src/arch/mod.rs b/src/arch/mod.rs new file mode 100644 index 0000000..5d31702 --- /dev/null +++ b/src/arch/mod.rs @@ -0,0 +1,12 @@ +use crate::{ + backend::{LinkedProgram, Program}, + io::CompilerMsg, +}; + +pub mod x86_64; + +pub trait Arch: Sized { + const NAME: &str; + type Asm; + fn compile(p: &Program) -> Result; +} diff --git a/src/arch/x86_64/asm.rs b/src/arch/x86_64/asm.rs new file mode 100644 index 0000000..1e9101c --- /dev/null +++ b/src/arch/x86_64/asm.rs @@ -0,0 +1,46 @@ +use crate::{arch::x86_64::RegMode, backend::Symbol}; + +pub struct Asm { + pub instrs: Vec, +} + +#[derive(Clone, Copy)] +pub enum Instr { + Mov { dst: RegMode, src: RegImm }, + Int { code: u8 }, + Syscall, + Lea { dst: RegMode, sym: Symbol }, +} + +#[derive(Clone, Copy)] +pub enum RegImm { + Reg(RegMode), + Imm(u64), +} + +impl From for RegImm { + fn from(value: RegMode) -> Self { + Self::Reg(value) + } +} + +impl From for RegImm { + fn from(value: u64) -> Self { + Self::Imm(value) + } +} + +pub fn mov(dst: RegMode, src: impl Into) -> Instr { + Instr::Mov { + dst, + src: src.into(), + } +} + +pub fn lea(dst: RegMode, sym: Symbol) -> Instr { + Instr::Lea { dst, sym } +} + +pub fn int(code: u8) -> Instr { + Instr::Int { code } +} diff --git a/src/arch/x86_64/encode.rs b/src/arch/x86_64/encode.rs new file mode 100644 index 0000000..2574651 --- /dev/null +++ b/src/arch/x86_64/encode.rs @@ -0,0 +1,141 @@ +use super::*; +use crate::backend::{Addr, LinkedProgram, SymTable, Symbol}; + +pub struct Encoder { + pub data: Vec, + pub sym_tab: SymTable, + pub missing: Vec<(usize, Symbol)>, +} + +pub fn encode_program(p: &Program) -> Result { + let mut encoder = Encoder::new(p.sym_count()); + + p.encode_data(&mut encoder.data, &mut encoder.sym_tab); + + for f in &p.funcs { + let addr = encoder.data.len(); + encoder.sym_tab.insert(f.sym, addr as u64); + for instr in &f.instrs { + compile_instr(&mut encoder, instr)?; + } + } + + for (pos, sym) in encoder.missing.drain(..) { + let addr = encoder + .sym_tab + .get(sym) + .ok_or(CompilerMsg::from(format!("unknown symbol {sym:?}")))?; + encoder.data[pos..pos + 4].copy_from_slice(&addr_offset(pos, addr)) + } + + Ok(LinkedProgram { + code: encoder.data, + entry: p.entry.and_then(|e| encoder.sym_tab.get(e)), + }) +} + +type BInstr = crate::backend::Instr; +fn compile_instr(encoder: &mut Encoder, instr: &BInstr) -> Result<(), CompilerMsg> { + match instr { + BInstr::Copy { dst, src } => todo!(), + BInstr::Asm(asm) => { + for i in &asm.instrs { + encoder.asm(*i)?; + } + } + } + Ok(()) +} + +impl Encoder { + // assembly + + pub fn mov(&mut self, dst: RegMode, src: impl Into) -> Result<(), CompilerMsg> { + let src = src.into(); + let width = dst.width; + if width == BitWidth::B16 { + self.data.push(0x66); + } + let dst8 = dst.gt8(); + let b64 = width == BitWidth::B64; + let b8 = width == BitWidth::B8; + let src8 = if let RegImm::Reg(src) = src { + src.gt8() + } else { + false + }; + // special 64-bit / register 4-7 indicator + if dst8 || src8 || b64 || (dst.gt4() && !dst.high) { + self.data + .push(0x40 | dst8 as u8 | ((b64 as u8) << 3) | ((src8 as u8) << 2)); + } + match src { + RegImm::Reg(src) => { + if dst.width != src.width { + return Err("src and dst are not the same size".into()); + } + self.data.push(0x88 | !b8 as u8); + let modrm = 0b11_000_000 | (src.base() << 3) | dst.base(); + self.data.push(modrm); + } + RegImm::Imm(imm) => { + if imm > width.max() { + return Err("immediate cannot fit in register".into()); + } + self.data.push(0xb0 | ((!b8 as u8) << 3) | dst.base()); + self.data.extend(&imm.to_le_bytes()[..width.bytes()]); + } + } + Ok(()) + } + + pub fn lea(&mut self, dst: RegMode, sym: Symbol) { + self.data.extend([ + 0x48 | ((dst.gt8() as u8) << 2), + 0x8d, + 0x05 | (dst.base() << 3), + ]); + let Some(addr) = self.sym_tab.get(sym) else { + let pos = self.data.len(); + self.data.extend([0; 4]); + self.missing.push((pos, sym)); + return; + }; + self.data.extend(addr_offset(self.data.len(), addr)); + } + + pub fn int(&mut self, code: u8) { + self.data.extend([0xcd, code]) + } + + pub fn syscall(&mut self) { + self.data.extend([0x0f, 0x05]) + } + + pub fn asm(&mut self, instr: Instr) -> Result<(), CompilerMsg> { + match instr { + Instr::Mov { dst, src } => self.mov(dst, src)?, + Instr::Int { code } => self.int(code), + Instr::Syscall => self.syscall(), + Instr::Lea { dst, sym } => self.lea(dst, sym), + } + Ok(()) + } +} + +/// assumes the next instruction is directly after +fn addr_offset(pos: usize, addr: Addr) -> [u8; 4] { + let pos = (pos + 4) as i32; + let offset = addr as i32 - pos; + offset.to_le_bytes() +} + +impl Encoder { + pub fn new(sym_count: usize) -> Self { + Self { + data: Default::default(), + sym_tab: SymTable::new(sym_count), + missing: Default::default(), + } + } +} diff --git a/src/arch/x86_64/mod.rs b/src/arch/x86_64/mod.rs new file mode 100644 index 0000000..5b6c633 --- /dev/null +++ b/src/arch/x86_64/mod.rs @@ -0,0 +1,25 @@ +mod asm; +mod encode; +mod reg; +#[cfg(test)] +mod test; + +use crate::{ + arch::Arch, + backend::{LinkedProgram, Program}, + io::CompilerMsg, +}; + +pub use asm::*; +pub use encode::*; +pub use reg::*; + +pub struct X86_64; + +impl Arch for X86_64 { + const NAME: &str = "x86_64"; + type Asm = Asm; + fn compile(p: &Program) -> Result { + encode_program(p) + } +} diff --git a/src/arch/x86_64/reg.rs b/src/arch/x86_64/reg.rs new file mode 100644 index 0000000..7a6882f --- /dev/null +++ b/src/arch/x86_64/reg.rs @@ -0,0 +1,106 @@ +#[derive(Clone, Copy)] +pub struct Reg(u8); + +#[derive(Clone, Copy)] +pub struct RegMode { + pub reg: Reg, + pub width: BitWidth, + pub high: bool, +} + +#[derive(Clone, Copy, PartialEq)] +pub enum BitWidth { + B64, + B32, + B16, + B8, +} + +impl RegMode { + pub fn base(&self) -> u8 { + self.reg.0 & 0b111 + } + /// checks if register is not one of the first 8 (0-7) + pub fn gt8(&self) -> bool { + self.reg.0 >= 0b1000 + } + pub fn gt4(&self) -> bool { + self.reg.0 >= 0b0100 + } +} + +def_regs! { + 0b0000 : rax eax ax al ah=spl, + 0b0001 : rcx ecx cx cl ch=bpl, + 0b0010 : rdx edx dx dl dh=sil, + 0b0011 : rbx ebx bx bl bh=dil, + + 0b0100 : rsp esp sp spl, + 0b0101 : rbp ebp bp bpl, + 0b0110 : rsi esi si sil, + 0b0111 : rdi edi di dil, + + 0b1000 : r8 r8d r8w r8b, + 0b1001 : r9 r9d r9w r9b, + 0b1010 : r10 r10d r10w r10b, + 0b1011 : r11 r11d r11w r11b, + 0b1100 : r12 r12d r12w r12b, + 0b1101 : r13 r13d r13w r13b, + 0b1110 : r14 r14d r14w r14b, + 0b1111 : r15 r15d r15w r15b, +} + +impl BitWidth { + pub const fn max(&self) -> u64 { + match self { + Self::B64 => u64::MAX, + Self::B32 => u32::MAX as u64, + Self::B16 => u16::MAX as u64, + Self::B8 => u8::MAX as u64, + } + } + pub const fn bytes(&self) -> usize { + match self { + Self::B64 => 8, + Self::B32 => 4, + Self::B16 => 2, + Self::B8 => 1, + } + } +} + +macro_rules! def_regs { + ($($val:literal : $B64:ident $B32:ident $B16:ident $B8:ident $($B8H:ident=$hval:expr)?,)*) => { + $( + #[allow(non_upper_case_globals)] + pub const $B64: RegMode = RegMode { reg: Reg($val), width: BitWidth::B64, high: false }; + #[allow(non_upper_case_globals)] + pub const $B32: RegMode = RegMode { reg: Reg($val), width: BitWidth::B32, high: false }; + #[allow(non_upper_case_globals)] + pub const $B16: RegMode = RegMode { reg: Reg($val), width: BitWidth::B16, high: false }; + #[allow(non_upper_case_globals)] + pub const $B8 : RegMode = RegMode { reg: Reg($val), width: BitWidth::B8, high: false }; + $( + #[allow(non_upper_case_globals)] + pub const $B8H: RegMode = RegMode { reg: $hval.reg, width: BitWidth::B8, high: true }; + )? + )* + impl RegMode { + pub fn parse(s: &str) -> Option { + Some(match s.to_lowercase().as_str() { + $( + stringify!($B64) => $B64, + stringify!($B32) => $B32, + stringify!($B16) => $B16, + stringify!($B8 ) => $B8, + $( + stringify!($B8H) => $B8H, + )? + )* + _ => return None, + }) + } + } + }; +} +use def_regs; diff --git a/src/arch/x86_64/test.rs b/src/arch/x86_64/test.rs new file mode 100644 index 0000000..8152aee --- /dev/null +++ b/src/arch/x86_64/test.rs @@ -0,0 +1,76 @@ +use super::*; + +fn eq(expected: impl AsRef<[u8]>, asm: Instr) { + let expected = expected.as_ref(); + let mut encoder = Encoder::new(0); + if let Err(e) = encoder.asm(asm) { + panic!("expected {expected:x?}, failed to compile: {}", e.msg); + } + let res = encoder.data; + assert_eq!(expected, &res[..], "expected {expected:x?}, got {res:x?}"); +} + +#[test] +fn reg_reg() { + // used objdump on some nasm compiled assembly + eq([0x48, 0x89, 0xd8], mov(rax, rbx)); + eq([0x89, 0xd8], mov(eax, ebx)); + eq([0x66, 0x89, 0xd8], mov(ax, bx)); + eq([0x88, 0xd8], mov(al, bl)); + eq([0x88, 0xfc], mov(ah, bh)); + + eq([0x88, 0xf8], mov(al, bh)); + eq([0x88, 0xdc], mov(ah, bl)); + eq([0x40, 0x88, 0xe7], mov(dil, spl)); + + eq([0x4d, 0x89, 0xc8], mov(r8, r9)); + eq([0x45, 0x89, 0xc8], mov(r8d, r9d)); + eq([0x66, 0x45, 0x89, 0xc8], mov(r8w, r9w)); + eq([0x45, 0x88, 0xc8], mov(r8b, r9b)); + + eq([0x49, 0x89, 0xc0], mov(r8, rax)); + eq([0x4c, 0x89, 0xc0], mov(rax, r8)); + eq([0x4d, 0x89, 0xd1], mov(r9, r10)); + + eq([0x4d, 0x89, 0xe0], mov(r8, r12)); +} + +#[test] +fn reg_imm() { + eq( + [0x49, 0xbf, 0xf0, 0xde, 0xbc, 0x9a, 0x78, 0x56, 0x34, 0x12], + mov(r15, 0x123456789abcdef0), + ); + eq( + [0x49, 0xb8, 0xf0, 0xde, 0xbc, 0x9a, 0x78, 0x56, 0x34, 0x12], + mov(r8, 0x123456789abcdef0), + ); + eq( + [0x49, 0xb9, 0xf0, 0xde, 0xbc, 0x9a, 0x78, 0x56, 0x34, 0x12], + mov(r9, 0x123456789abcdef0), + ); + eq([0x41, 0xb9, 0x78, 0x56, 0x34, 0x12], mov(r9d, 0x12345678)); + eq([0x66, 0x41, 0xb9, 0x34, 0x12], mov(r9w, 0x1234)); + eq([0x41, 0xb1, 0x12], mov(r9b, 0x12)); + eq([0x41, 0xb0, 0x12], mov(r8b, 0x12)); + eq([0x41, 0xb7, 0x12], mov(r15b, 0x12)); + + eq( + [0x48, 0xb8, 0xf0, 0xde, 0xbc, 0x9a, 0x78, 0x56, 0x34, 0x12], + mov(rax, 0x123456789abcdef0), + ); + eq( + [0x48, 0xbb, 0xf0, 0xde, 0xbc, 0x9a, 0x78, 0x56, 0x34, 0x12], + mov(rbx, 0x123456789abcdef0), + ); + eq( + [0x48, 0xbf, 0xf0, 0xde, 0xbc, 0x9a, 0x78, 0x56, 0x34, 0x12], + mov(rdi, 0x123456789abcdef0), + ); + eq([0xbb, 0x78, 0x56, 0x34, 0x12], mov(ebx, 0x12345678)); + eq([0x66, 0xbb, 0x34, 0x12], mov(bx, 0x1234)); + eq([0xb3, 0x12], mov(bl, 0x12)); + eq([0xb7, 0x12], mov(bh, 0x12)); + eq([0xb4, 0x12], mov(ah, 0x12)); + eq([0x40, 0xb7, 0x12], mov(dil, 0x12)); +} diff --git a/src/backend/arch/mod.rs b/src/backend/arch/mod.rs deleted file mode 100644 index 2a99bf5..0000000 --- a/src/backend/arch/mod.rs +++ /dev/null @@ -1 +0,0 @@ -pub mod x86_64; diff --git a/src/backend/arch/x86_64.rs b/src/backend/arch/x86_64.rs deleted file mode 100644 index 36283e4..0000000 --- a/src/backend/arch/x86_64.rs +++ /dev/null @@ -1,314 +0,0 @@ -use crate::{ - backend::{ - program::{Addr, SymTable}, - symbol::Symbol, - }, - io::CompilerMsg, -}; - -pub struct Asm { - pub instrs: Vec, -} - -pub enum Instr { - Mov { dst: RegMode, src: RegImm }, - Int { code: u8 }, - Syscall, - Lea { dst: RegMode, sym: Symbol }, -} - -pub enum RegImm { - Reg(RegMode), - Imm(u64), -} - -pub struct Reg(u8); - -pub struct RegMode { - reg: Reg, - width: BitWidth, - high: bool, -} - -impl super::super::program::Instr for Instr { - fn encode( - &self, - data: &mut Vec, - syms: &mut SymTable, - ) -> Result, CompilerMsg> { - match self { - Instr::Mov { dst, src } => { - let width = dst.width; - if width == BitWidth::B16 { - data.push(0x66); - } - let dst8 = dst.gt8(); - let b64 = width == BitWidth::B64; - let b8 = width == BitWidth::B8; - let src8 = if let RegImm::Reg(src) = src { - src.gt8() - } else { - false - }; - // special 64-bit / register 4-7 indicator - if dst8 || src8 || b64 || (dst.gt4() && !dst.high) { - data.push(0x40 | dst8 as u8 | ((b64 as u8) << 3) | ((src8 as u8) << 2)); - } - match src { - RegImm::Reg(src) => { - if dst.width != src.width { - return Err("src and dst are not the same size".into()); - } - data.push(0x88 | !b8 as u8); - let modrm = 0b11_000_000 | (src.base() << 3) | dst.base(); - data.push(modrm); - } - &RegImm::Imm(imm) => { - if imm > width.max() { - return Err("immediate cannot fit in register".into()); - } - data.push(0xb0 | ((!b8 as u8) << 3) | dst.base()); - data.extend(&imm.to_le_bytes()[..width.bytes()]); - } - } - } - Instr::Int { code } => data.extend([0xcd, *code]), - Instr::Lea { dst, sym } => { - data.extend([ - 0x48 | ((dst.gt8() as u8) << 2), - 0x8d, - 0x05 | (dst.base() << 3), - ]); - let Some(addr) = syms.get(*sym) else { - let pos = data.len(); - data.extend([0; 4]); - return Ok(Some((*sym, pos))); - }; - data.extend(addr_offset(data.len(), addr)); - } - Instr::Syscall => data.extend([0x0f, 0x05]), - } - Ok(None) - } - fn insert_sym(&self, data: &mut Vec, pos: usize, addr: Addr) { - match self { - Self::Lea { .. } => data[pos..pos + 4].copy_from_slice(&addr_offset(pos, addr)), - _ => panic!("unkown symbol insertion"), - } - } -} - -/// assumes the next instruction is directly after -fn addr_offset(pos: usize, addr: Addr) -> [u8; 4] { - let pos = (pos + 4) as i32; - let offset = addr.val() as i32 - pos; - offset.to_le_bytes() -} - -impl RegMode { - pub fn base(&self) -> u8 { - self.reg.0 & 0b111 - } - /// checks if register is not one of the first 8 (0-7) - pub fn gt8(&self) -> bool { - self.reg.0 >= 0b1000 - } - pub fn gt4(&self) -> bool { - self.reg.0 >= 0b0100 - } -} - -macro_rules! def_regs { - ($($val:literal : $B64:ident $B32:ident $B16:ident $B8:ident $($B8H:ident=$hval:expr)?,)*) => { - #[allow(non_upper_case_globals)] - pub mod reg { - use super::{RegMode, BitWidth, Reg}; - $( - pub const $B64: RegMode = RegMode { reg: Reg($val), width: BitWidth::B64, high: false }; - pub const $B32: RegMode = RegMode { reg: Reg($val), width: BitWidth::B32, high: false }; - pub const $B16: RegMode = RegMode { reg: Reg($val), width: BitWidth::B16, high: false }; - pub const $B8 : RegMode = RegMode { reg: Reg($val), width: BitWidth::B8, high: false }; - $( - pub const $B8H: RegMode = RegMode { reg: $hval.reg, width: BitWidth::B8, high: true }; - )? - )* - } - impl RegMode { - pub fn parse(s: &str) -> Option { - Some(match s.to_lowercase().as_str() { - $( - stringify!($B64) => reg::$B64, - stringify!($B32) => reg::$B32, - stringify!($B16) => reg::$B16, - stringify!($B8 ) => reg::$B8, - $( - stringify!($B8H) => reg::$B8H, - )? - )* - _ => return None, - }) - } - } - }; -} - -def_regs! { - 0b0000 : rax eax ax al ah=spl, - 0b0001 : rcx ecx cx cl ch=bpl, - 0b0010 : rdx edx dx dl dh=sil, - 0b0011 : rbx ebx bx bl bh=dil, - - 0b0100 : rsp esp sp spl, - 0b0101 : rbp ebp bp bpl, - 0b0110 : rsi esi si sil, - 0b0111 : rdi edi di dil, - - 0b1000 : r8 r8d r8w r8b, - 0b1001 : r9 r9d r9w r9b, - 0b1010 : r10 r10d r10w r10b, - 0b1011 : r11 r11d r11w r11b, - 0b1100 : r12 r12d r12w r12b, - 0b1101 : r13 r13d r13w r13b, - 0b1110 : r14 r14d r14w r14b, - 0b1111 : r15 r15d r15w r15b, -} - -#[derive(Clone, Copy, PartialEq)] -pub enum BitWidth { - B64, - B32, - B16, - B8, -} - -impl BitWidth { - pub const fn max(&self) -> u64 { - match self { - Self::B64 => u64::MAX, - Self::B32 => u32::MAX as u64, - Self::B16 => u16::MAX as u64, - Self::B8 => u8::MAX as u64, - } - } - pub const fn bytes(&self) -> usize { - match self { - Self::B64 => 8, - Self::B32 => 4, - Self::B16 => 2, - Self::B8 => 1, - } - } -} - -pub mod instr { - use super::*; - pub fn mov(dst: RegMode, src: impl Into) -> Instr { - Instr::Mov { - dst, - src: src.into(), - } - } - - pub fn lea(dst: RegMode, sym: Symbol) -> Instr { - Instr::Lea { dst, sym } - } - - pub fn int(code: u8) -> Instr { - Instr::Int { code } - } -} - -impl From for RegImm { - fn from(value: RegMode) -> Self { - Self::Reg(value) - } -} - -impl From for RegImm { - fn from(value: u64) -> Self { - Self::Imm(value) - } -} - -#[cfg(test)] -mod test { - use crate::backend::program::Instr as _; - - use super::*; - use instr::*; - use reg::*; - - fn eq(expected: impl AsRef<[u8]>, got: Instr) { - let expected = expected.as_ref(); - let mut res = Vec::new(); - if let Err(e) = got.encode(&mut res, &mut SymTable::new(0)) { - panic!("expected {expected:x?}, failed to compile: {}", e.msg); - } - assert_eq!(expected, &res[..], "expected {expected:x?}, got {res:x?}"); - } - - #[test] - fn reg_reg() { - // used objdump on some nasm compiled assembly - eq([0x48, 0x89, 0xd8], mov(rax, rbx)); - eq([0x89, 0xd8], mov(eax, ebx)); - eq([0x66, 0x89, 0xd8], mov(ax, bx)); - eq([0x88, 0xd8], mov(al, bl)); - eq([0x88, 0xfc], mov(ah, bh)); - - eq([0x88, 0xf8], mov(al, bh)); - eq([0x88, 0xdc], mov(ah, bl)); - eq([0x40, 0x88, 0xe7], mov(dil, spl)); - - eq([0x4d, 0x89, 0xc8], mov(r8, r9)); - eq([0x45, 0x89, 0xc8], mov(r8d, r9d)); - eq([0x66, 0x45, 0x89, 0xc8], mov(r8w, r9w)); - eq([0x45, 0x88, 0xc8], mov(r8b, r9b)); - - eq([0x49, 0x89, 0xc0], mov(r8, rax)); - eq([0x4c, 0x89, 0xc0], mov(rax, r8)); - eq([0x4d, 0x89, 0xd1], mov(r9, r10)); - - eq([0x4d, 0x89, 0xe0], mov(r8, r12)); - } - - #[test] - fn reg_imm() { - eq( - [0x49, 0xbf, 0xf0, 0xde, 0xbc, 0x9a, 0x78, 0x56, 0x34, 0x12], - mov(r15, 0x123456789abcdef0), - ); - eq( - [0x49, 0xb8, 0xf0, 0xde, 0xbc, 0x9a, 0x78, 0x56, 0x34, 0x12], - mov(r8, 0x123456789abcdef0), - ); - eq( - [0x49, 0xb9, 0xf0, 0xde, 0xbc, 0x9a, 0x78, 0x56, 0x34, 0x12], - mov(r9, 0x123456789abcdef0), - ); - eq([0x41, 0xb9, 0x78, 0x56, 0x34, 0x12], mov(r9d, 0x12345678)); - eq([0x66, 0x41, 0xb9, 0x34, 0x12], mov(r9w, 0x1234)); - eq([0x41, 0xb1, 0x12], mov(r9b, 0x12)); - eq([0x41, 0xb0, 0x12], mov(r8b, 0x12)); - eq([0x41, 0xb7, 0x12], mov(r15b, 0x12)); - - eq( - [0x48, 0xb8, 0xf0, 0xde, 0xbc, 0x9a, 0x78, 0x56, 0x34, 0x12], - mov(rax, 0x123456789abcdef0), - ); - eq( - [0x48, 0xbb, 0xf0, 0xde, 0xbc, 0x9a, 0x78, 0x56, 0x34, 0x12], - mov(rbx, 0x123456789abcdef0), - ); - eq( - [0x48, 0xbf, 0xf0, 0xde, 0xbc, 0x9a, 0x78, 0x56, 0x34, 0x12], - mov(rdi, 0x123456789abcdef0), - ); - eq([0xbb, 0x78, 0x56, 0x34, 0x12], mov(ebx, 0x12345678)); - eq([0x66, 0xbb, 0x34, 0x12], mov(bx, 0x1234)); - eq([0xb3, 0x12], mov(bl, 0x12)); - eq([0xb7, 0x12], mov(bh, 0x12)); - eq([0xb4, 0x12], mov(ah, 0x12)); - eq([0x40, 0xb7, 0x12], mov(dil, 0x12)); - } -} diff --git a/src/backend/elf.rs b/src/backend/elf.rs index c0bf45b..bdea22d 100644 --- a/src/backend/elf.rs +++ b/src/backend/elf.rs @@ -103,7 +103,7 @@ pub fn create(program: &[u8], start_offset: Addr) -> Vec { ty: if pie { EType::Dyn } else { EType::Exec } as u16, machine: Arch::X86_64.machine(), e_version: 0x1, - entry: addr_start + program_pos + start_offset.val(), + entry: addr_start + program_pos + start_offset as u64, program_header_offset: size_of::() as u64, section_header_offset: 0x0, // C ABI (16 bit instruction align) + double precision floats diff --git a/src/backend/elf_sections.rs b/src/backend/elf_sections.rs deleted file mode 100644 index 6ab360c..0000000 --- a/src/backend/elf_sections.rs +++ /dev/null @@ -1,145 +0,0 @@ -use crate::backend::Addr; - -#[repr(C)] -pub struct ELF64Header { - magic: u32, - class: u8, - endianness: u8, - ei_version: u8, - os_abi: u8, - os_abi_ver: u8, - pad: [u8; 7], - ty: u16, - machine: u16, - e_version: u32, - entry: u64, - program_header_offset: u64, - section_header_offset: u64, - flags: u32, - header_size: u16, - program_header_entry_size: u16, - program_header_num: u16, - section_header_entry_size: u16, - section_header_num: u16, - section_header_str_idx: u16, -} - -#[repr(C)] -pub struct ProgramHeader { - ty: u32, - flags: u32, - offset: u64, - vaddr: u64, - paddr: u64, - filesz: u64, - memsz: u64, - align: u64, -} - -#[repr(C)] -pub struct SectionHeader { - name_idx: u32, - ty: u32, - flags: u64, - addr: u64, - offset: u64, - size: u64, - link: u32, - info: u32, - addr_align: u64, - entry_size: u64, -} - -pub enum Arch { - X86_64, - Riscv, -} - -impl Arch { - pub fn machine(&self) -> u16 { - match self { - Arch::X86_64 => 0x3e, - Arch::Riscv => 0xf3, - } - } -} - -// this is currently specialized for riscv64; obviously add params later -pub fn create(program: &[u8], start_offset: Addr) -> Vec { - let addr_start = 0x400000; - let page_size = 0x1000; - // I don't know if I have to add addr_start here, idk how it maps the memory - let program_size = std::mem::size_of_val(program) as u64; - let program_header = ProgramHeader { - ty: 0x1, // LOAD - flags: 0b101, // executable, readable - offset: 0x0, - vaddr: addr_start, - paddr: 0x0, - filesz: program_size, - memsz: program_size, - align: page_size, - }; - let header_len = (size_of::() + size_of::()) as u64; - let program_pos = header_len; - let header = ELF64Header { - magic: 0x7f_45_4c_46u32.swap_bytes(), - class: 0x2, // 64 bit - endianness: 0x1, // little endian - ei_version: 0x1, - os_abi: 0x0, // system-v - os_abi_ver: 0x0, - pad: [0x0; 7], - ty: 0x2, // executable - machine: Arch::X86_64.machine(), - e_version: 0x1, - entry: addr_start + program_pos + start_offset.val(), - program_header_offset: size_of::() as u64, - section_header_offset: program_pos + program.len() as u64, - // C ABI (16 bit instruction align) + double precision floats - flags: 0x1 | 0x4, - header_size: size_of::() as u16, - program_header_entry_size: size_of::() as u16, - program_header_num: 0x1, - section_header_entry_size: size_of::() as u16, - section_header_num: 0x2, - section_header_str_idx: 0x0, - }; - let section_null = SectionHeader { - name_idx: 0, - ty: 0, - flags: 0, - addr: 0, - offset: 0, - size: 0, - link: 0, - info: 0, - addr_align: 0, - entry_size: 0, - }; - let section = SectionHeader { - name_idx: 0, - ty: 0x1, // program data - flags: (1 << 1) | (1 << 2), // alloc & exec - addr: addr_start, - offset: program_pos, - size: program.len() as u64, - link: 0, - info: 0, - addr_align: 1, - entry_size: 0, - }; - let mut bytes: Vec = Vec::new(); - unsafe { - bytes.extend(as_u8_slice(&header)); - bytes.extend(as_u8_slice(&program_header)); - bytes.extend(program); - bytes.extend(as_u8_slice(§ion_null)); - bytes.extend(as_u8_slice(§ion)); - } - bytes -} - -unsafe fn as_u8_slice(p: &T) -> &[u8] { - unsafe { core::slice::from_raw_parts((p as *const T) as *const u8, size_of::()) } -} diff --git a/src/backend/ir/mod.rs b/src/backend/ir/mod.rs new file mode 100644 index 0000000..21890f8 --- /dev/null +++ b/src/backend/ir/mod.rs @@ -0,0 +1,79 @@ +mod symbol; +pub use symbol::*; + +use crate::{arch::Arch, backend::LinkedProgram, io::CompilerMsg}; + +pub struct Program { + pub ro_data: Vec, + pub funcs: Vec>, + pub entry: Option, + + sym_count: usize, +} + +pub struct Data { + pub bytes: Vec, + pub sym: Symbol, +} + +pub struct Func { + pub instrs: Vec>, + pub sym: Symbol, +} + +pub enum Instr { + Copy { dst: VarId, src: VarId }, + Asm(A::Asm), +} + +pub type VarId = usize; +pub type FnId = usize; + +impl Program { + pub fn encode_data(&self, data: &mut Vec, sym_tab: &mut SymTable) { + for d in &self.ro_data { + let addr = data.len() as u64; + data.extend(&d.bytes); + sym_tab.insert(d.sym, addr); + } + } + + pub fn ro_data(&mut self, bytes: impl Into>) -> Symbol { + let bytes = bytes.into(); + let sym = self.reserve(); + self.ro_data.push(Data { bytes, sym }); + sym + } + + pub fn func(&mut self, instrs: impl Into>>) -> Symbol { + let instrs = instrs.into(); + let sym = self.reserve(); + self.funcs.push(Func { instrs, sym }); + sym + } + + fn reserve(&mut self) -> Symbol { + let res = Symbol(self.sym_count); + self.sym_count += 1; + res + } + + pub fn compile(&self) -> Result { + A::compile(self) + } + + pub fn sym_count(&self) -> usize { + self.sym_count + } +} + +impl Default for Program { + fn default() -> Self { + Self { + ro_data: Default::default(), + funcs: Default::default(), + entry: Default::default(), + sym_count: Default::default(), + } + } +} diff --git a/src/backend/ir/symbol.rs b/src/backend/ir/symbol.rs new file mode 100644 index 0000000..4fec23b --- /dev/null +++ b/src/backend/ir/symbol.rs @@ -0,0 +1,17 @@ +#[derive(Debug, Clone, Copy, Hash, PartialEq, Eq)] +pub struct Symbol(pub(super) usize); + +pub type Addr = u64; + +pub struct SymTable(Vec>); +impl SymTable { + pub fn new(len: usize) -> Self { + Self(vec![None; len]) + } + pub fn insert(&mut self, sym: Symbol, addr: Addr) { + self.0[sym.0] = Some(addr); + } + pub fn get(&self, sym: Symbol) -> Option { + self.0[sym.0] + } +} diff --git a/src/backend/link.rs b/src/backend/link.rs new file mode 100644 index 0000000..027eefe --- /dev/null +++ b/src/backend/link.rs @@ -0,0 +1,12 @@ +use crate::backend::{Addr, elf}; + +pub struct LinkedProgram { + pub code: Vec, + pub entry: Option, +} + +impl LinkedProgram { + pub fn to_elf(&self) -> Vec { + elf::create(&self.code, self.entry.expect("no start")) + } +} diff --git a/src/backend/program.rs b/src/backend/link_old.rs similarity index 92% rename from src/backend/program.rs rename to src/backend/link_old.rs index 5c36af9..cd129cd 100644 --- a/src/backend/program.rs +++ b/src/backend/link_old.rs @@ -32,6 +32,7 @@ impl UnlinkedProgram { sym_table.insert(id, Addr(data.len() as u64)); data.extend(val); } + // align data.resize(data.len() + (4 - data.len() % 4), 0); for f in self.fns { let addr = Addr(data.len() as u64); @@ -70,15 +71,6 @@ impl UnlinkedProgram { } } -pub trait Instr { - fn encode( - &self, - data: &mut Vec, - syms: &mut SymTable, - ) -> Result, CompilerMsg>; - fn insert_sym(&self, data: &mut Vec, pos: usize, addr: Addr); -} - #[derive(Debug, Clone, Copy, PartialEq)] pub struct Addr(u64); impl Addr { diff --git a/src/backend/mod.rs b/src/backend/mod.rs index affa101..a53c0fb 100644 --- a/src/backend/mod.rs +++ b/src/backend/mod.rs @@ -1,14 +1,9 @@ -pub mod arch; -pub mod elf; -mod program; -mod symbol; +mod elf; + +mod ir; +mod link; mod test; -pub use arch::*; +pub use ir::*; +pub use link::*; pub use test::*; - -use program::*; - -pub enum Asm { - X86_64(x86_64::Asm), -} diff --git a/src/backend/symbol.rs b/src/backend/symbol.rs deleted file mode 100644 index 9471861..0000000 --- a/src/backend/symbol.rs +++ /dev/null @@ -1,147 +0,0 @@ -use std::collections::HashMap; - -#[derive(Clone, Copy, Hash, PartialEq, Eq)] -pub struct Symbol(usize); - -impl Symbol { - pub fn val(&self) -> usize { - self.0 - } - pub fn raw(val: usize) -> Self { - Self(val) - } -} - -/// intentionally does not have copy or clone; -/// this should only be consumed once -pub struct WritableSymbol(Symbol); - -impl std::ops::Deref for WritableSymbol { - type Target = Symbol; - fn deref(&self) -> &Self::Target { - &self.0 - } -} - -// pub struct SymbolSpace { -// ro_data: Vec<(Symbol, Vec)>, -// fns: Vec<(Symbol, IRLFunction)>, -// len: usize, -// labels: Vec>, -// } -// -// pub struct SymbolSpaceBuilder { -// symbols: usize, -// unwritten_fns: Vec<(WritableSymbol, FnID)>, -// fn_map: HashMap, -// data_map: HashMap, -// ro_data: Vec<(Symbol, Vec)>, -// fns: Vec<(Symbol, IRLFunction)>, -// labels: Vec>, -// } -// -// impl SymbolSpace { -// pub fn ro_data(&self) -> &[(Symbol, Vec)] { -// &self.ro_data -// } -// pub fn fns(&self) -> &[(Symbol, IRLFunction)] { -// &self.fns -// } -// pub fn labels(&self) -> &[Option] { -// &self.labels -// } -// pub fn len(&self) -> usize { -// self.len -// } -// } -// -// impl SymbolSpaceBuilder { -// pub fn new() -> Self { -// Self { -// symbols: 0, -// unwritten_fns: Vec::new(), -// fn_map: HashMap::new(), -// data_map: HashMap::new(), -// ro_data: Vec::new(), -// fns: Vec::new(), -// labels: Vec::new(), -// } -// } -// pub fn with_entries(entries: &[FnID]) -> SymbolSpaceBuilder { -// let mut s = Self::new(); -// for e in entries { -// s.func(*e); -// } -// s -// } -// pub fn pop_fn(&mut self) -> Option<(WritableSymbol, FnID)> { -// self.unwritten_fns.pop() -// } -// pub fn anon_ro_data(&mut self, data: &[u8], label: Option) -> Symbol { -// let sym = self.reserve(); -// self.write_ro_data(sym, data.to_vec(), label) -// } -// pub fn ro_data(&mut self, id: DataID, data: &[u8], label: Option<&str>) -> Symbol { -// match self.data_map.get(&id) { -// Some(s) => *s, -// None => { -// let sym = self.reserve(); -// self.data_map.insert(id, *sym); -// self.write_ro_data(sym, data.to_vec(), label.map(|l| l.to_string())) -// } -// } -// } -// pub fn func(&mut self, id: FnID) -> Symbol { -// match self.fn_map.get(&id) { -// Some(s) => *s, -// None => { -// let wsym = self.reserve(); -// let sym = *wsym; -// self.unwritten_fns.push((wsym, id)); -// self.fn_map.insert(id, sym); -// sym -// } -// } -// } -// pub fn write_ro_data( -// &mut self, -// sym: WritableSymbol, -// data: Vec, -// name: Option, -// ) -> Symbol { -// self.ro_data.push((*sym, data)); -// self.labels[sym.0.0] = name; -// *sym -// } -// pub fn write_fn( -// &mut self, -// sym: WritableSymbol, -// func: IRLFunction, -// name: Option, -// ) -> Symbol { -// self.fns.push((*sym, func)); -// self.labels[sym.0.0] = name; -// *sym -// } -// pub fn reserve(&mut self) -> WritableSymbol { -// let val = self.symbols; -// self.symbols += 1; -// self.labels.push(None); -// WritableSymbol(Symbol(val)) -// } -// pub fn len(&self) -> usize { -// self.symbols -// } -// pub fn finish(self) -> Option { -// if self.unwritten_fns.is_empty() { -// Some(SymbolSpace { -// len: self.symbols, -// fns: self.fns, -// ro_data: self.ro_data, -// labels: self.labels, -// }) -// } else { -// None -// } -// } -// } diff --git a/src/backend/test.rs b/src/backend/test.rs index 7811771..f947655 100644 --- a/src/backend/test.rs +++ b/src/backend/test.rs @@ -1,34 +1,28 @@ -use crate::backend::{ - program::{UnlinkedFunction, UnlinkedProgram}, - symbol::Symbol, - x86_64::{Instr, instr::*, reg::*}, -}; use std::{fs::OpenOptions, io::Write, os::unix::fs::OpenOptionsExt, process::Command}; +use crate::{ + arch::x86_64::*, + backend::{Instr as BInstr, Program}, +}; + pub fn test_x86_64() { - let s = b"Hello world!\n"; - let program = UnlinkedProgram { - fns: vec![UnlinkedFunction { - instrs: vec![ - mov(rax, 1), - mov(rdi, 1), - lea(rsi, Symbol::raw(1)), - mov(rdx, s.len() as u64), - Instr::Syscall, - mov(rax, 0x3c), - mov(rdi, 39), - Instr::Syscall, - ], - sym: Symbol::raw(0), - locations: Default::default(), - }], - ro_data: vec![(s.to_vec(), Symbol::raw(1))], - sym_count: 2, - start: Some(Symbol::raw(0)), - }; - let Ok(linked) = program.link() else { - panic!("failed to link"); - }; + let mut program = Program::::default(); + let text = b"Hello world!\n"; + let text_sym = program.ro_data(text); + let entry = program.func([BInstr::Asm(Asm { + instrs: vec![ + mov(ax, 1), + mov(di, 1), + lea(rsi, text_sym), + mov(dx, text.len() as u64), + Instr::Syscall, + mov(ax, 0x3c), + mov(di, 39), + Instr::Syscall, + ], + })]); + program.entry = Some(entry); + let linked = program.compile().expect("failed to compile"); let binary = linked.to_elf(); let path = "./x86_64_test"; let mut file = OpenOptions::new() diff --git a/src/ir/structs/mod.rs b/src/ir/structs/mod.rs index 9618385..05748ed 100644 --- a/src/ir/structs/mod.rs +++ b/src/ir/structs/mod.rs @@ -1,8 +1,6 @@ mod namespace; pub use namespace::*; -use crate::backend::Asm; - use super::Id; pub struct Fn { @@ -21,7 +19,6 @@ pub enum StatementTy { Define { target: VarId, val: VarId }, Assign { target: VarId, val: VarId }, Call { target: VarId, args: Vec }, - Asm(Asm), } pub struct Var { diff --git a/src/main.rs b/src/main.rs index a514ac0..876df6e 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,5 +1,6 @@ use crate::{io::CompilerOutput, parser_ir::parse_program}; +mod arch; mod backend; mod io; mod ir; diff --git a/src/parser/nodes/asm/mod.rs b/src/parser/nodes/asm/mod.rs index c53a0e9..772e954 100644 --- a/src/parser/nodes/asm/mod.rs +++ b/src/parser/nodes/asm/mod.rs @@ -1,5 +1,5 @@ use crate::{ - backend::arch::x86_64::Asm, + arch::x86_64::Asm, parser::{Node, cursor::Token}, }; diff --git a/src/parser/nodes/asm/x86_64.rs b/src/parser/nodes/asm/x86_64.rs index e9cfc7c..b162646 100644 --- a/src/parser/nodes/asm/x86_64.rs +++ b/src/parser/nodes/asm/x86_64.rs @@ -1,5 +1,5 @@ use crate::{ - backend::arch::x86_64::*, + arch::x86_64::*, io::{CompilerMsg, Span}, parser::{ Node, diff --git a/x86_64_test b/x86_64_test index 42cd68a..2d5f1f7 100755 Binary files a/x86_64_test and b/x86_64_test differ