From c9add923be6761279d99cc6891a314e82e5e802b Mon Sep 17 00:00:00 2001 From: Shadow Cat Date: Sun, 7 Jun 2026 21:22:32 -0400 Subject: [PATCH] pe work --- src/arch/x86_64/asm.rs | 30 ++++--- src/arch/x86_64/encode.rs | 24 ++++-- src/arch/x86_64/reg.rs | 2 +- src/arch/x86_64/test/bin.rs | 48 +++++++++--- src/backend/{ => container}/elf.rs | 16 ++-- src/backend/container/mod.rs | 38 +++++++++ src/backend/container/pe/data_dir.rs | 27 +++++++ src/backend/container/pe/header.rs | 67 ++++++++++++++++ src/backend/container/pe/import.rs | 77 ++++++++++++++++++ src/backend/container/pe/mod.rs | 112 +++++++++++++++++++++++++++ src/backend/link.rs | 8 -- src/backend/mod.rs | 4 +- src/parser/nodes/asm/x86_64.rs | 6 +- x86_64_test.exe | Bin 0 -> 372 bytes 14 files changed, 413 insertions(+), 46 deletions(-) rename src/backend/{ => container}/elf.rs (89%) create mode 100644 src/backend/container/mod.rs create mode 100644 src/backend/container/pe/data_dir.rs create mode 100644 src/backend/container/pe/header.rs create mode 100644 src/backend/container/pe/import.rs create mode 100644 src/backend/container/pe/mod.rs create mode 100755 x86_64_test.exe diff --git a/src/arch/x86_64/asm.rs b/src/arch/x86_64/asm.rs index 7c1b19d..b2af8a0 100644 --- a/src/arch/x86_64/asm.rs +++ b/src/arch/x86_64/asm.rs @@ -9,29 +9,35 @@ pub struct Asm { #[derive(Clone, Copy)] pub enum Instr { - Mov { dst: RegMode, src: RegImm }, + Mov { dst: RegMode, src: RegModeImm }, Int(u8), Call(Symbol), Ret, Syscall, Lea { dst: RegMode, sym: Symbol }, - Push(Reg), + Push(RegImm), Pop(Reg), } #[derive(Clone, Copy)] -pub enum RegImm { +pub enum RegModeImm { Reg(RegMode), Imm(u64), } -impl From for RegImm { +#[derive(Clone, Copy)] +pub enum RegImm { + Reg(Reg), + Imm(u64), +} + +impl From for RegModeImm { fn from(value: RegMode) -> Self { Self::Reg(value) } } -impl From for RegImm { +impl From for RegModeImm { fn from(value: u64) -> Self { Self::Imm(value) } @@ -39,7 +45,7 @@ impl From for RegImm { mod fns { use super::*; - pub fn mov(dst: RegMode, src: impl Into) -> Instr { + pub fn mov(dst: RegMode, src: impl Into) -> Instr { Instr::Mov { dst, src: src.into(), @@ -50,9 +56,15 @@ mod fns { Instr::Lea { dst, sym } } - pub fn push(reg: RegMode) -> Instr { - assert!(reg.width == BitWidth::B64); - Instr::Push(reg.reg) + pub fn push(reg: impl Into) -> Instr { + Instr::Push(match reg.into() { + RegModeImm::Reg(reg) => { + assert_eq!(reg.width, BitWidth::B64); + assert!(!reg.high); + RegImm::Reg(reg.reg) + } + RegModeImm::Imm(imm) => RegImm::Imm(imm), + }) } pub fn pop(reg: RegMode) -> Instr { diff --git a/src/arch/x86_64/encode.rs b/src/arch/x86_64/encode.rs index cd4af33..08703f9 100644 --- a/src/arch/x86_64/encode.rs +++ b/src/arch/x86_64/encode.rs @@ -50,7 +50,7 @@ fn compile_instr(encoder: &mut Encoder, instr: &BInstr) -> Result<(), CompilerMs impl Encoder { // assembly - pub fn mov(&mut self, dst: RegMode, src: impl Into) -> Result<(), CompilerMsg> { + pub fn mov(&mut self, dst: RegMode, src: impl Into) -> Result<(), CompilerMsg> { let src = src.into(); let width = dst.width; if width == BitWidth::B16 { @@ -59,7 +59,7 @@ impl Encoder { let dst8 = dst.gt8(); let b64 = width == BitWidth::B64; let b8 = width == BitWidth::B8; - let src8 = if let RegImm::Reg(src) = src { + let src8 = if let RegModeImm::Reg(src) = src { src.gt8() } else { false @@ -70,7 +70,7 @@ impl Encoder { .push(0x40 | dst8 as u8 | ((b64 as u8) << 3) | ((src8 as u8) << 2)); } match src { - RegImm::Reg(src) => { + RegModeImm::Reg(src) => { if dst.width != src.width { return Err("src and dst are not the same size".into()); } @@ -78,7 +78,7 @@ impl Encoder { let modrm = 0b11_000_000 | (src.base() << 3) | dst.base(); self.data.push(modrm); } - RegImm::Imm(imm) => { + RegModeImm::Imm(imm) => { if imm > width.max() { return Err("immediate cannot fit in register".into()); } @@ -122,6 +122,17 @@ impl Encoder { self.data.push(0x50 | reg.base()); } + pub fn push_imm(&mut self, imm: u64) { + const U8: u64 = 2 << 8; + if let 0..U8 = imm { + self.data.push(0x6a); + self.data.push(imm as u8); + } else { + self.data.push(0x68); + self.data.extend(imm.to_le_bytes()); + } + } + pub fn pop(&mut self, reg: Reg) { if reg.gt8() { self.data.push(0x41); @@ -148,7 +159,10 @@ impl Encoder { Instr::Lea { dst, sym } => self.lea(dst, sym), Instr::Call(sym) => self.call(sym), Instr::Ret => self.ret(), - Instr::Push(reg) => self.push(reg), + Instr::Push(val) => match val { + RegImm::Reg(reg) => self.push(reg), + RegImm::Imm(imm) => self.push_imm(imm), + }, Instr::Pop(reg) => self.pop(reg), } Ok(()) diff --git a/src/arch/x86_64/reg.rs b/src/arch/x86_64/reg.rs index d772ea3..e161f60 100644 --- a/src/arch/x86_64/reg.rs +++ b/src/arch/x86_64/reg.rs @@ -8,7 +8,7 @@ pub struct RegMode { pub high: bool, } -#[derive(Clone, Copy, PartialEq)] +#[derive(Debug, Clone, Copy, PartialEq)] pub enum BitWidth { B64, B32, diff --git a/src/arch/x86_64/test/bin.rs b/src/arch/x86_64/test/bin.rs index dfaea9b..eddbdf2 100644 --- a/src/arch/x86_64/test/bin.rs +++ b/src/arch/x86_64/test/bin.rs @@ -5,6 +5,11 @@ use crate::{ use std::{fs::OpenOptions, io::Write, os::unix::fs::OpenOptionsExt, process::Command}; pub fn run() { + windows(); + // linux(); +} + +fn linux() { let mut program = Program::::default(); let text = b"Hello world!\n"; let text_sym = program.ro_data(text); @@ -39,16 +44,7 @@ pub fn run() { let linked = program.compile().expect("failed to compile"); let binary = linked.to_elf(); let path = "./x86_64_test"; - let mut file = OpenOptions::new() - .create(true) - .write(true) - .truncate(true) - .mode(0o750) - .open(path) - .expect("Failed to create file"); - file.write_all(&binary).expect("Failed to write to file"); - file.sync_all().expect("Failed to sync file"); - drop(file); + write(path, &binary); println!("running..."); let gdb = false; let mut proc = if gdb { @@ -72,3 +68,35 @@ pub fn run() { std::process::exit(code); } } + +fn windows() { + let mut program = Program::::default(); + let entry = program.func([BInstr::Asm(Asm { + instrs: vec![push(39), pop(rax), Instr::Ret], + })]); + program.entry = Some(entry); + let linked = program.compile().expect("failed to compile"); + let binary = linked.to_pe(); + let path = "./x86_64_test.exe"; + write(path, &binary); + + let mut cmd = Command::new("wine"); + cmd.arg("x86_64_test"); + let mut proc = cmd.spawn().expect("failed to run"); + let status = proc.wait().expect("failed to wait"); + if let Some(code) = status.code() { + std::process::exit(code); + } +} + +fn write(path: &str, binary: &[u8]) { + let mut file = OpenOptions::new() + .create(true) + .write(true) + .truncate(true) + .mode(0o750) + .open(path) + .expect("Failed to create file"); + file.write_all(binary).expect("Failed to write to file"); + file.sync_all().expect("Failed to sync file"); +} diff --git a/src/backend/elf.rs b/src/backend/container/elf.rs similarity index 89% rename from src/backend/elf.rs rename to src/backend/container/elf.rs index 117d76e..71d2dca 100644 --- a/src/backend/elf.rs +++ b/src/backend/container/elf.rs @@ -1,4 +1,4 @@ -use crate::backend::Addr; +use crate::backend::{Addr, LinkedProgram, container::as_u8_slice}; #[repr(C)] pub struct ELF64Header { @@ -73,7 +73,7 @@ pub enum EType { Core = 4, } -// this is currently specialized for riscv64; obviously add params later +// this is currently specialized for x86_64; obviously add params later pub fn create(program: &[u8], start_offset: u64) -> Vec { let pie = true; let addr_start = if pie { 0 } else { 0x400000 }; @@ -116,14 +116,14 @@ pub fn create(program: &[u8], start_offset: u64) -> Vec { section_header_str_idx: 0x0, }; let mut bytes: Vec = Vec::new(); - unsafe { - bytes.extend(as_u8_slice(&header)); - bytes.extend(as_u8_slice(&program_header)); - } + bytes.extend(as_u8_slice(&header)); + bytes.extend(as_u8_slice(&program_header)); bytes.extend(program); bytes } -unsafe fn as_u8_slice(p: &T) -> &[u8] { - unsafe { core::slice::from_raw_parts((p as *const T) as *const u8, size_of::()) } +impl LinkedProgram { + pub fn to_elf(&self) -> Vec { + create(&self.code, self.entry.expect("no start")) + } } diff --git a/src/backend/container/mod.rs b/src/backend/container/mod.rs new file mode 100644 index 0000000..8bf524f --- /dev/null +++ b/src/backend/container/mod.rs @@ -0,0 +1,38 @@ +pub mod elf; +pub mod pe; + +#[derive(Default)] +pub struct ByteEncoder { + pub data: Vec, +} + +trait Pushable { + fn push(self, data: &mut Vec); +} + +impl ByteEncoder { + pub fn push(&mut self, byte: u8) { + self.data.push(byte); + } + pub fn extend(&mut self, iter: impl IntoIterator) { + self.data.extend(iter); + } + pub fn val(&mut self, val: &T) { + self.data.extend(as_u8_slice(val)); + } + pub fn fill(&mut self, pos: usize, val: &T) { + self.data[pos..pos + size_of::()].copy_from_slice(as_u8_slice(val)); + } + pub fn pos(&self) -> usize { + self.data.len() + } + pub fn reserve(&mut self) -> usize { + let pos = self.pos(); + self.val(&T::default()); + pos + } +} + +fn as_u8_slice(p: &T) -> &[u8] { + unsafe { core::slice::from_raw_parts((p as *const T) as *const u8, size_of::()) } +} diff --git a/src/backend/container/pe/data_dir.rs b/src/backend/container/pe/data_dir.rs new file mode 100644 index 0000000..dfd3f3e --- /dev/null +++ b/src/backend/container/pe/data_dir.rs @@ -0,0 +1,27 @@ +#[derive(Default)] +#[repr(C)] +pub struct DataDirs { + pub export: DataDir, + pub import: DataDir, + pub rsc: DataDir, + pub exception: DataDir, + pub cert: DataDir, + pub base_reloc: DataDir, + pub debug: DataDir, + pub arch: DataDir, + pub global_ptr: DataDir, + pub tls: DataDir, + pub load_config: DataDir, + pub bound_import: DataDir, + pub import_addr: DataDir, + pub delay_import_desc: DataDir, + pub clr_runtime_header: DataDir, + pub reserved: DataDir, +} + +#[derive(Default)] +#[repr(C)] +pub struct DataDir { + pub virt_addr_rva: u32, + pub size: u32, +} diff --git a/src/backend/container/pe/header.rs b/src/backend/container/pe/header.rs new file mode 100644 index 0000000..6a71fa7 --- /dev/null +++ b/src/backend/container/pe/header.rs @@ -0,0 +1,67 @@ +#[repr(C)] +pub struct MZHeader { + pub magic: u16, + pub stuff: [u16; 15 + 4 + 10], + pub lfanew: u32, +} + +#[repr(C)] +pub struct PeHeader { + pub magic: u32, + pub machine: u16, + pub num_sections: u16, + pub time_date_stamp: u32, + pub sym_tab_ptr: u32, + pub num_symbols: u32, + pub opt_header_size: u16, + pub characteristics: u16, +} + +#[repr(C)] +#[derive(Default)] +pub struct OptHeader64 { + pub magic: u16, + pub major_linker_ver: u8, + pub minor_linker_ver: u8, + pub code_size: u32, + pub init_data_size: u32, + pub uninit_data_size: u32, + pub entry_addr: u32, + pub code_base: u32, + pub image_base: u64, + pub section_align: u32, + pub file_align: u32, + pub major_os_ver: u16, + pub minor_os_ver: u16, + pub major_image_ver: u16, + pub minor_image_ver: u16, + pub major_subsystem_ver: u16, + pub minor_subsystem_ver: u16, + pub win32_ver: u32, + pub image_size: u32, + pub headers_size: u32, + pub checksum: u32, + pub subsystem: u16, + pub dll_characteristics: u16, + pub stack_reserve_size: u64, + pub stack_commit_size: u64, + pub heap_reserve_size: u64, + pub heap_commit_size: u64, + pub loader_flags: u32, + pub num_of_rva_and_sizes: u32, +} + +#[repr(C)] +#[derive(Default)] +pub struct Section { + pub name: [u8; 8], + pub virtual_size: u32, + pub virtual_addr: u32, + pub raw_data_size: u32, + pub raw_data_ptr: u32, + pub reloc_ptr: u32, + pub line_num_ptr: u32, + pub num_relocs: u16, + pub num_line_nums: u16, + pub characteristics: u32, +} diff --git a/src/backend/container/pe/import.rs b/src/backend/container/pe/import.rs new file mode 100644 index 0000000..bdd6013 --- /dev/null +++ b/src/backend/container/pe/import.rs @@ -0,0 +1,77 @@ +use crate::backend::ByteEncoder; + +pub struct Import { + pub name: String, + pub names: Vec, +} + +pub fn encode(data: &mut ByteEncoder, imports: &[Import]) -> usize { + let mut names = 0; + for import in imports { + for name in &import.names { + data.extend(ImportLookupEntry::name().bytes()); + names += 1; + } + data.extend(ImportLookupEntry::NULL.bytes()); + } + let table_addr = data.pos(); + for import in imports { + let idt = ImportDirTable { + lookup_table_rva: todo!(), + time_date_stamp: 0, + forwarder_chain: 0, + name_rva: todo!(), + address_table_rva: todo!(), + }; + } + for import in imports { + for name in &import.names { + hint_name_entry(data, 0, &name); + } + } +} + +#[repr(C)] +pub struct ImportDirTable { + pub lookup_table_rva: u32, + pub time_date_stamp: u32, + pub forwarder_chain: u32, + pub name_rva: u32, + pub address_table_rva: u32, +} + +impl ImportDirTable { + pub const NULL: Self = Self { + lookup_table_rva: 0, + time_date_stamp: 0, + forwarder_chain: 0, + name_rva: 0, + address_table_rva: 0, + }; +} + +#[repr(C)] +pub struct ImportLookupEntry(u64); + +impl ImportLookupEntry { + pub const NULL: Self = Self(0); + pub fn name(hint_name_table_rva: u32) -> Self { + assert!(hint_name_table_rva >> 30 == 0); + Self(hint_name_table_rva as u64) + } + pub fn ordinal(ordinal: u16) -> Self { + Self(ordinal as u64 | (1 << 63)) + } + pub fn bytes(&self) -> [u8; 8] { + self.0.to_le_bytes() + } +} + +pub fn hint_name_entry(out: &mut Vec, hint: u16, name: &str) { + out.extend(hint.to_le_bytes()); + out.extend(name.as_bytes()); + out.push(0); + if out.len() % 2 == 1 { + out.push(0); + } +} diff --git a/src/backend/container/pe/mod.rs b/src/backend/container/pe/mod.rs new file mode 100644 index 0000000..40b3523 --- /dev/null +++ b/src/backend/container/pe/mod.rs @@ -0,0 +1,112 @@ +mod data_dir; +mod header; +mod import; + +use data_dir::*; +use header::*; + +use crate::backend::{ByteEncoder, LinkedProgram, pe::import::Import}; + +pub fn create(program: &[u8], start_offset: u64) -> Vec { + let mut data = ByteEncoder::default(); + let file_align = 1; + let section_align = 1; + let code_size = program.len() as u32; + + let mzheader = MZHeader { + magic: u16::from_ne_bytes(*b"MZ"), + stuff: [0; _], + lfanew: size_of::() as u32, + }; + data.val(&mzheader); + let header = PeHeader { + magic: u32::from_ne_bytes(*b"PE\0\0"), + machine: 0x8664, + num_sections: 1, + time_date_stamp: 0, + sym_tab_ptr: 0, + num_symbols: 0, + opt_header_size: size_of::() as u16, + // https://learn.microsoft.com/en-us/windows/win32/debug/pe-format + // executable | can handle >2GB addrs | debug info removed + characteristics: 0x2 | 0x20 | 0x0200, + }; + data.val(&header); + + let opt_header_pos = data.reserve::(); + + let num_of_rva_and_sizes: u32 = (size_of::() / size_of::()) as u32; + data.val(&DataDirs::default()); + + let code_sect_pos = data.reserve::
(); + let hdr_size = data.pos() as u32; + + let imports = [Import { + name: "kernel32.lib".to_string(), + names: ["GetStdHandle", "WriteFile", "ExitProcess"] + .map(String::from) + .to_vec(), + }]; + import::encode(&mut data, &imports); + + let code_addr = data.pos() as u32; + data.data.extend(program); + + let code_section = Section { + name: *b".text\0\0\0", + virtual_size: code_size, + virtual_addr: hdr_size.next_multiple_of(section_align), + raw_data_size: code_size.next_multiple_of(file_align), + raw_data_ptr: code_addr, + reloc_ptr: 0, + line_num_ptr: 0, + num_relocs: 0, + num_line_nums: 0, + characteristics: 0x60000020, + }; + + let file_size = data.pos() as u32; + + let opt_header = OptHeader64 { + magic: 0x20b, + major_linker_ver: 8, + minor_linker_ver: 0, + code_size: code_size.next_multiple_of(file_align), + init_data_size: 0, + uninit_data_size: 0, + entry_addr: code_addr + start_offset as u32, + code_base: code_addr, + image_base: 0x400000, + section_align, + file_align, + major_os_ver: 4, + minor_os_ver: 0, + major_image_ver: 0, + minor_image_ver: 0, + major_subsystem_ver: 4, + minor_subsystem_ver: 0, + win32_ver: 0, + image_size: file_size.next_multiple_of(section_align), + headers_size: hdr_size.next_multiple_of(file_align), + checksum: 0, + subsystem: 3, // windows CLI app + dll_characteristics: 0x400, + stack_reserve_size: 0x100000, + stack_commit_size: 0x1000, + heap_reserve_size: 0x100000, + heap_commit_size: 0x1000, + loader_flags: 0, + num_of_rva_and_sizes, + }; + + data.fill(opt_header_pos, &opt_header); + data.fill(code_sect_pos, &code_section); + + data.data +} + +impl LinkedProgram { + pub fn to_pe(&self) -> Vec { + create(&self.code, self.entry.expect("no start")) + } +} diff --git a/src/backend/link.rs b/src/backend/link.rs index 9336c52..7cb3964 100644 --- a/src/backend/link.rs +++ b/src/backend/link.rs @@ -1,12 +1,4 @@ -use crate::backend::elf; - pub struct LinkedProgram { pub code: Vec, pub entry: Option, } - -impl LinkedProgram { - pub fn to_elf(&self) -> Vec { - elf::create(&self.code, self.entry.expect("no start")) - } -} diff --git a/src/backend/mod.rs b/src/backend/mod.rs index 77eab1b..991c629 100644 --- a/src/backend/mod.rs +++ b/src/backend/mod.rs @@ -1,7 +1,7 @@ -mod elf; - +mod container; mod ir; mod link; +pub use container::*; pub use ir::*; pub use link::*; diff --git a/src/parser/nodes/asm/x86_64.rs b/src/parser/nodes/asm/x86_64.rs index 8361c6f..fc101eb 100644 --- a/src/parser/nodes/asm/x86_64.rs +++ b/src/parser/nodes/asm/x86_64.rs @@ -56,12 +56,12 @@ pub fn parse_imm(mut s: &str, span: Span) -> Result { u64::from_str_radix(s, radix).map_err(|_| CompilerMsg::from(("invalid immediate", span))) } -pub fn parse_rmi(ctx: &mut crate::parser::ParseCtx) -> Result { +pub fn parse_rmi(ctx: &mut crate::parser::ParseCtx) -> Result { let next = ctx.expect_next()?; let err = || CompilerMsg::unexpected_token(&next, ctx.span, "a register or immediate"); Ok(match &next { - Token::Ident(ident) => RegImm::Reg(RegMode::parse(ident).ok_or_else(err)?), - Token::Lit(LitTy::Number(num)) => RegImm::Imm(parse_imm(num, ctx.span)?), + Token::Ident(ident) => RegModeImm::Reg(RegMode::parse(ident).ok_or_else(err)?), + Token::Lit(LitTy::Number(num)) => RegModeImm::Imm(parse_imm(num, ctx.span)?), _ => return Err(err()), }) } diff --git a/x86_64_test.exe b/x86_64_test.exe new file mode 100755 index 0000000000000000000000000000000000000000..98cb72a1d96d4c6e208df9983967453fcb4c2c18 GIT binary patch literal 372 zcmeZ`Vjvqh0CfhqGBBjHF`}4Jz@Ws$&BVdLf+Si1R06^b3=UvX5EF!9YG8B;Tpa@g wGgw*xteyc%F~AsbK3oK%Zh%O=lGKV4sDD8I1o;QzUxb4c7#I?=)FTc90D(UWhX4Qo literal 0 HcmV?d00001