From e4acaf40aa2c24bd1fb5cee5c9204631b91440df Mon Sep 17 00:00:00 2001 From: Shadow Cat Date: Tue, 9 Jun 2026 00:08:26 -0400 Subject: [PATCH] IMPORTS WORKING --- src/arch/x86_64/asm.rs | 18 +++--- src/arch/x86_64/encode.rs | 60 +++++++++++++---- src/arch/x86_64/test/bin.rs | 100 ++++++++++++++--------------- src/arch/x86_64/test/reg.rs | 3 +- src/backend/container/encode.rs | 20 ++++++ src/backend/container/pe/import.rs | 31 +++++---- src/backend/container/pe/mod.rs | 37 ++++++----- src/backend/ir/mod.rs | 34 ++++++++-- src/backend/link.rs | 11 ++++ src/parser/nodes/asm/x86_64.rs | 6 +- x86_64_test.exe | Bin 540 -> 552 bytes 11 files changed, 205 insertions(+), 115 deletions(-) diff --git a/src/arch/x86_64/asm.rs b/src/arch/x86_64/asm.rs index b2af8a0..ea78a21 100644 --- a/src/arch/x86_64/asm.rs +++ b/src/arch/x86_64/asm.rs @@ -9,18 +9,20 @@ pub struct Asm { #[derive(Clone, Copy)] pub enum Instr { - Mov { dst: RegMode, src: RegModeImm }, + Mov { dst: RegMode, src: RegImmMem }, Int(u8), Call(Symbol), + CallMem(Symbol), Ret, Syscall, Lea { dst: RegMode, sym: Symbol }, Push(RegImm), Pop(Reg), + Sub, } #[derive(Clone, Copy)] -pub enum RegModeImm { +pub enum RegImmMem { Reg(RegMode), Imm(u64), } @@ -31,13 +33,13 @@ pub enum RegImm { Imm(u64), } -impl From for RegModeImm { +impl From for RegImmMem { fn from(value: RegMode) -> Self { Self::Reg(value) } } -impl From for RegModeImm { +impl From for RegImmMem { fn from(value: u64) -> Self { Self::Imm(value) } @@ -45,7 +47,7 @@ impl From for RegModeImm { mod fns { use super::*; - pub fn mov(dst: RegMode, src: impl Into) -> Instr { + pub fn mov(dst: RegMode, src: impl Into) -> Instr { Instr::Mov { dst, src: src.into(), @@ -56,14 +58,14 @@ mod fns { Instr::Lea { dst, sym } } - pub fn push(reg: impl Into) -> Instr { + pub fn push(reg: impl Into) -> Instr { Instr::Push(match reg.into() { - RegModeImm::Reg(reg) => { + RegImmMem::Reg(reg) => { assert_eq!(reg.width, BitWidth::B64); assert!(!reg.high); RegImm::Reg(reg.reg) } - 
RegModeImm::Imm(imm) => RegImm::Imm(imm), + RegImmMem::Imm(imm) => RegImm::Imm(imm), }) } diff --git a/src/arch/x86_64/encode.rs b/src/arch/x86_64/encode.rs index 08703f9..cd94a6d 100644 --- a/src/arch/x86_64/encode.rs +++ b/src/arch/x86_64/encode.rs @@ -1,14 +1,18 @@ -use super::*; -use crate::backend::{LinkedProgram, SymTable, Symbol}; +use std::collections::HashMap; -pub struct Encoder { +use super::*; +use crate::backend::{LibImport, LinkedProgram, SymImport, SymTable, Symbol}; + +pub struct Encoder<'a> { pub data: Vec, pub sym_tab: SymTable, pub missing: Vec<(usize, Symbol)>, + pub sym_refs: HashMap>, + pub program: &'a Program, } pub fn compile(p: &Program) -> Result, CompilerMsg> { - let mut encoder = Encoder::new(p.sym_count()); + let mut encoder = Encoder::new(p); p.encode_data(&mut encoder.data, &mut encoder.sym_tab); @@ -28,9 +32,26 @@ pub fn compile(p: &Program) -> Result, CompilerMsg> { encoder.data[pos..pos + 4].copy_from_slice(&addr_offset(pos, addr)) } + let imports = p + .external + .iter() + .map(|e| LibImport { + name: e.file.clone(), + syms: e + .syms + .iter() + .map(|&s| SymImport { + name: p.sym_info(s).name.clone(), + usages: encoder.sym_refs.entry(s).or_default().clone(), + }) + .collect(), + }) + .collect(); + Ok(LinkedProgram { code: encoder.data, entry: p.entry.and_then(|e| encoder.sym_tab.get(e)), + imports, }) } @@ -47,10 +68,10 @@ fn compile_instr(encoder: &mut Encoder, instr: &BInstr) -> Result<(), CompilerMs Ok(()) } -impl Encoder { +impl Encoder<'_> { // assembly - pub fn mov(&mut self, dst: RegMode, src: impl Into) -> Result<(), CompilerMsg> { + pub fn mov(&mut self, dst: RegMode, src: impl Into) -> Result<(), CompilerMsg> { let src = src.into(); let width = dst.width; if width == BitWidth::B16 { @@ -59,7 +80,7 @@ impl Encoder { let dst8 = dst.gt8(); let b64 = width == BitWidth::B64; let b8 = width == BitWidth::B8; - let src8 = if let RegModeImm::Reg(src) = src { + let src8 = if let RegImmMem::Reg(src) = src { src.gt8() } else { 
false @@ -70,7 +91,7 @@ impl Encoder { .push(0x40 | dst8 as u8 | ((b64 as u8) << 3) | ((src8 as u8) << 2)); } match src { - RegModeImm::Reg(src) => { + RegImmMem::Reg(src) => { if dst.width != src.width { return Err("src and dst are not the same size".into()); } @@ -78,7 +99,7 @@ impl Encoder { let modrm = 0b11_000_000 | (src.base() << 3) | dst.base(); self.data.push(modrm); } - RegModeImm::Imm(imm) => { + RegImmMem::Imm(imm) => { if imm > width.max() { return Err("immediate cannot fit in register".into()); } @@ -111,6 +132,11 @@ impl Encoder { self.sym_offset4(sym); } + pub fn call_mem(&mut self, sym: Symbol) { + self.data.extend([0xff, 0x15]); + self.sym_offset4(sym); + } + pub fn ret(&mut self) { self.data.push(0xc3); } @@ -145,7 +171,11 @@ impl Encoder { let Some(addr) = self.sym_tab.get(sym) else { let pos = self.data.len(); self.data.extend([0; 4]); - self.missing.push((pos, sym)); + if self.program.sym_info(sym).external { + self.sym_refs.entry(sym).or_default().push(pos); + } else { + self.missing.push((pos, sym)); + } return; }; self.data.extend(addr_offset(self.data.len(), addr)); @@ -164,6 +194,8 @@ impl Encoder { RegImm::Imm(imm) => self.push_imm(imm), }, Instr::Pop(reg) => self.pop(reg), + Instr::CallMem(sym) => self.call_mem(sym), + Instr::Sub => self.data.extend([0x48, 0x83, 0xec, 0x28]), } Ok(()) } @@ -176,12 +208,14 @@ fn addr_offset(pos: usize, addr: u64) -> [u8; 4] { offset.to_le_bytes() } -impl Encoder { - pub fn new(sym_count: usize) -> Self { +impl<'a> Encoder<'a> { + pub fn new(program: &'a Program) -> Self { Self { data: Default::default(), - sym_tab: SymTable::new(sym_count), + sym_tab: SymTable::new(program.sym_count()), missing: Default::default(), + sym_refs: Default::default(), + program, } } } diff --git a/src/arch/x86_64/test/bin.rs b/src/arch/x86_64/test/bin.rs index 238065f..82af65b 100644 --- a/src/arch/x86_64/test/bin.rs +++ b/src/arch/x86_64/test/bin.rs @@ -1,6 +1,6 @@ use crate::{ arch::x86_64::*, - backend::{Instr as BInstr, 
Program, pe::LibImport}, + backend::{Instr as BInstr, Program}, }; use std::{fs::OpenOptions, io::Write, os::unix::fs::OpenOptionsExt, process::Command}; @@ -12,34 +12,40 @@ pub fn run() { fn linux() { let mut program = Program::::default(); let text = b"Hello world!\n"; - let text_sym = program.ro_data(text); + let text_sym = program.ro_data("hello_en", text); let text2 = "世界、こんにちは!\n"; - let text_sym2 = program.ro_data(text2); - let hello2 = program.func([BInstr::Asm(Asm { - instrs: vec![ - mov(ax, 1), - mov(di, 1), - lea(rsi, text_sym2), - mov(dx, text2.len() as u64), - Instr::Syscall, - Instr::Ret, - ], - })]); - let entry = program.func([BInstr::Asm(Asm { - instrs: vec![ - mov(di, 39), - push(rdi), - mov(ax, 1), - mov(di, 1), - lea(rsi, text_sym), - mov(dx, text.len() as u64), - Instr::Syscall, - Instr::Call(hello2), - mov(ax, 0x3c), - pop(rdi), - Instr::Syscall, - ], - })]); + let text_sym2 = program.ro_data("hello_jp", text2); + let hello2 = program.func( + "hello2", + [BInstr::Asm(Asm { + instrs: vec![ + mov(ax, 1), + mov(di, 1), + lea(rsi, text_sym2), + mov(dx, text2.len() as u64), + Instr::Syscall, + Instr::Ret, + ], + })], + ); + let entry = program.func( + "main", + [BInstr::Asm(Asm { + instrs: vec![ + mov(di, 39), + push(rdi), + mov(ax, 1), + mov(di, 1), + lea(rsi, text_sym), + mov(dx, text.len() as u64), + Instr::Syscall, + Instr::Call(hello2), + mov(ax, 0x3c), + pop(rdi), + Instr::Syscall, + ], + })], + ); program.entry = Some(entry); let linked = program.compile().expect("failed to compile"); let binary = linked.to_elf(); @@ -71,34 +77,28 @@ fn linux() { fn windows() { let mut program = Program::::default(); - let entry = program.func([BInstr::Asm(Asm { - instrs: vec![ - push(39), - pop(rax), - Instr::Ret - ], - })]); + let [get_std_handle, write_file, exit_process] = + program.external("KERNEL32.dll", ["GetStdHandle", "WriteFile", "ExitProcess"]); + let entry = program.func( + "main", + [BInstr::Asm(Asm { + instrs: vec![Instr::Sub, mov(ecx, 40), 
Instr::CallMem(exit_process)], + })], + ); program.entry = Some(entry); let linked = program.compile().expect("failed to compile"); - let imports = &[LibImport { - name: "KERNEL32.dll".to_string(), - syms: ["GetStdHandle", "WriteFile", "ExitProcess"] - .map(String::from) - .to_vec(), - }]; - - let binary = linked.to_pe(imports); + let binary = linked.to_pe(); let path = "./x86_64_test.exe"; write(path, &binary); - // let mut cmd = Command::new("wine"); - // cmd.arg("x86_64_test"); - // let mut proc = cmd.spawn().expect("failed to run"); - // let status = proc.wait().expect("failed to wait"); - // if let Some(code) = status.code() { - // std::process::exit(code); - // } + let mut cmd = Command::new("wine"); + cmd.arg("x86_64_test"); + let mut proc = cmd.spawn().expect("failed to run"); + let status = proc.wait().expect("failed to wait"); + if let Some(code) = status.code() { + std::process::exit(code); + } } fn write(path: &str, binary: &[u8]) { diff --git a/src/arch/x86_64/test/reg.rs b/src/arch/x86_64/test/reg.rs index 8152aee..5fb6059 100644 --- a/src/arch/x86_64/test/reg.rs +++ b/src/arch/x86_64/test/reg.rs @@ -2,7 +2,8 @@ use super::*; fn eq(expected: impl AsRef<[u8]>, asm: Instr) { let expected = expected.as_ref(); - let mut encoder = Encoder::new(0); + let program = Program::default(); + let mut encoder = Encoder::new(&program); if let Err(e) = encoder.asm(asm) { panic!("expected {expected:x?}, failed to compile: {}", e.msg); } diff --git a/src/backend/container/encode.rs b/src/backend/container/encode.rs index 0f03e02..412f53f 100644 --- a/src/backend/container/encode.rs +++ b/src/backend/container/encode.rs @@ -26,6 +26,26 @@ impl ByteEncoder { self.data.resize(self.data.len().next_multiple_of(align), 0); } + fn ptr_at(&mut self, index: usize) -> *mut T { + let slice = &mut self.data[index..index + size_of::()]; + (slice as *mut [u8]) as *mut T + } + + pub fn set_at(&mut self, index: usize, val: T) { + let ptr = self.ptr_at::(index); + unsafe { + 
ptr.write_unaligned(val); + } + } + + pub fn edit_at(&mut self, index: usize, edit: impl FnOnce(T) -> T) { + let ptr = self.ptr_at::(index); + unsafe { + let val = ptr.read_unaligned(); + ptr.write_unaligned(edit(val)); + } + } + #[must_use] pub fn reserve(&mut self) -> Reserved { let pos = self.pos(); diff --git a/src/backend/container/pe/import.rs b/src/backend/container/pe/import.rs index e8487a0..fb3cced 100644 --- a/src/backend/container/pe/import.rs +++ b/src/backend/container/pe/import.rs @@ -1,18 +1,9 @@ -use crate::backend::pe::data_dir::DataDir; +use crate::backend::{LibImport, pe::data_dir::DataDir}; use super::ByteEncoder; -pub struct LibImport { - pub name: String, - pub syms: Vec, -} - -pub struct SymImport { - name: String, - usages: Vec, -} - -pub fn encode(data: &mut ByteEncoder, imports: &[LibImport]) -> DataDir { +pub fn encode(data: &mut ByteEncoder, imports: &[LibImport], code_start: usize) -> DataDir { + data.align(4); let start = data.pos() as u32; let idt = data.reserve_arr::(imports.len()); // null entry to mark end @@ -30,7 +21,6 @@ pub fn encode(data: &mut ByteEncoder, imports: &[LibImport]) -> DataDir { let lookup_start = data.pos(); let lookup = data.reserve_arr::(import.syms.len()); data.pad(size_of::()); - let lookup_end = data.pos(); for (i, sym) in import.syms.iter().enumerate() { let rva = hint_name_entry(data, 0, &sym.name); @@ -40,9 +30,17 @@ pub fn encode(data: &mut ByteEncoder, imports: &[LibImport]) -> DataDir { // address table data.align(size_of::()); let addr_start = data.pos(); - let len = lookup_end - lookup_start; - data.pad(len); - data.data.copy_within(lookup_start..lookup_end, addr_start); + for (i, sym) in import.syms.iter().enumerate() { + let here = data.pos() as i32; + for &usage in &sym.usages { + // NOTE: sets relative offset rn + let code_pos = code_start + usage; + data.set_at::(code_pos, here - code_pos as i32 - 4); + } + let entry = data[lookup][i]; + data.val(&entry); + } + data.pad(size_of::()); // entry 
data[idt][i] = ImportDirTable { @@ -69,6 +67,7 @@ pub struct ImportDirTable { } #[repr(C)] +#[derive(Clone, Copy)] pub struct ImportLookupEntry(u64); impl ImportLookupEntry { diff --git a/src/backend/container/pe/mod.rs b/src/backend/container/pe/mod.rs index 88665c7..3e9385e 100644 --- a/src/backend/container/pe/mod.rs +++ b/src/backend/container/pe/mod.rs @@ -3,13 +3,12 @@ mod header; mod import; use super::*; -use crate::backend::LinkedProgram; +use crate::backend::{LibImport, LinkedProgram}; use data_dir::*; use header::*; -pub use import::LibImport; -pub fn create(program: &[u8], start_offset: u64, imports: &[LibImport]) -> Vec { +pub fn create(program: &LinkedProgram) -> Vec { let mut data = ByteEncoder::default(); let file_align = 1; let section_align = 1; @@ -44,22 +43,26 @@ pub fn create(program: &[u8], start_offset: u64, imports: &[LibImport]) -> Vec(); let hdr_size = data.pos() as u32; - let code_start = data.pos() as u32; - if !imports.is_empty() { - let import_rva = import::encode(&mut data, &imports); + // .text start + let text_start = data.pos() as u32; + + let code_start = data.pos(); + data.extend(&program.code); + + if !program.imports.is_empty() { + let import_rva = import::encode(&mut data, &program.imports, code_start); data[data_dirs].import = import_rva; } - let program_start = data.pos() as u32; - data.extend(program); - let code_size = data.pos() as u32 - code_start; + let text_size = data.pos() as u32 - text_start; + // .text end data[code_sect] = Section { name: *b".text\0\0\0", - virtual_size: code_size, + virtual_size: text_size, virtual_addr: hdr_size.next_multiple_of(section_align), - raw_data_size: code_size.next_multiple_of(file_align), - raw_data_ptr: code_start, + raw_data_size: text_size.next_multiple_of(file_align), + raw_data_ptr: text_start, reloc_ptr: 0, line_num_ptr: 0, num_relocs: 0, @@ -73,11 +76,11 @@ pub fn create(program: &[u8], start_offset: u64, imports: &[LibImport]) -> Vec Vec { - pub fn to_pe(&self, imports: 
&[LibImport]) -> Vec { - create(&self.code, self.entry.expect("no start"), imports) + pub fn to_pe(&self) -> Vec { + create(&self) } } diff --git a/src/backend/ir/mod.rs b/src/backend/ir/mod.rs index f18e211..4eb3610 100644 --- a/src/backend/ir/mod.rs +++ b/src/backend/ir/mod.rs @@ -11,6 +11,7 @@ pub struct Program { pub entry: Option, pub external: Vec, + sym_info: Vec, sym_count: usize, } @@ -29,6 +30,11 @@ pub struct External { pub syms: Vec, } +pub struct SymInfo { + pub name: String, + pub external: bool, +} + pub enum Instr { Set { dst: VarId, src: Vec }, Call { dst: FnId, args: Vec }, @@ -48,16 +54,22 @@ impl Program { } } - pub fn ro_data(&mut self, bytes: impl Into>) -> Symbol { + pub fn ro_data(&mut self, name: impl Into, bytes: impl Into>) -> Symbol { let bytes = bytes.into(); - let sym = self.reserve(); + let sym = self.reserve(SymInfo { + name: name.into(), + external: false, + }); self.ro_data.push(Data { bytes, sym }); sym } - pub fn func(&mut self, instrs: impl Into>>) -> Symbol { + pub fn func(&mut self, name: impl Into, instrs: impl Into>>) -> Symbol { let instrs = instrs.into(); - let sym = self.reserve(); + let sym = self.reserve(SymInfo { + name: name.into(), + external: false, + }); self.funcs.push(Func { instrs, sym }); sym } @@ -68,8 +80,10 @@ impl Program { names: [impl Into; LEN], ) -> [Symbol; LEN] { let syms = names.map(|s| { - let sym = self.reserve(); - sym + self.reserve(SymInfo { + name: s.into(), + external: true, + }) }); self.external.push(External { file: file.into(), @@ -78,8 +92,9 @@ impl Program { syms } - fn reserve(&mut self) -> Symbol { + fn reserve(&mut self, info: SymInfo) -> Symbol { let res = Symbol(self.sym_count); + self.sym_info.push(info); self.sym_count += 1; res } @@ -91,6 +106,10 @@ impl Program { pub fn sym_count(&self) -> usize { self.sym_count } + + pub fn sym_info(&self, sym: Symbol) -> &SymInfo { + &self.sym_info[sym.0] + } } impl Default for Program { @@ -101,6 +120,7 @@ impl Default for Program { entry: 
Default::default(), sym_count: Default::default(), external: Default::default(), + sym_info: Default::default(), } } } diff --git a/src/backend/link.rs b/src/backend/link.rs index 7cb3964..6a86535 100644 --- a/src/backend/link.rs +++ b/src/backend/link.rs @@ -1,4 +1,15 @@ pub struct LinkedProgram { pub code: Vec, pub entry: Option, + pub imports: Vec, +} + +pub struct LibImport { + pub name: String, + pub syms: Vec, +} + +pub struct SymImport { + pub name: String, + pub usages: Vec, } diff --git a/src/parser/nodes/asm/x86_64.rs b/src/parser/nodes/asm/x86_64.rs index fc101eb..9722cde 100644 --- a/src/parser/nodes/asm/x86_64.rs +++ b/src/parser/nodes/asm/x86_64.rs @@ -56,12 +56,12 @@ pub fn parse_imm(mut s: &str, span: Span) -> Result { u64::from_str_radix(s, radix).map_err(|_| CompilerMsg::from(("invalid immediate", span))) } -pub fn parse_rmi(ctx: &mut crate::parser::ParseCtx) -> Result { +pub fn parse_rmi(ctx: &mut crate::parser::ParseCtx) -> Result { let next = ctx.expect_next()?; let err = || CompilerMsg::unexpected_token(&next, ctx.span, "a register or immediate"); Ok(match &next { - Token::Ident(ident) => RegModeImm::Reg(RegMode::parse(ident).ok_or_else(err)?), - Token::Lit(LitTy::Number(num)) => RegModeImm::Imm(parse_imm(num, ctx.span)?), + Token::Ident(ident) => RegImmMem::Reg(RegMode::parse(ident).ok_or_else(err)?), + Token::Lit(LitTy::Number(num)) => RegImmMem::Imm(parse_imm(num, ctx.span)?), _ => return Err(err()), }) } diff --git a/x86_64_test.exe b/x86_64_test.exe index 87d20b4923794f6b381fa7f9f37001343ede1521..14caa1d59043282dfc7c29da4e5a0b8cb563d2e8 100755 GIT binary patch delta 163 zcmbQkvVvto4Brk01Snvfn5D+3F>#_jW5dLY^-dtU0!9V~7>!WLz@Wgukl@k$Mq{T2 vQ1rj(Opx3TpjsGM0c3M9O%xQHJcCg{@CH=o1(g0Wc_*X20){A(evml;q5m3) delta 151 zcmZ3%GKXbC4Br|C1dw2wn5D)jGjXClW5L9W^+q7M0!9V~7>!WLz@Wgukgx(M2Lm&J m>>rF11;r+>U=$EM0hPG{rJqbb$!M>FA&R6QWKNcP#9;uA3>UKi