IMPORTS WORKING

This commit is contained in:
2026-06-09 00:08:26 -04:00
parent 6bc502d284
commit e4acaf40aa
11 changed files with 205 additions and 115 deletions
+10 -8
View File
@@ -9,18 +9,20 @@ pub struct Asm {
#[derive(Clone, Copy)]
pub enum Instr {
Mov { dst: RegMode, src: RegModeImm },
Mov { dst: RegMode, src: RegImmMem },
Int(u8),
Call(Symbol),
CallMem(Symbol),
Ret,
Syscall,
Lea { dst: RegMode, sym: Symbol },
Push(RegImm),
Pop(Reg),
Sub,
}
#[derive(Clone, Copy)]
pub enum RegModeImm {
pub enum RegImmMem {
Reg(RegMode),
Imm(u64),
}
@@ -31,13 +33,13 @@ pub enum RegImm {
Imm(u64),
}
impl From<RegMode> for RegModeImm {
impl From<RegMode> for RegImmMem {
fn from(value: RegMode) -> Self {
Self::Reg(value)
}
}
impl From<u64> for RegModeImm {
impl From<u64> for RegImmMem {
fn from(value: u64) -> Self {
Self::Imm(value)
}
@@ -45,7 +47,7 @@ impl From<u64> for RegModeImm {
mod fns {
use super::*;
pub fn mov(dst: RegMode, src: impl Into<RegModeImm>) -> Instr {
pub fn mov(dst: RegMode, src: impl Into<RegImmMem>) -> Instr {
Instr::Mov {
dst,
src: src.into(),
@@ -56,14 +58,14 @@ mod fns {
Instr::Lea { dst, sym }
}
pub fn push(reg: impl Into<RegModeImm>) -> Instr {
pub fn push(reg: impl Into<RegImmMem>) -> Instr {
Instr::Push(match reg.into() {
RegModeImm::Reg(reg) => {
RegImmMem::Reg(reg) => {
assert_eq!(reg.width, BitWidth::B64);
assert!(!reg.high);
RegImm::Reg(reg.reg)
}
RegModeImm::Imm(imm) => RegImm::Imm(imm),
RegImmMem::Imm(imm) => RegImm::Imm(imm),
})
}
+47 -13
View File
@@ -1,14 +1,18 @@
use super::*;
use crate::backend::{LinkedProgram, SymTable, Symbol};
use std::collections::HashMap;
pub struct Encoder {
use super::*;
use crate::backend::{LibImport, LinkedProgram, SymImport, SymTable, Symbol};
pub struct Encoder<'a> {
pub data: Vec<u8>,
pub sym_tab: SymTable<u64>,
pub missing: Vec<(usize, Symbol)>,
pub sym_refs: HashMap<Symbol, Vec<usize>>,
pub program: &'a Program<X86_64>,
}
pub fn compile(p: &Program<X86_64>) -> Result<LinkedProgram<u64>, CompilerMsg> {
let mut encoder = Encoder::new(p.sym_count());
let mut encoder = Encoder::new(p);
p.encode_data(&mut encoder.data, &mut encoder.sym_tab);
@@ -28,9 +32,26 @@ pub fn compile(p: &Program<X86_64>) -> Result<LinkedProgram<u64>, CompilerMsg> {
encoder.data[pos..pos + 4].copy_from_slice(&addr_offset(pos, addr))
}
let imports = p
.external
.iter()
.map(|e| LibImport {
name: e.file.clone(),
syms: e
.syms
.iter()
.map(|&s| SymImport {
name: p.sym_info(s).name.clone(),
usages: encoder.sym_refs.entry(s).or_default().clone(),
})
.collect(),
})
.collect();
Ok(LinkedProgram {
code: encoder.data,
entry: p.entry.and_then(|e| encoder.sym_tab.get(e)),
imports,
})
}
@@ -47,10 +68,10 @@ fn compile_instr(encoder: &mut Encoder, instr: &BInstr) -> Result<(), CompilerMs
Ok(())
}
impl Encoder {
impl Encoder<'_> {
// assembly
pub fn mov(&mut self, dst: RegMode, src: impl Into<RegModeImm>) -> Result<(), CompilerMsg> {
pub fn mov(&mut self, dst: RegMode, src: impl Into<RegImmMem>) -> Result<(), CompilerMsg> {
let src = src.into();
let width = dst.width;
if width == BitWidth::B16 {
@@ -59,7 +80,7 @@ impl Encoder {
let dst8 = dst.gt8();
let b64 = width == BitWidth::B64;
let b8 = width == BitWidth::B8;
let src8 = if let RegModeImm::Reg(src) = src {
let src8 = if let RegImmMem::Reg(src) = src {
src.gt8()
} else {
false
@@ -70,7 +91,7 @@ impl Encoder {
.push(0x40 | dst8 as u8 | ((b64 as u8) << 3) | ((src8 as u8) << 2));
}
match src {
RegModeImm::Reg(src) => {
RegImmMem::Reg(src) => {
if dst.width != src.width {
return Err("src and dst are not the same size".into());
}
@@ -78,7 +99,7 @@ impl Encoder {
let modrm = 0b11_000_000 | (src.base() << 3) | dst.base();
self.data.push(modrm);
}
RegModeImm::Imm(imm) => {
RegImmMem::Imm(imm) => {
if imm > width.max() {
return Err("immediate cannot fit in register".into());
}
@@ -111,6 +132,11 @@ impl Encoder {
self.sym_offset4(sym);
}
/// Encodes an indirect call through the memory slot belonging to `sym`.
///
/// Emits the opcode bytes `FF 15` (on x86-64: `call` with a RIP-relative
/// memory operand) and then hands over to `sym_offset4`, which appends the
/// 4-byte field referring to `sym`.
pub fn call_mem(&mut self, sym: Symbol) {
    const CALL_RIP_REL: [u8; 2] = [0xff, 0x15];
    self.data.extend_from_slice(&CALL_RIP_REL);
    self.sym_offset4(sym);
}
/// Appends the single byte `0xC3` (the x86 near `ret` opcode) to the output.
pub fn ret(&mut self) {
self.data.push(0xc3);
}
@@ -145,7 +171,11 @@ impl Encoder {
let Some(addr) = self.sym_tab.get(sym) else {
let pos = self.data.len();
self.data.extend([0; 4]);
self.missing.push((pos, sym));
if self.program.sym_info(sym).external {
self.sym_refs.entry(sym).or_default().push(pos);
} else {
self.missing.push((pos, sym));
}
return;
};
self.data.extend(addr_offset(self.data.len(), addr));
@@ -164,6 +194,8 @@ impl Encoder {
RegImm::Imm(imm) => self.push_imm(imm),
},
Instr::Pop(reg) => self.pop(reg),
Instr::CallMem(sym) => self.call_mem(sym),
Instr::Sub => self.data.extend([0x48, 0x83, 0xec, 0x28]),
}
Ok(())
}
@@ -176,12 +208,14 @@ fn addr_offset(pos: usize, addr: u64) -> [u8; 4] {
offset.to_le_bytes()
}
impl Encoder {
pub fn new(sym_count: usize) -> Self {
impl<'a> Encoder<'a> {
pub fn new(program: &'a Program<X86_64>) -> Self {
Self {
data: Default::default(),
sym_tab: SymTable::new(sym_count),
sym_tab: SymTable::new(program.sym_count()),
missing: Default::default(),
sym_refs: Default::default(),
program,
}
}
}
+50 -50
View File
@@ -1,6 +1,6 @@
use crate::{
arch::x86_64::*,
backend::{Instr as BInstr, Program, pe::LibImport},
backend::{Instr as BInstr, Program},
};
use std::{fs::OpenOptions, io::Write, os::unix::fs::OpenOptionsExt, process::Command};
@@ -12,34 +12,40 @@ pub fn run() {
fn linux() {
let mut program = Program::<X86_64>::default();
let text = b"Hello world!\n";
let text_sym = program.ro_data(text);
let text_sym = program.ro_data("hello_en", text);
let text2 = "世界、こんにちは!\n";
let text_sym2 = program.ro_data(text2);
let hello2 = program.func([BInstr::Asm(Asm {
instrs: vec![
mov(ax, 1),
mov(di, 1),
lea(rsi, text_sym2),
mov(dx, text2.len() as u64),
Instr::Syscall,
Instr::Ret,
],
})]);
let entry = program.func([BInstr::Asm(Asm {
instrs: vec![
mov(di, 39),
push(rdi),
mov(ax, 1),
mov(di, 1),
lea(rsi, text_sym),
mov(dx, text.len() as u64),
Instr::Syscall,
Instr::Call(hello2),
mov(ax, 0x3c),
pop(rdi),
Instr::Syscall,
],
})]);
let text_sym2 = program.ro_data("hello_jp", text2);
let hello2 = program.func(
"hello2",
[BInstr::Asm(Asm {
instrs: vec![
mov(ax, 1),
mov(di, 1),
lea(rsi, text_sym2),
mov(dx, text2.len() as u64),
Instr::Syscall,
Instr::Ret,
],
})],
);
let entry = program.func(
"main",
[BInstr::Asm(Asm {
instrs: vec![
mov(di, 39),
push(rdi),
mov(ax, 1),
mov(di, 1),
lea(rsi, text_sym),
mov(dx, text.len() as u64),
Instr::Syscall,
Instr::Call(hello2),
mov(ax, 0x3c),
pop(rdi),
Instr::Syscall,
],
})],
);
program.entry = Some(entry);
let linked = program.compile().expect("failed to compile");
let binary = linked.to_elf();
@@ -71,34 +77,28 @@ fn linux() {
fn windows() {
let mut program = Program::<X86_64>::default();
let entry = program.func([BInstr::Asm(Asm {
instrs: vec![
push(39),
pop(rax),
Instr::Ret
],
})]);
let [get_std_handle, write_file, exit_process] =
program.external("KERNEL32.dll", ["GetStdHandle", "WriteFile", "ExitProcess"]);
let entry = program.func(
"main",
[BInstr::Asm(Asm {
instrs: vec![Instr::Sub, mov(ecx, 40), Instr::CallMem(exit_process)],
})],
);
program.entry = Some(entry);
let linked = program.compile().expect("failed to compile");
let imports = &[LibImport {
name: "KERNEL32.dll".to_string(),
syms: ["GetStdHandle", "WriteFile", "ExitProcess"]
.map(String::from)
.to_vec(),
}];
let binary = linked.to_pe(imports);
let binary = linked.to_pe();
let path = "./x86_64_test.exe";
write(path, &binary);
// let mut cmd = Command::new("wine");
// cmd.arg("x86_64_test");
// let mut proc = cmd.spawn().expect("failed to run");
// let status = proc.wait().expect("failed to wait");
// if let Some(code) = status.code() {
// std::process::exit(code);
// }
let mut cmd = Command::new("wine");
cmd.arg("x86_64_test");
let mut proc = cmd.spawn().expect("failed to run");
let status = proc.wait().expect("failed to wait");
if let Some(code) = status.code() {
std::process::exit(code);
}
}
fn write(path: &str, binary: &[u8]) {
+2 -1
View File
@@ -2,7 +2,8 @@ use super::*;
fn eq(expected: impl AsRef<[u8]>, asm: Instr) {
let expected = expected.as_ref();
let mut encoder = Encoder::new(0);
let program = Program::default();
let mut encoder = Encoder::new(&program);
if let Err(e) = encoder.asm(asm) {
panic!("expected {expected:x?}, failed to compile: {}", e.msg);
}
+20
View File
@@ -26,6 +26,26 @@ impl ByteEncoder {
self.data.resize(self.data.len().next_multiple_of(align), 0);
}
/// Returns a raw `*mut T` pointing at byte `index` of the buffer.
///
/// The byte range `index..index + size_of::<T>()` is sliced before the
/// pointer is taken, so this panics when the buffer does not contain a whole
/// `T` at that position. The returned pointer carries no alignment guarantee
/// for `T`; callers use the `*_unaligned` pointer operations.
fn ptr_at<T>(&mut self, index: usize) -> *mut T {
    let end = index + size_of::<T>();
    self.data[index..end].as_mut_ptr().cast::<T>()
}
/// Overwrites the bytes starting at `index` with the in-memory
/// representation of `val`.
///
/// # Panics
/// Panics (inside `ptr_at`) if fewer than `size_of::<T>()` bytes are
/// available at `index`.
pub fn set_at<T>(&mut self, index: usize, val: T) {
    let target = self.ptr_at::<T>(index);
    // SAFETY: `ptr_at` bounds-checks `index..index + size_of::<T>()` against
    // the buffer, so `target` is valid for writing one `T`, and
    // `write_unaligned` places no alignment requirement on the pointer.
    unsafe { target.write_unaligned(val) }
}
/// Reads a `T` from the bytes at `index`, passes it through `edit`, and
/// stores the result back at the same position.
///
/// # Panics
/// Panics (inside `ptr_at`) if fewer than `size_of::<T>()` bytes are
/// available at `index`.
///
/// NOTE(review): nothing restricts `T` to types that are valid for arbitrary
/// byte contents; callers presumably only use plain integer-like types here
/// (the visible caller pattern is `set_at::<i32>`) — confirm.
pub fn edit_at<T>(&mut self, index: usize, edit: impl FnOnce(T) -> T) {
    let target = self.ptr_at::<T>(index);
    // SAFETY: `ptr_at` bounds-checks `index..index + size_of::<T>()` against
    // the buffer, so `target` is valid for reading one `T`; the unaligned
    // read needs no particular alignment.
    let current = unsafe { target.read_unaligned() };
    let updated = edit(current);
    // SAFETY: same pointer and range as the read above; the buffer has not
    // been touched in between because `self` is still exclusively borrowed.
    unsafe { target.write_unaligned(updated) };
}
#[must_use]
pub fn reserve<T>(&mut self) -> Reserved<T> {
let pos = self.pos();
+15 -16
View File
@@ -1,18 +1,9 @@
use crate::backend::pe::data_dir::DataDir;
use crate::backend::{LibImport, pe::data_dir::DataDir};
use super::ByteEncoder;
pub struct LibImport {
pub name: String,
pub syms: Vec<SymImport>,
}
pub struct SymImport {
name: String,
usages: Vec<usize>,
}
pub fn encode(data: &mut ByteEncoder, imports: &[LibImport]) -> DataDir {
pub fn encode(data: &mut ByteEncoder, imports: &[LibImport], code_start: usize) -> DataDir {
data.align(4);
let start = data.pos() as u32;
let idt = data.reserve_arr::<ImportDirTable>(imports.len());
// null entry to mark end
@@ -30,7 +21,6 @@ pub fn encode(data: &mut ByteEncoder, imports: &[LibImport]) -> DataDir {
let lookup_start = data.pos();
let lookup = data.reserve_arr::<ImportLookupEntry>(import.syms.len());
data.pad(size_of::<ImportLookupEntry>());
let lookup_end = data.pos();
for (i, sym) in import.syms.iter().enumerate() {
let rva = hint_name_entry(data, 0, &sym.name);
@@ -40,9 +30,17 @@ pub fn encode(data: &mut ByteEncoder, imports: &[LibImport]) -> DataDir {
// address table
data.align(size_of::<ImportLookupEntry>());
let addr_start = data.pos();
let len = lookup_end - lookup_start;
data.pad(len);
data.data.copy_within(lookup_start..lookup_end, addr_start);
for (i, sym) in import.syms.iter().enumerate() {
let here = data.pos() as i32;
for &usage in &sym.usages {
// NOTE: for now this writes a relative offset (measured from the end of the 4-byte field)
let code_pos = code_start + usage;
data.set_at::<i32>(code_pos, here - code_pos as i32 - 4);
}
let entry = data[lookup][i];
data.val(&entry);
}
data.pad(size_of::<ImportLookupEntry>());
// entry
data[idt][i] = ImportDirTable {
@@ -69,6 +67,7 @@ pub struct ImportDirTable {
}
/// One 64-bit slot of the import lookup array.
///
/// `encode` reserves one of these per imported symbol and later copies each
/// entry by value into the address table, which is why the type is `Copy`.
#[repr(C)]
#[derive(Clone, Copy)]
pub struct ImportLookupEntry(u64);
impl ImportLookupEntry {
+20 -17
View File
@@ -3,13 +3,12 @@ mod header;
mod import;
use super::*;
use crate::backend::LinkedProgram;
use crate::backend::{LibImport, LinkedProgram};
use data_dir::*;
use header::*;
pub use import::LibImport;
pub fn create(program: &[u8], start_offset: u64, imports: &[LibImport]) -> Vec<u8> {
pub fn create(program: &LinkedProgram<u64>) -> Vec<u8> {
let mut data = ByteEncoder::default();
let file_align = 1;
let section_align = 1;
@@ -44,22 +43,26 @@ pub fn create(program: &[u8], start_offset: u64, imports: &[LibImport]) -> Vec<u
let code_sect = data.reserve::<Section>();
let hdr_size = data.pos() as u32;
let code_start = data.pos() as u32;
if !imports.is_empty() {
let import_rva = import::encode(&mut data, &imports);
// .text start
let text_start = data.pos() as u32;
let code_start = data.pos();
data.extend(&program.code);
if !program.imports.is_empty() {
let import_rva = import::encode(&mut data, &program.imports, code_start);
data[data_dirs].import = import_rva;
}
let program_start = data.pos() as u32;
data.extend(program);
let code_size = data.pos() as u32 - code_start;
let text_size = data.pos() as u32 - text_start;
// .text end
data[code_sect] = Section {
name: *b".text\0\0\0",
virtual_size: code_size,
virtual_size: text_size,
virtual_addr: hdr_size.next_multiple_of(section_align),
raw_data_size: code_size.next_multiple_of(file_align),
raw_data_ptr: code_start,
raw_data_size: text_size.next_multiple_of(file_align),
raw_data_ptr: text_start,
reloc_ptr: 0,
line_num_ptr: 0,
num_relocs: 0,
@@ -73,11 +76,11 @@ pub fn create(program: &[u8], start_offset: u64, imports: &[LibImport]) -> Vec<u
magic: 0x20b,
major_linker_ver: 8,
minor_linker_ver: 0,
code_size: code_size.next_multiple_of(file_align),
code_size: text_size.next_multiple_of(file_align),
init_data_size: 0,
uninit_data_size: 0,
entry_addr: program_start + start_offset as u32,
code_base: code_start,
entry_addr: (code_start as u64 + program.entry.unwrap()) as u32,
code_base: text_start,
image_base: 0x400000,
section_align,
file_align,
@@ -105,7 +108,7 @@ pub fn create(program: &[u8], start_offset: u64, imports: &[LibImport]) -> Vec<u
}
impl LinkedProgram<u64> {
pub fn to_pe(&self, imports: &[LibImport]) -> Vec<u8> {
create(&self.code, self.entry.expect("no start"), imports)
pub fn to_pe(&self) -> Vec<u8> {
create(&self)
}
}
+27 -7
View File
@@ -11,6 +11,7 @@ pub struct Program<A: Arch> {
pub entry: Option<Symbol>,
pub external: Vec<External>,
sym_info: Vec<SymInfo>,
sym_count: usize,
}
@@ -29,6 +30,11 @@ pub struct External {
pub syms: Vec<Symbol>,
}
/// Metadata recorded for every symbol a `Program` reserves.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct SymInfo {
    /// Name given when the symbol was created (data label, function name,
    /// or the imported symbol's name).
    pub name: String,
    /// `true` for symbols registered through `Program::external`, `false`
    /// for data and functions defined by the program itself.
    pub external: bool,
}
pub enum Instr<A: Arch> {
Set { dst: VarId, src: Vec<u8> },
Call { dst: FnId, args: Vec<VarId> },
@@ -48,16 +54,22 @@ impl<A: Arch> Program<A> {
}
}
pub fn ro_data(&mut self, bytes: impl Into<Vec<u8>>) -> Symbol {
pub fn ro_data(&mut self, name: impl Into<String>, bytes: impl Into<Vec<u8>>) -> Symbol {
let bytes = bytes.into();
let sym = self.reserve();
let sym = self.reserve(SymInfo {
name: name.into(),
external: false,
});
self.ro_data.push(Data { bytes, sym });
sym
}
pub fn func(&mut self, instrs: impl Into<Vec<Instr<A>>>) -> Symbol {
pub fn func(&mut self, name: impl Into<String>, instrs: impl Into<Vec<Instr<A>>>) -> Symbol {
let instrs = instrs.into();
let sym = self.reserve();
let sym = self.reserve(SymInfo {
name: name.into(),
external: false,
});
self.funcs.push(Func { instrs, sym });
sym
}
@@ -68,8 +80,10 @@ impl<A: Arch> Program<A> {
names: [impl Into<String>; LEN],
) -> [Symbol; LEN] {
let syms = names.map(|s| {
let sym = self.reserve();
sym
self.reserve(SymInfo {
name: s.into(),
external: true,
})
});
self.external.push(External {
file: file.into(),
@@ -78,8 +92,9 @@ impl<A: Arch> Program<A> {
syms
}
fn reserve(&mut self) -> Symbol {
fn reserve(&mut self, info: SymInfo) -> Symbol {
let res = Symbol(self.sym_count);
self.sym_info.push(info);
self.sym_count += 1;
res
}
@@ -91,6 +106,10 @@ impl<A: Arch> Program<A> {
/// Returns how many symbols have been reserved in this program so far.
pub fn sym_count(&self) -> usize {
self.sym_count
}
/// Returns the [`SymInfo`] that was stored for `sym` when it was reserved.
///
/// # Panics
/// Panics if `sym.0` is not a valid index into the symbol-info table.
pub fn sym_info(&self, sym: Symbol) -> &SymInfo {
&self.sym_info[sym.0]
}
}
impl<A: Arch> Default for Program<A> {
@@ -101,6 +120,7 @@ impl<A: Arch> Default for Program<A> {
entry: Default::default(),
sym_count: Default::default(),
external: Default::default(),
sym_info: Default::default(),
}
}
}
+11
View File
@@ -1,4 +1,15 @@
/// Result of compiling a program: encoded bytes plus the information an
/// executable writer needs to wrap them.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct LinkedProgram<Addr> {
    /// Encoded program bytes.
    pub code: Vec<u8>,
    /// Address of the entry symbol, if one was set and resolved.
    pub entry: Option<Addr>,
    /// Libraries, and their symbols, that the code refers to.
    pub imports: Vec<LibImport>,
}

/// A single library together with the symbols imported from it.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct LibImport {
    /// Library file name, e.g. `KERNEL32.dll`.
    pub name: String,
    /// Symbols imported from this library.
    pub syms: Vec<SymImport>,
}

/// One imported symbol and every place in the code that refers to it.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct SymImport {
    /// Name of the imported symbol.
    pub name: String,
    /// Byte positions within `LinkedProgram::code` of the 4-byte fields that
    /// must be patched to reach this symbol.
    pub usages: Vec<usize>,
}
+3 -3
View File
@@ -56,12 +56,12 @@ pub fn parse_imm(mut s: &str, span: Span) -> Result<u64, CompilerMsg> {
u64::from_str_radix(s, radix).map_err(|_| CompilerMsg::from(("invalid immediate", span)))
}
pub fn parse_rmi(ctx: &mut crate::parser::ParseCtx) -> Result<RegModeImm, CompilerMsg> {
pub fn parse_rmi(ctx: &mut crate::parser::ParseCtx) -> Result<RegImmMem, CompilerMsg> {
let next = ctx.expect_next()?;
let err = || CompilerMsg::unexpected_token(&next, ctx.span, "a register or immediate");
Ok(match &next {
Token::Ident(ident) => RegModeImm::Reg(RegMode::parse(ident).ok_or_else(err)?),
Token::Lit(LitTy::Number(num)) => RegModeImm::Imm(parse_imm(num, ctx.span)?),
Token::Ident(ident) => RegImmMem::Reg(RegMode::parse(ident).ok_or_else(err)?),
Token::Lit(LitTy::Number(num)) => RegImmMem::Imm(parse_imm(num, ctx.span)?),
_ => return Err(err()),
})
}
BIN
View File
Binary file not shown.