linking / symbol stuff

This commit is contained in:
2026-06-04 04:28:14 -04:00
parent 380a0f977a
commit 978bac88ed
9 changed files with 506 additions and 43 deletions
+68 -25
View File
@@ -1,4 +1,10 @@
use crate::io::CompilerMsg;
use crate::{
backend::{
program::{Addr, SymTable},
symbol::Symbol,
},
io::CompilerMsg,
};
pub struct Asm {
pub instrs: Vec<Instr>,
@@ -7,6 +13,7 @@ pub struct Asm {
/// A single x86-64 instruction that this backend knows how to encode.
pub enum Instr {
/// Copy a register or an immediate (`src`) into the register `dst`.
Mov { dst: RegMode, src: RegImm },
/// Software interrupt; encoded as the two bytes `0xcd, code`.
Int { code: u8 },
/// Load the address of `sym` into `dst`. Encoded with a 4-byte displacement
/// relative to the end of the instruction, so the symbol's address has to be
/// known (or patched in later) by the linker.
Lea { dst: RegMode, sym: Symbol },
}
pub enum RegImm {
@@ -22,22 +29,17 @@ pub struct RegMode {
high: bool,
}
impl Asm {
pub fn compile(&self, out: &mut Vec<u8>) -> Result<(), CompilerMsg> {
for instr in &self.instrs {
instr.compile(out)?;
}
Ok(())
}
}
impl Instr {
pub fn compile(&self, out: &mut Vec<u8>) -> Result<(), CompilerMsg> {
impl super::super::program::Instr for Instr {
fn encode(
&self,
data: &mut Vec<u8>,
syms: &mut SymTable,
) -> Result<Option<(Symbol, usize)>, CompilerMsg> {
match self {
Instr::Mov { dst, src } => {
let width = dst.width;
if width == BitWidth::B16 {
out.push(0x66);
data.push(0x66);
}
let dst8 = dst.gt8();
let b64 = width == BitWidth::B64;
@@ -47,31 +49,58 @@ impl Instr {
} else {
false
};
// special 64-bit / register 4-7 indicator
if dst8 || src8 || b64 || (dst.gt4() && !dst.high) {
out.push(0x40 | dst8 as u8 | ((b64 as u8) << 3) | ((src8 as u8) << 2));
data.push(0x40 | dst8 as u8 | ((b64 as u8) << 3) | ((src8 as u8) << 2));
}
match src {
RegImm::Reg(src) => {
if dst.width != src.width {
return Err("src and dst are not the same size".into());
}
out.push(0x88 | !b8 as u8);
data.push(0x88 | !b8 as u8);
let modrm = 0b11_000_000 | (src.base() << 3) | dst.base();
out.push(modrm);
data.push(modrm);
}
&RegImm::Imm(imm) => {
if imm > width.max() {
return Err("immediate cannot fit in register".into());
}
out.push(0xb0 | ((!b8 as u8) << 3) | dst.base());
out.extend(&imm.to_le_bytes()[..width.bytes()]);
data.push(0xb0 | ((!b8 as u8) << 3) | dst.base());
data.extend(&imm.to_le_bytes()[..width.bytes()]);
}
}
}
Instr::Int { code } => out.extend([0xcd, *code]),
Instr::Int { code } => data.extend([0xcd, *code]),
Instr::Lea { dst, sym } => {
data.extend([
0x48 | ((dst.gt8() as u8) << 2),
0x8d,
0x05 | (dst.base() << 3),
]);
let Some(addr) = syms.get(*sym) else {
let pos = data.len();
data.extend([0; 4]);
return Ok(Some((*sym, pos)));
};
data.extend(addr_offset(data.len(), addr));
}
}
Ok(())
Ok(None)
}
/// Patches a previously reserved 4-byte placeholder at `data[pos..pos + 4]`
/// with the displacement to `addr`.
///
/// Only `Lea` ever reserves a placeholder, so any other instruction reaching
/// this point is an internal error and panics.
fn insert_sym(&self, data: &mut Vec<u8>, pos: usize, addr: Addr) {
    let Self::Lea { .. } = self else {
        panic!("unkown symbol insertion");
    };
    data[pos..pos + 4].copy_from_slice(&addr_offset(pos, addr));
}
}
/// Computes the little-endian 32-bit displacement from the end of a 4-byte
/// field located at `pos` to `addr`.
///
/// Assumes the next instruction starts directly after the 4-byte field, i.e.
/// the displacement is the last part of the instruction being encoded.
fn addr_offset(pos: usize, addr: Addr) -> [u8; 4] {
    let next_instr = (pos + 4) as i32;
    let target = addr.val() as i32;
    (target - next_instr).to_le_bytes()
}
impl RegMode {
@@ -169,10 +198,21 @@ impl BitWidth {
}
}
pub fn mov(dst: RegMode, src: impl Into<RegImm>) -> Instr {
Instr::Mov {
dst,
src: src.into(),
/// Short constructor functions for [`Instr`], meant to be glob-imported so
/// instruction lists read like assembly.
pub mod instr {
    use super::*;

    /// `mov dst, src` where `src` is anything convertible into a [`RegImm`].
    pub fn mov(dst: RegMode, src: impl Into<RegImm>) -> Instr {
        let src: RegImm = src.into();
        Instr::Mov { dst, src }
    }

    /// `lea dst, [sym]`: loads the address of `sym` into `dst`.
    pub fn lea(dst: RegMode, sym: Symbol) -> Instr {
        Instr::Lea { sym, dst }
    }

    /// `int code`: software interrupt.
    pub fn int(code: u8) -> Instr {
        Instr::Int { code }
    }
}
@@ -190,13 +230,16 @@ impl From<u64> for RegImm {
#[cfg(test)]
mod test {
use crate::backend::program::Instr as _;
use super::*;
use instr::*;
use reg::*;
fn eq(expected: impl AsRef<[u8]>, got: Instr) {
let expected = expected.as_ref();
let mut res = Vec::new();
if let Err(e) = got.compile(&mut res) {
if let Err(e) = got.encode(&mut res, &mut SymTable::new(0)) {
panic!("expected {expected:x?}, failed to compile: {}", e.msg);
}
assert_eq!(expected, &res[..], "expected {expected:x?}, got {res:x?}");
+2 -2
View File
@@ -52,14 +52,14 @@ pub struct SectionHeader {
pub enum Arch {
X86_64,
RISCV,
Riscv,
}
impl Arch {
pub fn machine(&self) -> u16 {
match self {
Arch::X86_64 => 0x3e,
Arch::RISCV => 0xf3,
Arch::Riscv => 0xf3,
}
}
}
+145
View File
@@ -0,0 +1,145 @@
use crate::backend::Addr;
/// The ELF64 file header, laid out exactly as it is written to disk.
///
/// `#[repr(C)]` matters here: `create` emits this struct by viewing it as raw
/// bytes, so the field order and sizes below are the file format.
#[repr(C)]
pub struct ELF64Header {
// magic number identifying the file as ELF
magic: u32,
// `create` writes 0x2 here for 64 bit
class: u8,
// `create` writes 0x1 here for little endian
endianness: u8,
ei_version: u8,
// `create` writes 0x0 here for system-v
os_abi: u8,
os_abi_ver: u8,
// filled with zeros by `create`
pad: [u8; 7],
// object file type; `create` writes 0x2 for executable
ty: u16,
// target architecture id, see `Arch::machine`
machine: u16,
e_version: u32,
// address execution starts at
entry: u64,
// file offset of the program header table
program_header_offset: u64,
// file offset of the section header table
section_header_offset: u64,
// architecture-specific flags
flags: u32,
// size of this header in bytes
header_size: u16,
// size in bytes of one program header table entry
program_header_entry_size: u16,
// number of program header table entries
program_header_num: u16,
// size in bytes of one section header table entry
section_header_entry_size: u16,
// number of section header table entries
section_header_num: u16,
// index of the section holding section names; `create` writes 0
section_header_str_idx: u16,
}
/// One entry of the ELF64 program header table, laid out exactly as it is
/// written to disk (`create` emits it as raw bytes, hence `#[repr(C)]`).
#[repr(C)]
pub struct ProgramHeader {
// segment type; `create` writes 0x1 for LOAD
ty: u32,
// permission bits; `create` writes 0b101 for executable + readable
flags: u32,
// file offset the segment starts at
offset: u64,
// virtual address the segment is placed at
vaddr: u64,
// physical address; `create` writes 0
paddr: u64,
// number of bytes of the segment stored in the file
filesz: u64,
// number of bytes the segment occupies in memory
memsz: u64,
// segment alignment; `create` writes the page size
align: u64,
}
/// One entry of the ELF64 section header table, laid out exactly as it is
/// written to disk (`create` emits it as raw bytes, hence `#[repr(C)]`).
#[repr(C)]
pub struct SectionHeader {
// index of the section's name in the section name string table
name_idx: u32,
// section type; `create` writes 0 for the null section and 0x1 for program data
ty: u32,
// attribute bits; `create` writes (1 << 1) | (1 << 2) for alloc & exec
flags: u64,
// address of the section in memory
addr: u64,
// file offset of the section contents
offset: u64,
// size of the section contents in bytes
size: u64,
link: u32,
info: u32,
// required alignment of the section
addr_align: u64,
// size of one entry for sections that hold a table; 0 otherwise
entry_size: u64,
}
/// Target architectures that can be named in an ELF header.
pub enum Arch {
    X86_64,
    Riscv,
}

impl Arch {
    /// Returns the id stored in the ELF header's `machine` field for this
    /// architecture.
    pub fn machine(&self) -> u16 {
        match *self {
            Self::Riscv => 0xf3,
            Self::X86_64 => 0x3e,
        }
    }
}
/// Wraps `program` in a minimal ELF64 executable image.
///
/// The returned bytes are laid out as: ELF header, one `LOAD` program header,
/// the raw `program` bytes, then two section headers (the mandatory null
/// section and one program-data section describing `program`).
///
/// `start_offset` is the offset of the entry point *within* `program`.
///
/// This is currently specialized for little-endian x86-64 with a fixed load
/// address and page size; obviously add params later.
pub fn create(program: &[u8], start_offset: Addr) -> Vec<u8> {
    let addr_start = 0x400000;
    let page_size = 0x1000;
    let program_size = program.len() as u64;

    let header_len = (size_of::<ELF64Header>() + size_of::<ProgramHeader>()) as u64;
    // the program bytes are written directly after the two headers
    let program_pos = header_len;

    // The segment is loaded from file offset 0, so file offset 0 ends up at
    // `addr_start` and the program itself at `addr_start + program_pos`. The
    // segment therefore has to span the headers *and* the program; counting
    // only the program would leave its last `header_len` bytes outside of the
    // segment.
    let segment_size = header_len + program_size;

    let program_header = ProgramHeader {
        ty: 0x1,      // LOAD
        flags: 0b101, // executable, readable
        offset: 0x0,
        vaddr: addr_start,
        paddr: 0x0,
        filesz: segment_size,
        memsz: segment_size,
        align: page_size,
    };

    let header = ELF64Header {
        magic: 0x7f_45_4c_46u32.swap_bytes(),
        class: 0x2,      // 64 bit
        endianness: 0x1, // little endian
        ei_version: 0x1,
        os_abi: 0x0, // system-v
        os_abi_ver: 0x0,
        pad: [0x0; 7],
        ty: 0x2, // executable
        machine: Arch::X86_64.machine(),
        e_version: 0x1,
        entry: addr_start + program_pos + start_offset.val(),
        program_header_offset: size_of::<ELF64Header>() as u64,
        section_header_offset: program_pos + program_size,
        // x86-64 defines no processor-specific flags, so this stays zero
        // (the RISC-V ABI flag bits do not belong in an x86-64 image)
        flags: 0x0,
        header_size: size_of::<ELF64Header>() as u16,
        program_header_entry_size: size_of::<ProgramHeader>() as u16,
        program_header_num: 0x1,
        section_header_entry_size: size_of::<SectionHeader>() as u16,
        section_header_num: 0x2,
        section_header_str_idx: 0x0,
    };

    let section_null = SectionHeader {
        name_idx: 0,
        ty: 0,
        flags: 0,
        addr: 0,
        offset: 0,
        size: 0,
        link: 0,
        info: 0,
        addr_align: 0,
        entry_size: 0,
    };

    let section = SectionHeader {
        name_idx: 0,
        ty: 0x1,                    // program data
        flags: (1 << 1) | (1 << 2), // alloc & exec
        // in-memory address of the section's first byte, consistent with `entry`
        addr: addr_start + program_pos,
        offset: program_pos,
        size: program_size,
        link: 0,
        info: 0,
        addr_align: 1,
        entry_size: 0,
    };

    let mut bytes: Vec<u8> = Vec::new();
    // SAFETY: all three header types are `#[repr(C)]` structs made only of
    // integer fields whose sizes line up without padding, so every byte that
    // is viewed here is initialized.
    unsafe {
        bytes.extend(as_u8_slice(&header));
        bytes.extend(as_u8_slice(&program_header));
        bytes.extend(program);
        bytes.extend(as_u8_slice(&section_null));
        bytes.extend(as_u8_slice(&section));
    }
    bytes
}
/// Views the in-memory representation of `p` as a byte slice.
///
/// # Safety
/// Every byte of `*p` must be initialized, which means `T` must not contain
/// padding bytes.
unsafe fn as_u8_slice<T: Sized>(p: &T) -> &[u8] {
    let len = core::mem::size_of::<T>();
    let start = (p as *const T).cast::<u8>();
    // SAFETY: `start` comes from a valid reference to `T` and `len` is exactly
    // the size of `T`; the caller guarantees those bytes are initialized.
    unsafe { core::slice::from_raw_parts(start, len) }
}
+4 -8
View File
@@ -1,17 +1,13 @@
pub mod arch;
pub mod elf;
mod program;
mod symbol;
mod test;
pub use arch::*;
pub use test::*;
#[derive(Debug, Clone, Copy, PartialEq)]
pub struct Addr(u64);
impl Addr {
const NONE: Self = Self(!0);
pub fn val(&self) -> u64 {
self.0
}
}
use program::*;
pub enum Asm {
X86_64(x86_64::Asm),
+111
View File
@@ -0,0 +1,111 @@
use crate::{
backend::{elf, symbol::Symbol},
io::CompilerMsg,
};
use std::collections::HashMap;
/// The result of linking: one flat byte image with every symbol reference
/// resolved.
pub struct LinkedProgram {
// read-only data followed by the encoded functions
pub code: Vec<u8>,
// offset of the entry point within `code`, if a start symbol was given
pub start: Option<Addr>,
}
/// A program whose functions and data are still addressed by symbol.
pub struct UnlinkedProgram<I: Instr> {
// functions, in the order they will be laid out
pub fns: Vec<UnlinkedFunction<I>>,
// read-only data blobs together with the symbol that names each one
pub ro_data: Vec<(Vec<u8>, Symbol)>,
// number of slots in the symbol table; symbols index it by `Symbol::val()`,
// so every symbol used by the program has to be below this
pub sym_count: usize,
// symbol of the entry point, if any
pub start: Option<Symbol>,
}
/// A function that has not been placed in the program image yet.
pub struct UnlinkedFunction<I: Instr> {
// instructions in execution order
pub instrs: Vec<I>,
// symbol that resolves to the address of the first instruction
pub sym: Symbol,
// locations within the function for loops & such / goto;
// maps an index into `instrs` to the symbol naming that instruction's address
pub locations: HashMap<usize, Symbol>,
}
impl<I: Instr> UnlinkedProgram<I> {
    /// Lays out the read-only data followed by every function into one byte
    /// image and resolves all symbol references.
    ///
    /// References to symbols that are not placed yet are encoded with a
    /// placeholder and patched as soon as the function defining the symbol has
    /// been emitted.
    ///
    /// # Errors
    /// Returns an error if an instruction fails to encode, if a referenced
    /// symbol is never defined, or if the start symbol is never defined.
    ///
    /// # Panics
    /// Panics if a symbol's value is not below `sym_count`.
    pub fn link(self) -> Result<LinkedProgram, CompilerMsg> {
        let mut data = Vec::new();
        let mut sym_table = SymTable::new(self.sym_count);
        // symbol -> instructions waiting for it, with the position to patch
        let mut missing = HashMap::<Symbol, Vec<(I, usize)>>::new();

        for (val, id) in self.ro_data {
            sym_table.insert(id, Addr(data.len() as u64));
            data.extend(val);
        }
        // align the start of the code to 4 bytes; adds nothing when the data
        // already ends on a boundary
        data.resize(data.len().next_multiple_of(4), 0);

        for f in self.fns {
            let addr = Addr(data.len() as u64);
            // every symbol that becomes known while emitting this function
            let mut added = vec![(f.sym, addr)];
            sym_table.insert(f.sym, addr);
            for (i, instr) in f.instrs.into_iter().enumerate() {
                let i_pos = Addr(data.len() as u64);
                if let Some(sym) = f.locations.get(&i) {
                    sym_table.insert(*sym, i_pos);
                    added.push((*sym, i_pos));
                }
                if let Some((sym, marker)) = instr.encode(&mut data, &mut sym_table)? {
                    missing.entry(sym).or_default().push((instr, marker));
                }
            }
            // patch everything that was waiting on a symbol defined just now
            for (sym, addr) in added {
                if let Some(waiting) = missing.remove(&sym) {
                    for (instr, marker) in waiting {
                        instr.insert_sym(&mut data, marker, addr);
                    }
                }
            }
        }

        if !missing.is_empty() {
            return Err("program references symbols that are never defined".into());
        }

        let start = match self.start {
            None => None,
            Some(sym) => {
                let Some(addr) = sym_table.get(sym) else {
                    return Err("start symbol doesn't exist".into());
                };
                Some(addr)
            }
        };

        Ok(LinkedProgram { code: data, start })
    }
}
/// An instruction the linker can encode and, if needed, patch afterwards.
pub trait Instr {
/// Appends the encoding of this instruction to `data`.
///
/// Returns `Ok(None)` when the instruction is fully encoded. Returns
/// `Ok(Some((sym, pos)))` when it refers to `sym`, which is not in `syms`
/// yet; in that case `pos` is the position in `data` that has to be handed
/// back to `insert_sym` once the address of `sym` is known.
fn encode(
&self,
data: &mut Vec<u8>,
syms: &mut SymTable,
) -> Result<Option<(Symbol, usize)>, CompilerMsg>;
/// Patches the symbol reference left open by `encode` at `pos` so that it
/// refers to `addr`.
fn insert_sym(&self, data: &mut Vec<u8>, pos: usize, addr: Addr);
}
/// A byte address (offset) within the linked program image.
///
/// `Eq` is derived alongside `PartialEq` so the sentinel constants below can
/// be used as `match` patterns.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct Addr(u64);

impl Addr {
    /// Sentinel meaning "no address assigned yet" (all bits set).
    pub const NONE: Self = Self(!0);

    /// Same value as [`Addr::NONE`], kept so existing users keep compiling.
    ///
    /// Despite its name this is *not* address 0: it is the all-ones sentinel.
    /// Address 0 is a perfectly valid address and is written `Addr(0)`.
    pub const ZERO: Self = Self::NONE;

    /// Returns the raw address value.
    pub fn val(&self) -> u64 {
        self.0
    }
}
/// Maps symbols to addresses; a symbol's value is used directly as the index
/// of its slot.
pub struct SymTable(Vec<Addr>);

impl SymTable {
    /// Creates a table with `len` slots, none of which has an address yet.
    pub fn new(len: usize) -> Self {
        let slots = vec![Addr::ZERO; len];
        Self(slots)
    }

    /// Records `addr` as the address of `sym`.
    ///
    /// Panics if `sym` lies outside of the table.
    pub fn insert(&mut self, sym: Symbol, addr: Addr) {
        let slot = &mut self.0[sym.val()];
        *slot = addr;
    }

    /// Returns the address of `sym`, or `None` if it has not been inserted.
    ///
    /// Panics if `sym` lies outside of the table.
    pub fn get(&self, sym: Symbol) -> Option<Addr> {
        let addr = self.0[sym.val()];
        if addr == Addr::ZERO {
            None
        } else {
            Some(addr)
        }
    }
}
impl LinkedProgram {
    /// Wraps the linked code in an ELF executable image.
    ///
    /// Panics with "no start" if the program was linked without a start symbol.
    pub fn to_elf(&self) -> Vec<u8> {
        let entry = self.start.expect("no start");
        elf::create(&self.code, entry)
    }
}
+147
View File
@@ -0,0 +1,147 @@
use std::collections::HashMap;
/// Opaque handle naming something the linker assigns an address to.
#[derive(Clone, Copy, Hash, PartialEq, Eq)]
pub struct Symbol(usize);

impl Symbol {
    /// Returns the numeric value of this symbol.
    pub fn val(&self) -> usize {
        let Self(index) = *self;
        index
    }

    /// Builds a symbol directly from a numeric value.
    pub fn raw(val: usize) -> Self {
        Symbol(val)
    }
}
/// A symbol whose definition has not been written yet.
///
/// Intentionally neither `Copy` nor `Clone`: a value of this type should only
/// be consumed once.
pub struct WritableSymbol(Symbol);

/// Gives read access to the wrapped [`Symbol`].
impl std::ops::Deref for WritableSymbol {
    type Target = Symbol;

    fn deref(&self) -> &Self::Target {
        let Self(inner) = self;
        inner
    }
}
// pub struct SymbolSpace {
// ro_data: Vec<(Symbol, Vec<u8>)>,
// fns: Vec<(Symbol, IRLFunction)>,
// len: usize,
// labels: Vec<Option<String>>,
// }
//
// pub struct SymbolSpaceBuilder {
// symbols: usize,
// unwritten_fns: Vec<(WritableSymbol, FnID)>,
// fn_map: HashMap<FnID, Symbol>,
// data_map: HashMap<DataID, Symbol>,
// ro_data: Vec<(Symbol, Vec<u8>)>,
// fns: Vec<(Symbol, IRLFunction)>,
// labels: Vec<Option<String>>,
// }
//
// impl SymbolSpace {
// pub fn ro_data(&self) -> &[(Symbol, Vec<u8>)] {
// &self.ro_data
// }
// pub fn fns(&self) -> &[(Symbol, IRLFunction)] {
// &self.fns
// }
// pub fn labels(&self) -> &[Option<String>] {
// &self.labels
// }
// pub fn len(&self) -> usize {
// self.len
// }
// }
//
// impl SymbolSpaceBuilder {
// pub fn new() -> Self {
// Self {
// symbols: 0,
// unwritten_fns: Vec::new(),
// fn_map: HashMap::new(),
// data_map: HashMap::new(),
// ro_data: Vec::new(),
// fns: Vec::new(),
// labels: Vec::new(),
// }
// }
// pub fn with_entries(entries: &[FnID]) -> SymbolSpaceBuilder {
// let mut s = Self::new();
// for e in entries {
// s.func(*e);
// }
// s
// }
// pub fn pop_fn(&mut self) -> Option<(WritableSymbol, FnID)> {
// self.unwritten_fns.pop()
// }
// pub fn anon_ro_data(&mut self, data: &[u8], label: Option<String>) -> Symbol {
// let sym = self.reserve();
// self.write_ro_data(sym, data.to_vec(), label)
// }
// pub fn ro_data(&mut self, id: DataID, data: &[u8], label: Option<&str>) -> Symbol {
// match self.data_map.get(&id) {
// Some(s) => *s,
// None => {
// let sym = self.reserve();
// self.data_map.insert(id, *sym);
// self.write_ro_data(sym, data.to_vec(), label.map(|l| l.to_string()))
// }
// }
// }
// pub fn func(&mut self, id: FnID) -> Symbol {
// match self.fn_map.get(&id) {
// Some(s) => *s,
// None => {
// let wsym = self.reserve();
// let sym = *wsym;
// self.unwritten_fns.push((wsym, id));
// self.fn_map.insert(id, sym);
// sym
// }
// }
// }
// pub fn write_ro_data(
// &mut self,
// sym: WritableSymbol,
// data: Vec<u8>,
// name: Option<String>,
// ) -> Symbol {
// self.ro_data.push((*sym, data));
// self.labels[sym.0.0] = name;
// *sym
// }
// pub fn write_fn(
// &mut self,
// sym: WritableSymbol,
// func: IRLFunction,
// name: Option<String>,
// ) -> Symbol {
// self.fns.push((*sym, func));
// self.labels[sym.0.0] = name;
// *sym
// }
// pub fn reserve(&mut self) -> WritableSymbol {
// let val = self.symbols;
// self.symbols += 1;
// self.labels.push(None);
// WritableSymbol(Symbol(val))
// }
// pub fn len(&self) -> usize {
// self.symbols
// }
// pub fn finish(self) -> Option<SymbolSpace> {
// if self.unwritten_fns.is_empty() {
// Some(SymbolSpace {
// len: self.symbols,
// fns: self.fns,
// ro_data: self.ro_data,
// labels: self.labels,
// })
// } else {
// None
// }
// }
// }
+26 -7
View File
@@ -1,16 +1,35 @@
use crate::backend::{
Addr, elf,
x86_64::{Asm, Instr, mov, reg::*},
program::{UnlinkedFunction, UnlinkedProgram},
symbol::Symbol,
x86_64::{instr::*, reg::*},
};
use std::{fs::OpenOptions, io::Write, os::unix::fs::OpenOptionsExt, process::Command};
pub fn test_x86_64() {
let asm = Asm {
instrs: vec![mov(eax, 1), mov(ebx, 39), Instr::Int { code: 0x80 }],
let s = b"Hello world!\n";
let program = UnlinkedProgram {
fns: vec![UnlinkedFunction {
instrs: vec![
mov(eax, 4),
mov(ebx, 1),
lea(ecx, Symbol::raw(1)),
mov(edx, s.len() as u64),
int(0x80),
mov(eax, 1),
mov(ebx, 39),
int(0x80),
],
sym: Symbol::raw(0),
locations: Default::default(),
}],
ro_data: vec![(s.to_vec(), Symbol::raw(1))],
sym_count: 3,
start: Some(Symbol::raw(0)),
};
let mut out = Vec::new();
asm.compile(&mut out).expect("failed to compile");
let binary = elf::create(&out, Addr(0));
let Ok(linked) = program.link() else {
panic!("failed to link");
};
let binary = linked.to_elf();
let path = "./x86_64_test";
let mut file = OpenOptions::new()
.create(true)
+3 -1
View File
@@ -9,6 +9,8 @@ use crate::{
parser::{self, ExprTy, Ident, Node, parse_file},
};
const EXTENSION: &str = ".lang";
pub fn parse_program(path: impl AsRef<Path>, output: &mut CompilerOutput) -> Option<Ir> {
let path = path.as_ref();
let mut imports = Imports::default();
@@ -16,7 +18,7 @@ pub fn parse_program(path: impl AsRef<Path>, output: &mut CompilerOutput) -> Opt
imports.add(path.file_stem().unwrap().to_str().unwrap());
while let Some(next) = imports.new.pop() {
imports.done.insert(next.clone());
let path = dir.join(next + ".lang");
let path = dir.join(next + EXTENSION);
println!("=== {path:?}");
let body = parse_file(path, output)?;
print!("{}", body.new_dsp());
BIN
View File
Binary file not shown.