linking / symbol stuff

This commit is contained in:
2026-06-04 04:28:14 -04:00
parent 380a0f977a
commit 978bac88ed
9 changed files with 506 additions and 43 deletions
+68 -25
View File
@@ -1,4 +1,10 @@
use crate::io::CompilerMsg;
use crate::{
backend::{
program::{Addr, SymTable},
symbol::Symbol,
},
io::CompilerMsg,
};
pub struct Asm {
pub instrs: Vec<Instr>,
@@ -7,6 +13,7 @@ pub struct Asm {
/// A single machine instruction that this backend knows how to encode.
pub enum Instr {
/// Copy `src` (another register or an immediate value) into register `dst`.
Mov { dst: RegMode, src: RegImm },
/// Software interrupt; encoded as the byte `0xcd` followed by `code`.
Int { code: u8 },
/// Load the address of `sym` into `dst`. The address is encoded as a
/// 32-bit offset relative to the end of the instruction; if the symbol is
/// not yet known when encoding, four placeholder bytes are reserved and
/// patched later.
Lea { dst: RegMode, sym: Symbol },
}
pub enum RegImm {
@@ -22,22 +29,17 @@ pub struct RegMode {
high: bool,
}
impl Asm {
    /// Encodes every instruction, in order, appending the produced bytes to `out`.
    ///
    /// # Errors
    /// Stops at the first instruction that fails to compile and returns its error;
    /// bytes already appended to `out` are left in place.
    pub fn compile(&self, out: &mut Vec<u8>) -> Result<(), CompilerMsg> {
        self.instrs.iter().try_for_each(|instr| instr.compile(out))
    }
}
impl Instr {
pub fn compile(&self, out: &mut Vec<u8>) -> Result<(), CompilerMsg> {
impl super::super::program::Instr for Instr {
fn encode(
&self,
data: &mut Vec<u8>,
syms: &mut SymTable,
) -> Result<Option<(Symbol, usize)>, CompilerMsg> {
match self {
Instr::Mov { dst, src } => {
let width = dst.width;
if width == BitWidth::B16 {
out.push(0x66);
data.push(0x66);
}
let dst8 = dst.gt8();
let b64 = width == BitWidth::B64;
@@ -47,31 +49,58 @@ impl Instr {
} else {
false
};
// special 64-bit / register 4-7 indicator
if dst8 || src8 || b64 || (dst.gt4() && !dst.high) {
out.push(0x40 | dst8 as u8 | ((b64 as u8) << 3) | ((src8 as u8) << 2));
data.push(0x40 | dst8 as u8 | ((b64 as u8) << 3) | ((src8 as u8) << 2));
}
match src {
RegImm::Reg(src) => {
if dst.width != src.width {
return Err("src and dst are not the same size".into());
}
out.push(0x88 | !b8 as u8);
data.push(0x88 | !b8 as u8);
let modrm = 0b11_000_000 | (src.base() << 3) | dst.base();
out.push(modrm);
data.push(modrm);
}
&RegImm::Imm(imm) => {
if imm > width.max() {
return Err("immediate cannot fit in register".into());
}
out.push(0xb0 | ((!b8 as u8) << 3) | dst.base());
out.extend(&imm.to_le_bytes()[..width.bytes()]);
data.push(0xb0 | ((!b8 as u8) << 3) | dst.base());
data.extend(&imm.to_le_bytes()[..width.bytes()]);
}
}
}
Instr::Int { code } => out.extend([0xcd, *code]),
Instr::Int { code } => data.extend([0xcd, *code]),
Instr::Lea { dst, sym } => {
data.extend([
0x48 | ((dst.gt8() as u8) << 2),
0x8d,
0x05 | (dst.base() << 3),
]);
let Some(addr) = syms.get(*sym) else {
let pos = data.len();
data.extend([0; 4]);
return Ok(Some((*sym, pos)));
};
data.extend(addr_offset(data.len(), addr));
}
}
Ok(())
Ok(None)
}
/// Patches a symbol reference whose address was unknown at encode time.
///
/// `pos` is the index in `data` of the 4-byte placeholder that `encode`
/// reserved (the position it returned together with the unresolved symbol).
/// The placeholder is overwritten with the offset from the end of that
/// field to `addr`.
///
/// # Panics
/// Panics if `self` is an instruction that never references a symbol, or if
/// `data` does not contain the four bytes starting at `pos`.
fn insert_sym(&self, data: &mut Vec<u8>, pos: usize, addr: Addr) {
    match self {
        Self::Lea { .. } => data[pos..pos + 4].copy_from_slice(&addr_offset(pos, addr)),
        // Listed explicitly (no `_` arm) so that adding a new variant forces
        // a decision about whether it can carry a symbol.
        Self::Mov { .. } | Self::Int { .. } => panic!("unknown symbol insertion"),
    }
}
}
/// Returns the little-endian 32-bit offset from the end of a 4-byte field
/// located at `pos` to the address `addr`.
///
/// Assumes the next instruction starts directly after that 4-byte field, so
/// the offset is measured from `pos + 4`. Both values are narrowed to `i32`
/// with `as`, so positions or addresses outside the `i32` range are truncated.
fn addr_offset(pos: usize, addr: Addr) -> [u8; 4] {
    let next_instr = (pos + 4) as i32;
    let target = addr.val() as i32;
    (target - next_instr).to_le_bytes()
}
impl RegMode {
@@ -169,10 +198,21 @@ impl BitWidth {
}
}
pub fn mov(dst: RegMode, src: impl Into<RegImm>) -> Instr {
Instr::Mov {
dst,
src: src.into(),
/// Shorthand constructors for [`Instr`] values.
pub mod instr {
    use super::*;

    /// Builds an [`Instr::Mov`] that copies `src` into `dst`.
    ///
    /// `src` may be anything convertible into a [`RegImm`].
    pub fn mov(dst: RegMode, src: impl Into<RegImm>) -> Instr {
        let src: RegImm = src.into();
        Instr::Mov { dst, src }
    }

    /// Builds an [`Instr::Lea`] that loads the address of `sym` into `dst`.
    pub fn lea(dst: RegMode, sym: Symbol) -> Instr {
        Instr::Lea { sym, dst }
    }

    /// Builds an [`Instr::Int`] with the given interrupt `code`.
    pub fn int(code: u8) -> Instr {
        Instr::Int { code }
    }
}
@@ -190,13 +230,16 @@ impl From<u64> for RegImm {
#[cfg(test)]
mod test {
use crate::backend::program::Instr as _;
use super::*;
use instr::*;
use reg::*;
fn eq(expected: impl AsRef<[u8]>, got: Instr) {
let expected = expected.as_ref();
let mut res = Vec::new();
if let Err(e) = got.compile(&mut res) {
if let Err(e) = got.encode(&mut res, &mut SymTable::new(0)) {
panic!("expected {expected:x?}, failed to compile: {}", e.msg);
}
assert_eq!(expected, &res[..], "expected {expected:x?}, got {res:x?}");