Files
lang/src/backend/arch/x86_64.rs
T
2026-06-04 04:28:14 -04:00

313 lines
9.0 KiB
Rust

use crate::{
backend::{
program::{Addr, SymTable},
symbol::Symbol,
},
io::CompilerMsg,
};
/// A list of x86-64 instructions.
pub struct Asm {
/// The instructions held by this list.
pub instrs: Vec<Instr>,
}
/// An x86-64 instruction that this backend knows how to encode.
pub enum Instr {
/// `mov dst, src`: load a register with another register or an immediate.
Mov { dst: RegMode, src: RegImm },
/// `int code`: encoded as the byte `0xcd` followed by `code`.
Int { code: u8 },
/// `lea dst, sym`: encoded with a 4-byte displacement to the address of
/// `sym`, measured from the end of the instruction.
Lea { dst: RegMode, sym: Symbol },
}
/// A source operand: either a register or an immediate value.
pub enum RegImm {
/// A register operand.
Reg(RegMode),
/// An immediate value; the encoder checks that it fits the destination width.
Imm(u64),
}
/// A hardware register number (the registers defined in this file use 0-15).
pub struct Reg(u8);
/// A register together with the way it is accessed (operand width and, for
/// 8-bit accesses, whether the high-byte form is meant).
pub struct RegMode {
// register number used when building the instruction bytes
reg: Reg,
// operand size selected by this register name
width: BitWidth,
// set for the high-byte registers (`ah`, `ch`, `dh`, `bh`), which reuse the
// register numbers of `spl`, `bpl`, `sil`, `dil`
high: bool,
}
impl super::super::program::Instr for Instr {
fn encode(
&self,
data: &mut Vec<u8>,
syms: &mut SymTable,
) -> Result<Option<(Symbol, usize)>, CompilerMsg> {
match self {
Instr::Mov { dst, src } => {
let width = dst.width;
if width == BitWidth::B16 {
data.push(0x66);
}
let dst8 = dst.gt8();
let b64 = width == BitWidth::B64;
let b8 = width == BitWidth::B8;
let src8 = if let RegImm::Reg(src) = src {
src.gt8()
} else {
false
};
// special 64-bit / register 4-7 indicator
if dst8 || src8 || b64 || (dst.gt4() && !dst.high) {
data.push(0x40 | dst8 as u8 | ((b64 as u8) << 3) | ((src8 as u8) << 2));
}
match src {
RegImm::Reg(src) => {
if dst.width != src.width {
return Err("src and dst are not the same size".into());
}
data.push(0x88 | !b8 as u8);
let modrm = 0b11_000_000 | (src.base() << 3) | dst.base();
data.push(modrm);
}
&RegImm::Imm(imm) => {
if imm > width.max() {
return Err("immediate cannot fit in register".into());
}
data.push(0xb0 | ((!b8 as u8) << 3) | dst.base());
data.extend(&imm.to_le_bytes()[..width.bytes()]);
}
}
}
Instr::Int { code } => data.extend([0xcd, *code]),
Instr::Lea { dst, sym } => {
data.extend([
0x48 | ((dst.gt8() as u8) << 2),
0x8d,
0x05 | (dst.base() << 3),
]);
let Some(addr) = syms.get(*sym) else {
let pos = data.len();
data.extend([0; 4]);
return Ok(Some((*sym, pos)));
};
data.extend(addr_offset(data.len(), addr));
}
}
Ok(None)
}
fn insert_sym(&self, data: &mut Vec<u8>, pos: usize, addr: Addr) {
match self {
Self::Lea { .. } => data[pos..pos + 4].copy_from_slice(&addr_offset(pos, addr)),
_ => panic!("unkown symbol insertion"),
}
}
}
/// Little-endian 32-bit displacement from the end of the 4-byte field that
/// starts at `pos` to `addr`.
///
/// The displacement is measured from `pos + 4`, i.e. it assumes the next
/// instruction begins directly after the field.
fn addr_offset(pos: usize, addr: Addr) -> [u8; 4] {
    let next_instr = (pos + 4) as i32;
    let target = addr.val() as i32;
    (target - next_instr).to_le_bytes()
}
impl RegMode {
    /// The low three bits of the register number, as placed in ModRM and
    /// opcode register fields.
    pub fn base(&self) -> u8 {
        self.reg.0 % 8
    }

    /// Whether the register number is 8 or above, i.e. not one of the first
    /// eight registers (0-7).
    pub fn gt8(&self) -> bool {
        self.reg.0 > 0b0111
    }

    /// Whether the register number is 4 or above.
    pub fn gt4(&self) -> bool {
        self.reg.0 > 0b0011
    }
}
// Generates the `reg` module (one `RegMode` constant per register name) and
// `RegMode::parse` from a table of rows shaped like
//   <number> : <64-bit name> <32-bit name> <16-bit name> <8-bit name> [<high-byte name>=<register>],
macro_rules! def_regs {
($($val:literal : $B64:ident $B32:ident $B16:ident $B8:ident $($B8H:ident=$hval:expr)?,)*) => {
#[allow(non_upper_case_globals)]
pub mod reg {
use super::{RegMode, BitWidth, Reg};
$(
pub const $B64: RegMode = RegMode { reg: Reg($val), width: BitWidth::B64, high: false };
pub const $B32: RegMode = RegMode { reg: Reg($val), width: BitWidth::B32, high: false };
pub const $B16: RegMode = RegMode { reg: Reg($val), width: BitWidth::B16, high: false };
pub const $B8 : RegMode = RegMode { reg: Reg($val), width: BitWidth::B8, high: false };
$(
// the optional high-byte register takes its register number from `$hval`,
// not from this row's `$val`
pub const $B8H: RegMode = RegMode { reg: $hval.reg, width: BitWidth::B8, high: true };
)?
)*
}
impl RegMode {
// Looks up a register by name, ignoring case; `None` if the name is not in
// the table.
pub fn parse(s: &str) -> Option<Self> {
Some(match s.to_lowercase().as_str() {
$(
stringify!($B64) => reg::$B64,
stringify!($B32) => reg::$B32,
stringify!($B16) => reg::$B16,
stringify!($B8 ) => reg::$B8,
$(
stringify!($B8H) => reg::$B8H,
)?
)*
_ => return None,
})
}
}
};
}
// Register table. Each row is: register number, then the 64-, 32-, 16- and
// 8-bit names for that number. A trailing `name=other` additionally defines a
// high-byte register `name` that reuses `other`'s register number (for
// example `ah` gets number 0b0100, the same as `spl`).
def_regs! {
0b0000 : rax eax ax al ah=spl,
0b0001 : rcx ecx cx cl ch=bpl,
0b0010 : rdx edx dx dl dh=sil,
0b0011 : rbx ebx bx bl bh=dil,
0b0100 : rsp esp sp spl,
0b0101 : rbp ebp bp bpl,
0b0110 : rsi esi si sil,
0b0111 : rdi edi di dil,
0b1000 : r8 r8d r8w r8b,
0b1001 : r9 r9d r9w r9b,
0b1010 : r10 r10d r10w r10b,
0b1011 : r11 r11d r11w r11b,
0b1100 : r12 r12d r12w r12b,
0b1101 : r13 r13d r13w r13b,
0b1110 : r14 r14d r14w r14b,
0b1111 : r15 r15d r15w r15b,
}
/// Operand size of a register access.
#[derive(Clone, Copy, PartialEq)]
pub enum BitWidth {
    /// 64-bit operand.
    B64,
    /// 32-bit operand.
    B32,
    /// 16-bit operand.
    B16,
    /// 8-bit operand.
    B8,
}

impl BitWidth {
    /// Largest unsigned value representable in this width.
    pub const fn max(&self) -> u64 {
        // Drop the bits above this width from an all-ones value.
        let unused_bits = u64::BITS - 8 * self.bytes() as u32;
        u64::MAX >> unused_bits
    }

    /// Size of this width in bytes.
    pub const fn bytes(&self) -> usize {
        match *self {
            Self::B8 => 1,
            Self::B16 => 2,
            Self::B32 => 4,
            Self::B64 => 8,
        }
    }
}
/// Shorthand constructors for [`Instr`] values.
pub mod instr {
    use super::*;

    /// Builds `mov dst, src`; `src` may be anything convertible into a
    /// [`RegImm`] (a register or a `u64` immediate).
    pub fn mov(dst: RegMode, src: impl Into<RegImm>) -> Instr {
        let src = src.into();
        Instr::Mov { dst, src }
    }

    /// Builds `lea dst, sym`.
    pub fn lea(dst: RegMode, sym: Symbol) -> Instr {
        Instr::Lea { dst, sym }
    }

    /// Builds `int code`.
    pub fn int(code: u8) -> Instr {
        Instr::Int { code }
    }
}
impl From<RegMode> for RegImm {
fn from(value: RegMode) -> Self {
Self::Reg(value)
}
}
impl From<u64> for RegImm {
fn from(value: u64) -> Self {
Self::Imm(value)
}
}
// Byte-exact encoding tests for `mov`.
#[cfg(test)]
mod test {
use crate::backend::program::Instr as _;
use super::*;
use instr::*;
use reg::*;
// Encodes `got` into an empty buffer and asserts that the produced bytes equal
// `expected`; panics with the compiler message if encoding fails.
fn eq(expected: impl AsRef<[u8]>, got: Instr) {
let expected = expected.as_ref();
let mut res = Vec::new();
if let Err(e) = got.encode(&mut res, &mut SymTable::new(0)) {
panic!("expected {expected:x?}, failed to compile: {}", e.msg);
}
assert_eq!(expected, &res[..], "expected {expected:x?}, got {res:x?}");
}
// register-to-register moves: every width, high-byte registers, and r8-r15
#[test]
fn reg_reg() {
// used objdump on some nasm compiled assembly
eq([0x48, 0x89, 0xd8], mov(rax, rbx));
eq([0x89, 0xd8], mov(eax, ebx));
eq([0x66, 0x89, 0xd8], mov(ax, bx));
eq([0x88, 0xd8], mov(al, bl));
eq([0x88, 0xfc], mov(ah, bh));
eq([0x88, 0xf8], mov(al, bh));
eq([0x88, 0xdc], mov(ah, bl));
eq([0x40, 0x88, 0xe7], mov(dil, spl));
eq([0x4d, 0x89, 0xc8], mov(r8, r9));
eq([0x45, 0x89, 0xc8], mov(r8d, r9d));
eq([0x66, 0x45, 0x89, 0xc8], mov(r8w, r9w));
eq([0x45, 0x88, 0xc8], mov(r8b, r9b));
eq([0x49, 0x89, 0xc0], mov(r8, rax));
eq([0x4c, 0x89, 0xc0], mov(rax, r8));
eq([0x4d, 0x89, 0xd1], mov(r9, r10));
eq([0x4d, 0x89, 0xe0], mov(r8, r12));
}
// immediate-to-register moves: every width, for both r8-r15 and the first
// eight registers
#[test]
fn reg_imm() {
eq(
[0x49, 0xbf, 0xf0, 0xde, 0xbc, 0x9a, 0x78, 0x56, 0x34, 0x12],
mov(r15, 0x123456789abcdef0),
);
eq(
[0x49, 0xb8, 0xf0, 0xde, 0xbc, 0x9a, 0x78, 0x56, 0x34, 0x12],
mov(r8, 0x123456789abcdef0),
);
eq(
[0x49, 0xb9, 0xf0, 0xde, 0xbc, 0x9a, 0x78, 0x56, 0x34, 0x12],
mov(r9, 0x123456789abcdef0),
);
eq([0x41, 0xb9, 0x78, 0x56, 0x34, 0x12], mov(r9d, 0x12345678));
eq([0x66, 0x41, 0xb9, 0x34, 0x12], mov(r9w, 0x1234));
eq([0x41, 0xb1, 0x12], mov(r9b, 0x12));
eq([0x41, 0xb0, 0x12], mov(r8b, 0x12));
eq([0x41, 0xb7, 0x12], mov(r15b, 0x12));
eq(
[0x48, 0xb8, 0xf0, 0xde, 0xbc, 0x9a, 0x78, 0x56, 0x34, 0x12],
mov(rax, 0x123456789abcdef0),
);
eq(
[0x48, 0xbb, 0xf0, 0xde, 0xbc, 0x9a, 0x78, 0x56, 0x34, 0x12],
mov(rbx, 0x123456789abcdef0),
);
eq(
[0x48, 0xbf, 0xf0, 0xde, 0xbc, 0x9a, 0x78, 0x56, 0x34, 0x12],
mov(rdi, 0x123456789abcdef0),
);
eq([0xbb, 0x78, 0x56, 0x34, 0x12], mov(ebx, 0x12345678));
eq([0x66, 0xbb, 0x34, 0x12], mov(bx, 0x1234));
eq([0xb3, 0x12], mov(bl, 0x12));
eq([0xb7, 0x12], mov(bh, 0x12));
eq([0xb4, 0x12], mov(ah, 0x12));
eq([0x40, 0xb7, 0x12], mov(dil, 0x12));
}
}