x86_64 compiler + elf output (can compile code that returns exit code)

This commit is contained in:
2026-06-03 01:50:43 -04:00
parent 473ddab0d4
commit 380a0f977a
11 changed files with 422 additions and 76 deletions
-24
View File
@@ -1,24 +0,0 @@
mod namespace;
pub use namespace::*;
use super::Id;
pub struct Fn {
pub body: Body,
}
pub struct Body {
pub statements: Vec<Statement>,
}
pub struct Statement {
ty: StatementTy,
}
pub enum StatementTy {
Define,
Assign,
Call { target: VarId, args: VarId },
}
pub type VarId = usize;
+238 -32
View File
@@ -1,32 +1,120 @@
use crate::io::CompilerMsg;
/// A sequence of x86-64 instructions; `Asm::compile` encodes them in order.
pub struct Asm {
/// Instructions in the order they are encoded.
pub instrs: Vec<Instr>,
}
/// A single x86-64 instruction that `Instr::compile` knows how to encode.
pub enum Instr {
/// Copies the register or immediate `src` into the register `dst`.
Mov { dst: RegMode, src: RegImm },
/// Software interrupt; encoded as the byte `0xcd` followed by `code`.
Int { code: u8 },
}
/// Source operand of an instruction: either a register or an immediate value.
pub enum RegImm {
/// Read the value from a register.
Reg(RegMode),
/// Use a constant; `Instr::compile` rejects it if it exceeds the destination width.
Imm(u64),
}
/// Raw register number. `RegMode::base` keeps its low three bits for the
/// encoding, and `RegMode::gt8` reports numbers of 8 and above.
pub struct Reg(u8);
pub struct RegMode {
reg: Reg,
mode: BitMode,
width: BitWidth,
high: bool,
}
impl Asm {
    /// Encodes every instruction, in order, appending the bytes to `out`.
    ///
    /// # Errors
    /// Stops at, and returns, the first error reported by [`Instr::compile`].
    /// Bytes produced by earlier instructions remain in `out`.
    pub fn compile(&self, out: &mut Vec<u8>) -> Result<(), CompilerMsg> {
        self.instrs.iter().try_for_each(|instr| instr.compile(out))
    }
}
impl Instr {
pub fn compile(&self, out: &mut Vec<u8>) -> Result<(), CompilerMsg> {
match self {
Instr::Mov { dst, src } => {
let width = dst.width;
if width == BitWidth::B16 {
out.push(0x66);
}
let dst8 = dst.gt8();
let b64 = width == BitWidth::B64;
let b8 = width == BitWidth::B8;
let src8 = if let RegImm::Reg(src) = src {
src.gt8()
} else {
false
};
if dst8 || src8 || b64 || (dst.gt4() && !dst.high) {
out.push(0x40 | dst8 as u8 | ((b64 as u8) << 3) | ((src8 as u8) << 2));
}
match src {
RegImm::Reg(src) => {
if dst.width != src.width {
return Err("src and dst are not the same size".into());
}
out.push(0x88 | !b8 as u8);
let modrm = 0b11_000_000 | (src.base() << 3) | dst.base();
out.push(modrm);
}
&RegImm::Imm(imm) => {
if imm > width.max() {
return Err("immediate cannot fit in register".into());
}
out.push(0xb0 | ((!b8 as u8) << 3) | dst.base());
out.extend(&imm.to_le_bytes()[..width.bytes()]);
}
}
}
Instr::Int { code } => out.extend([0xcd, *code]),
}
Ok(())
}
}
impl RegMode {
    /// Low three bits of the register number, as placed in ModRM / opcode
    /// register fields.
    pub fn base(&self) -> u8 {
        self.reg.0 % 8
    }

    /// Returns `true` when the register is not one of the first 8 (0-7).
    pub fn gt8(&self) -> bool {
        self.reg.0 > 0b0111
    }

    /// Returns `true` when the register is not one of the first 4 (0-3).
    pub fn gt4(&self) -> bool {
        self.reg.0 > 0b0011
    }
}
macro_rules! def_regs {
($($val:literal $reg:ident: $B64:literal $B32:literal $B16:literal $B8:literal $($B16H:literal)?,)*) => {
impl Reg {
($($val:literal : $B64:ident $B32:ident $B16:ident $B8:ident $($B8H:ident=$hval:expr)?,)*) => {
#[allow(non_upper_case_globals)]
pub mod reg {
use super::{RegMode, BitWidth, Reg};
$(
pub const $reg: u8 = $val;
pub const $B64: RegMode = RegMode { reg: Reg($val), width: BitWidth::B64, high: false };
pub const $B32: RegMode = RegMode { reg: Reg($val), width: BitWidth::B32, high: false };
pub const $B16: RegMode = RegMode { reg: Reg($val), width: BitWidth::B16, high: false };
pub const $B8 : RegMode = RegMode { reg: Reg($val), width: BitWidth::B8, high: false };
$(
pub const $B8H: RegMode = RegMode { reg: $hval.reg, width: BitWidth::B8, high: true };
)?
)*
}
impl RegMode {
pub fn parse(s: &str) -> Option<Self> {
let (reg, mode) = match s.to_lowercase().as_str() {
Some(match s.to_lowercase().as_str() {
$(
$B64 => ($val, BitMode::B64),
$B32 => ($val, BitMode::B32),
$B16 => ($val, BitMode::B16),
$B8 => ($val, BitMode::B8),
$($B16H => ($val, BitMode::B16H),)?
stringify!($B64) => reg::$B64,
stringify!($B32) => reg::$B32,
stringify!($B16) => reg::$B16,
stringify!($B8 ) => reg::$B8,
$(
stringify!($B8H) => reg::$B8H,
)?
)*
_ => return None,
};
Some(RegMode {
reg: Reg(reg),
mode,
})
}
}
@@ -34,30 +122,148 @@ macro_rules! def_regs {
}
def_regs! {
0b0000 A : "rax" "eax" "ax" "al" "ah",
0b0001 C : "rcx" "ecx" "cx" "cl" "ch",
0b0010 D : "rdx" "edx" "dx" "dl" "dh",
0b0011 B : "rbx" "ebx" "bx" "bl" "bh",
0b0000 : rax eax ax al ah=spl,
0b0001 : rcx ecx cx cl ch=bpl,
0b0010 : rdx edx dx dl dh=sil,
0b0011 : rbx ebx bx bl bh=dil,
0b0100 SP: "rsp" "esp" "sp" "spl",
0b0101 BP: "rbp" "ebp" "bp" "sbl",
0b0110 SI: "rsi" "esi" "si" "sil",
0b0111 DI: "rdi" "edi" "di" "dil",
0b0100 : rsp esp sp spl,
0b0101 : rbp ebp bp bpl,
0b0110 : rsi esi si sil,
0b0111 : rdi edi di dil,
0b1000 R8 : "r8" "r8d" "r8w" "r8b",
0b1001 R9 : "r9" "r9d" "r9w" "r9b",
0b1010 R10: "r10" "r10d" "r10w" "r10b",
0b1011 R11: "r11" "r11d" "r11w" "r11b",
0b1100 R12: "r12" "r12d" "r12w" "r12b",
0b1101 R13: "r13" "r13d" "r13w" "r13b",
0b1110 R14: "r14" "r14d" "r14w" "r14b",
0b1111 R15: "r15" "r15d" "r15w" "r15b",
0b1000 : r8 r8d r8w r8b,
0b1001 : r9 r9d r9w r9b,
0b1010 : r10 r10d r10w r10b,
0b1011 : r11 r11d r11w r11b,
0b1100 : r12 r12d r12w r12b,
0b1101 : r13 r13d r13w r13b,
0b1110 : r14 r14d r14w r14b,
0b1111 : r15 r15d r15w r15b,
}
pub enum BitMode {
#[derive(Clone, Copy, PartialEq)]
pub enum BitWidth {
B64,
B32,
B16,
B16H,
B8,
}
impl BitWidth {
pub const fn max(&self) -> u64 {
match self {
Self::B64 => u64::MAX,
Self::B32 => u32::MAX as u64,
Self::B16 => u16::MAX as u64,
Self::B8 => u8::MAX as u64,
}
}
pub const fn bytes(&self) -> usize {
match self {
Self::B64 => 8,
Self::B32 => 4,
Self::B16 => 2,
Self::B8 => 1,
}
}
}
/// Builds an [`Instr::Mov`] that copies `src` (a register or a `u64`
/// immediate) into `dst`.
pub fn mov(dst: RegMode, src: impl Into<RegImm>) -> Instr {
    let src: RegImm = src.into();
    Instr::Mov { dst, src }
}
impl From<RegMode> for RegImm {
fn from(value: RegMode) -> Self {
Self::Reg(value)
}
}
impl From<u64> for RegImm {
fn from(value: u64) -> Self {
Self::Imm(value)
}
}
// Encoding tests: each case compares the bytes produced by `Instr::compile`
// against a hand-written expected byte sequence.
#[cfg(test)]
mod test {
use super::*;
use reg::*;
/// Compiles `got` into a fresh buffer and asserts the bytes equal `expected`;
/// panics with the compiler message if compilation fails.
fn eq(expected: impl AsRef<[u8]>, got: Instr) {
let expected = expected.as_ref();
let mut res = Vec::new();
if let Err(e) = got.compile(&mut res) {
panic!("expected {expected:x?}, failed to compile: {}", e.msg);
}
assert_eq!(expected, &res[..], "expected {expected:x?}, got {res:x?}");
}
/// Register-to-register moves across all widths, high-byte registers and r8-r15.
#[test]
fn reg_reg() {
// used objdump on some nasm compiled assembly
eq([0x48, 0x89, 0xd8], mov(rax, rbx));
eq([0x89, 0xd8], mov(eax, ebx));
eq([0x66, 0x89, 0xd8], mov(ax, bx));
eq([0x88, 0xd8], mov(al, bl));
// high-byte registers are expected without any prefix byte
eq([0x88, 0xfc], mov(ah, bh));
eq([0x88, 0xf8], mov(al, bh));
eq([0x88, 0xdc], mov(ah, bl));
// dil/spl are expected with a leading 0x40 prefix byte
eq([0x40, 0x88, 0xe7], mov(dil, spl));
// r8-r15 on one or both sides
eq([0x4d, 0x89, 0xc8], mov(r8, r9));
eq([0x45, 0x89, 0xc8], mov(r8d, r9d));
eq([0x66, 0x45, 0x89, 0xc8], mov(r8w, r9w));
eq([0x45, 0x88, 0xc8], mov(r8b, r9b));
eq([0x49, 0x89, 0xc0], mov(r8, rax));
eq([0x4c, 0x89, 0xc0], mov(rax, r8));
eq([0x4d, 0x89, 0xd1], mov(r9, r10));
eq([0x4d, 0x89, 0xe0], mov(r8, r12));
}
/// Immediate-to-register moves; the immediate is expected in little-endian
/// order and sized to the destination register.
#[test]
fn reg_imm() {
eq(
[0x49, 0xbf, 0xf0, 0xde, 0xbc, 0x9a, 0x78, 0x56, 0x34, 0x12],
mov(r15, 0x123456789abcdef0),
);
eq(
[0x49, 0xb8, 0xf0, 0xde, 0xbc, 0x9a, 0x78, 0x56, 0x34, 0x12],
mov(r8, 0x123456789abcdef0),
);
eq(
[0x49, 0xb9, 0xf0, 0xde, 0xbc, 0x9a, 0x78, 0x56, 0x34, 0x12],
mov(r9, 0x123456789abcdef0),
);
eq([0x41, 0xb9, 0x78, 0x56, 0x34, 0x12], mov(r9d, 0x12345678));
eq([0x66, 0x41, 0xb9, 0x34, 0x12], mov(r9w, 0x1234));
eq([0x41, 0xb1, 0x12], mov(r9b, 0x12));
eq([0x41, 0xb0, 0x12], mov(r8b, 0x12));
eq([0x41, 0xb7, 0x12], mov(r15b, 0x12));
eq(
[0x48, 0xb8, 0xf0, 0xde, 0xbc, 0x9a, 0x78, 0x56, 0x34, 0x12],
mov(rax, 0x123456789abcdef0),
);
eq(
[0x48, 0xbb, 0xf0, 0xde, 0xbc, 0x9a, 0x78, 0x56, 0x34, 0x12],
mov(rbx, 0x123456789abcdef0),
);
eq(
[0x48, 0xbf, 0xf0, 0xde, 0xbc, 0x9a, 0x78, 0x56, 0x34, 0x12],
mov(rdi, 0x123456789abcdef0),
);
eq([0xbb, 0x78, 0x56, 0x34, 0x12], mov(ebx, 0x12345678));
eq([0x66, 0xbb, 0x34, 0x12], mov(bx, 0x1234));
eq([0xb3, 0x12], mov(bl, 0x12));
// bh and dil share the opcode byte 0xb7; only dil is expected with the 0x40 prefix
eq([0xb7, 0x12], mov(bh, 0x12));
eq([0xb4, 0x12], mov(ah, 0x12));
eq([0x40, 0xb7, 0x12], mov(dil, 0x12));
}
}
+119
View File
@@ -0,0 +1,119 @@
use crate::backend::Addr;
/// ELF64 file header, laid out with `#[repr(C)]` because `create` writes the
/// struct to the output by copying its raw bytes.
#[repr(C)]
pub struct ELF64Header {
/// Magic number; `create` fills it so the bytes read 0x7f 'E' 'L' 'F' on a little-endian host.
magic: u32,
/// File class; `create` uses 0x2 for 64 bit.
class: u8,
/// Data encoding; `create` uses 0x1 for little endian.
endianness: u8,
/// Version field of the identification bytes.
ei_version: u8,
/// Target OS ABI; `create` uses 0x0 for System V.
os_abi: u8,
/// Version of the OS ABI.
os_abi_ver: u8,
/// Padding that completes the identification bytes.
pad: [u8; 7],
/// Object file type; `create` uses 0x2 for an executable.
ty: u16,
/// Target machine, see `Arch::machine`.
machine: u16,
/// Object file version.
e_version: u32,
/// Virtual address at which execution starts.
entry: u64,
/// File offset of the program header table.
program_header_offset: u64,
/// File offset of the section header table (`create` writes 0x0 and emits no sections).
section_header_offset: u64,
/// Processor-specific flags.
flags: u32,
/// Size of this header in bytes.
header_size: u16,
/// Size of one program header entry.
program_header_entry_size: u16,
/// Number of program header entries.
program_header_num: u16,
/// Size of one section header entry.
section_header_entry_size: u16,
/// Number of section header entries.
section_header_num: u16,
/// Index of the section holding section names.
section_header_str_idx: u16,
}
/// ELF64 program header (one segment description), laid out with `#[repr(C)]`
/// because `create` writes the struct to the output by copying its raw bytes.
#[repr(C)]
pub struct ProgramHeader {
/// Segment type; `create` uses 0x1 (LOAD).
ty: u32,
/// Segment permission flags; `create` uses 0b101 (executable, readable).
flags: u32,
/// File offset at which the segment starts.
offset: u64,
/// Virtual address the segment is mapped at.
vaddr: u64,
/// Physical address of the segment (`create` leaves it at 0x0).
paddr: u64,
/// Number of bytes of the segment stored in the file.
filesz: u64,
/// Number of bytes the segment occupies in memory.
memsz: u64,
/// Required alignment of the segment.
align: u64,
}
/// ELF64 section header entry. `create` only uses its size (for the
/// `section_header_entry_size` field); no section headers are emitted.
#[repr(C)]
pub struct SectionHeader {
/// Index of the section name in the section-name string table.
name_idx: u32,
/// Section type.
ty: u32,
/// Section attribute flags.
flags: u64,
/// Virtual address of the section in memory.
addr: u64,
/// File offset of the section contents.
offset: u64,
/// Size of the section in bytes.
size: u64,
/// Index of an associated section.
link: u32,
/// Extra, type-dependent information.
info: u32,
/// Required alignment of the section.
addr_align: u64,
/// Size of each entry, for sections that hold a table.
entry_size: u64,
}
/// Target architectures that have a known ELF machine identifier.
pub enum Arch {
    X86_64,
    RISCV,
}

impl Arch {
    /// Value stored in the `machine` field of the ELF header for this
    /// architecture.
    pub fn machine(&self) -> u16 {
        const MACHINE_X86_64: u16 = 0x3e;
        const MACHINE_RISCV: u16 = 0xf3;

        match *self {
            Self::X86_64 => MACHINE_X86_64,
            Self::RISCV => MACHINE_RISCV,
        }
    }
}
/// Wraps `program` in a minimal x86-64 ELF64 executable image.
///
/// The image consists of the ELF header, a single LOAD program header and the
/// raw program bytes, in that order. The segment maps the whole file
/// (headers included) at virtual address `0x400000`, so the first program
/// byte ends up at `0x400000 + size_of headers`.
///
/// * `program` - machine code to embed.
/// * `start_offset` - offset of the entry point within `program`.
///
/// Returns the bytes of the complete executable file.
///
/// This is currently specialized for x86_64; obviously add params later.
pub fn create(program: &[u8], start_offset: Addr) -> Vec<u8> {
    let addr_start = 0x400000;
    let page_size = 0x1000;
    let header_len = (size_of::<ELF64Header>() + size_of::<ProgramHeader>()) as u64;
    // The program bytes are written right after the two headers.
    let program_pos = header_len;
    // The segment starts at file offset 0, so its size must include the
    // headers; otherwise the last `header_len` bytes of the program would lie
    // outside the region the loader is asked to map.
    let segment_size = header_len + program.len() as u64;
    let program_header = ProgramHeader {
        ty: 0x1,      // LOAD
        flags: 0b101, // executable, readable
        offset: 0x0,
        vaddr: addr_start,
        paddr: 0x0,
        filesz: segment_size,
        memsz: segment_size,
        align: page_size,
    };
    let header = ELF64Header {
        // Native byte order so the in-memory bytes are always 0x7f 'E' 'L' 'F'.
        magic: u32::from_ne_bytes([0x7f, b'E', b'L', b'F']),
        class: 0x2,      // 64 bit
        endianness: 0x1, // little endian
        ei_version: 0x1,
        os_abi: 0x0, // system-v
        os_abi_ver: 0x0,
        pad: [0x0; 7],
        ty: 0x2, // executable
        machine: Arch::X86_64.machine(),
        e_version: 0x1,
        entry: addr_start + program_pos + start_offset.val(),
        program_header_offset: size_of::<ELF64Header>() as u64,
        section_header_offset: 0x0,
        // x86-64 defines no processor-specific flags (the previous value was
        // a leftover from the RISC-V version of this function).
        flags: 0x0,
        header_size: size_of::<ELF64Header>() as u16,
        program_header_entry_size: size_of::<ProgramHeader>() as u16,
        program_header_num: 0x1,
        section_header_entry_size: size_of::<SectionHeader>() as u16,
        section_header_num: 0x0,
        section_header_str_idx: 0x0,
    };
    let mut bytes: Vec<u8> = Vec::with_capacity(segment_size as usize);
    // SAFETY: both header types are `#[repr(C)]` structs made only of integer
    // fields whose sizes add up to the struct size (64 and 56 bytes), so they
    // contain no padding and every byte read is initialized.
    unsafe {
        bytes.extend_from_slice(as_u8_slice(&header));
        bytes.extend_from_slice(as_u8_slice(&program_header));
    }
    bytes.extend_from_slice(program);
    bytes
}
/// Views the in-memory representation of `p` as a byte slice.
///
/// # Safety
/// Every byte of `T` must be initialized; in particular `T` must not contain
/// padding bytes, because reading padding through a `u8` slice is undefined
/// behavior.
unsafe fn as_u8_slice<T: Sized>(p: &T) -> &[u8] {
    let start = (p as *const T).cast::<u8>();
    let len = std::mem::size_of_val(p);
    // SAFETY: `start` comes from a valid reference, so it is non-null, aligned
    // for `u8` and valid for `len` bytes for the lifetime of `p`; the caller
    // guarantees that all of those bytes are initialized.
    unsafe { core::slice::from_raw_parts(start, len) }
}
+17
View File
@@ -1 +1,18 @@
pub mod arch;
pub mod elf;
mod test;
pub use arch::*;
pub use test::*;
/// An address or offset, stored as a raw `u64`.
#[derive(Debug, Clone, Copy, PartialEq)]
pub struct Addr(u64);

impl Addr {
    /// Sentinel with every bit set.
    const NONE: Self = Self(u64::MAX);

    /// Returns the raw numeric value.
    pub fn val(&self) -> u64 {
        let Self(raw) = *self;
        raw
    }
}
/// Architecture-tagged assembly; x86_64 is the only variant so far.
pub enum Asm {
/// Assembly made of x86_64 instructions.
X86_64(x86_64::Asm),
}
+31
View File
@@ -0,0 +1,31 @@
use crate::backend::{
Addr, elf,
x86_64::{Asm, Instr, mov, reg::*},
};
use std::{fs::OpenOptions, io::Write, os::unix::fs::OpenOptionsExt, process::Command};
/// End-to-end smoke test: assembles a three-instruction program, wraps it in
/// an ELF file at `./x86_64_test`, runs it and exits this process with the
/// child's exit code.
///
/// The program is `mov eax, 1; mov ebx, 39; int 0x80` (presumably the Linux
/// `exit` system call with status 39; confirm against the target ABI).
///
/// # Panics
/// Panics if assembling, writing the file or running the child fails.
pub fn test_x86_64() {
    let instrs = vec![mov(eax, 1), mov(ebx, 39), Instr::Int { code: 0x80 }];
    let asm = Asm { instrs };

    let mut code = Vec::new();
    asm.compile(&mut code).expect("failed to compile");
    let image = elf::create(&code, Addr(0));

    let path = "./x86_64_test";
    {
        let mut file = OpenOptions::new()
            .write(true)
            .create(true)
            .truncate(true)
            .mode(0o750)
            .open(path)
            .expect("Failed to create file");
        file.write_all(&image).expect("Failed to write to file");
        file.sync_all().expect("Failed to sync file");
        // The file is closed at the end of this scope, before it is executed.
    }

    println!("running...");
    let status = Command::new(path)
        .spawn()
        .expect("failed to run")
        .wait()
        .expect("failed to wait");
    // `code()` is `None` when the child did not exit normally; in that case
    // this function simply returns.
    if let Some(exit_code) = status.code() {
        std::process::exit(exit_code);
    }
}
+1
View File
@@ -26,6 +26,7 @@ impl<T> std::ops::DerefMut for Spanned<T> {
}
}
#[derive(Debug)]
pub struct CompilerMsg {
pub spans: Vec<Span>,
pub msg: String,
+5
View File
@@ -1,6 +1,8 @@
mod namespace;
pub use namespace::*;
use crate::backend::Asm;
use super::Id;
pub struct Fn {
@@ -19,6 +21,7 @@ pub enum StatementTy {
Define { target: VarId, val: VarId },
Assign { target: VarId, val: VarId },
Call { target: VarId, args: Vec<VarId> },
Asm(Asm),
}
pub struct Var {
@@ -29,6 +32,8 @@ pub struct Var {
pub enum Type {
Unsigned(u8),
Signed(u8),
Array(TypeId),
Ptr(TypeId),
Infer,
}
+1
View File
@@ -7,6 +7,7 @@ mod parser;
mod parser_ir;
fn main() {
return crate::backend::test_x86_64();
let mut args = std::env::args();
let Some(path) = args.nth(1) else {
println!("file expected");
+5 -2
View File
@@ -1,9 +1,12 @@
use crate::parser::{Node, cursor::Token};
use crate::{
backend::arch::x86_64::Asm,
parser::{Node, cursor::Token},
};
pub mod x86_64;
pub enum AsmBlock {
X86_64(x86_64::Asm),
X86_64(Asm),
}
impl Node for AsmBlock {
+5 -18
View File
@@ -1,5 +1,5 @@
use crate::{
backend::arch::x86_64::RegMode,
backend::arch::x86_64::*,
io::{CompilerMsg, Span},
parser::{
Node,
@@ -7,20 +7,6 @@ use crate::{
},
};
pub struct Asm {
instrs: Vec<Instr>,
}
pub enum Instr {
Mov { dst: RegMode, src: RegImm },
Int { code: u64 },
}
pub enum RegImm {
Reg(RegMode),
Imm(u64),
}
impl Node for Asm {
fn parse(ctx: &mut crate::parser::ParseCtx) -> Result<Self, crate::io::CompilerMsg> {
let mut instrs = Vec::new();
@@ -38,7 +24,9 @@ impl Node for Asm {
let Token::Lit(LitTy::Number(num)) = ctx.expect_next()? else {
return Err("Expected an immediate".into());
};
let code = parse_imm(&num, ctx.span)?;
let code = parse_imm(&num, ctx.span)?
.try_into()
.map_err(|_| CompilerMsg::from("Immediate must be a u8"))?;
instrs.push(Instr::Int { code });
}
_ => {
@@ -65,8 +53,7 @@ pub fn parse_imm(mut s: &str, span: Span) -> Result<u64, CompilerMsg> {
radix = 16;
s = &s[2..];
}
u64::from_str_radix(s, radix)
.map_err(|_| CompilerMsg::from(("invalid immediate", span)))
u64::from_str_radix(s, radix).map_err(|_| CompilerMsg::from(("invalid immediate", span)))
}
pub fn parse_rmi(ctx: &mut crate::parser::ParseCtx) -> Result<RegImm, CompilerMsg> {
Executable
BIN
View File
Binary file not shown.