Compare commits

..
1 Commits
Author SHA1 Message Date
iris 867f9e51bd parser2 2026-04-08 17:53:21 -04:00
77 changed files with 892 additions and 3766 deletions
Generated
+1 -110
View File
@@ -3,114 +3,5 @@
version = 4
[[package]]
name = "arrayvec"
version = "0.7.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7c02d123df017efcdfbd739ef81735b36c5ba83ec3c59c80a9d7ecc718f92e50"
[[package]]
name = "bitcode"
version = "0.6.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0a6ed1b54d8dc333e7be604d00fa9262f4635485ffea923647b6521a5fff045d"
dependencies = [
"arrayvec",
"bitcode_derive",
"bytemuck",
"glam",
"serde",
]
[[package]]
name = "bitcode_derive"
version = "0.6.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "238b90427dfad9da4a9abd60f3ec1cdee6b80454bde49ed37f1781dd8e9dc7f9"
dependencies = [
"proc-macro2",
"quote",
"syn",
]
[[package]]
name = "bytemuck"
version = "1.25.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c8efb64bd706a16a1bdde310ae86b351e4d21550d98d056f22f8a7f7a2183fec"
[[package]]
name = "glam"
version = "0.33.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "898f5a568a84989b6c0f8caa50a93074b97dbdc58fc6d9543157bb4562758933"
[[package]]
name = "lang"
name = "v2"
version = "0.1.0"
dependencies = [
"bitcode",
]
[[package]]
name = "proc-macro2"
version = "1.0.106"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8fd00f0bb2e90d81d1044c2b32617f68fcb9fa3bb7640c23e9c748e53fb30934"
dependencies = [
"unicode-ident",
]
[[package]]
name = "quote"
version = "1.0.45"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "41f2619966050689382d2b44f664f4bc593e129785a36d6ee376ddf37259b924"
dependencies = [
"proc-macro2",
]
[[package]]
name = "serde"
version = "1.0.228"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9a8e94ea7f378bd32cbbd37198a4a91436180c5bb472411e48b5ec2e2124ae9e"
dependencies = [
"serde_core",
]
[[package]]
name = "serde_core"
version = "1.0.228"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "41d385c7d4ca58e59fc732af25c3983b67ac852c1a25000afe1175de458b67ad"
dependencies = [
"serde_derive",
]
[[package]]
name = "serde_derive"
version = "1.0.228"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d540f220d3187173da220f885ab66608367b6574e925011a9353e4badda91d79"
dependencies = [
"proc-macro2",
"quote",
"syn",
]
[[package]]
name = "syn"
version = "2.0.117"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e665b8803e7b1d2a727f4023456bbbbe74da67099c585258af0ad9c5013b9b99"
dependencies = [
"proc-macro2",
"quote",
"unicode-ident",
]
[[package]]
name = "unicode-ident"
version = "1.0.24"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e6e4313cd5fcd3dad5cafa179702e2b244f760991f45397d14d4ebf38247da75"
+1 -4
View File
@@ -1,9 +1,6 @@
[package]
name = "lang"
name = "v2"
version = "0.1.0"
edition = "2024"
[dependencies]
[dev-dependencies]
bitcode = "0.6.9"
-13
View File
@@ -1,13 +0,0 @@
use crate::{
backend::{Addr, LinkedProgram, Program},
io::CompilerMsg,
};
pub mod x86_64;
/// A target architecture that a [`Program`] can be compiled for.
pub trait Arch: Sized {
/// Name of the architecture.
const NAME: &str;
/// Architecture-specific assembly payload type.
type Asm;
/// Address type of the linked output; must implement [`Addr`].
type Addr: Addr;
/// Compiles `p` into a [`LinkedProgram`], or returns a [`CompilerMsg`] on failure.
fn compile(p: &Program<Self>) -> Result<LinkedProgram<Self::Addr>, CompilerMsg>;
}
-83
View File
@@ -1,83 +0,0 @@
use std::collections::HashMap;
use super::*;
use crate::backend::{LibImport, LinkedProgram, SymImport, SymTable, Symbol};
use util::*;
/// Working state used by [`compile`] while encoding a program.
pub struct Encoder<'a> {
/// Machine code emitted so far, plus its unresolved symbol references.
pub code: Code,
/// Byte offset recorded for each symbol that has been placed.
pub sym_tab: SymTable<u64>,
/// For external symbols: the code positions that reference them.
pub sym_refs: HashMap<Symbol, Vec<usize>>,
/// The program being compiled.
pub program: &'a Program<X86_64>,
}
/// Compiles `p` into a flat x86_64 image and resolves its symbol references.
///
/// Data is encoded first, then every function in order, recording each
/// function's start offset in the symbol table. Afterwards every pending
/// 4-byte symbol reference is either patched with a relative offset
/// (internal symbols) or recorded as a usage of an external import.
///
/// # Errors
/// Returns a [`CompilerMsg`] if an instruction fails to compile or if an
/// internal symbol is referenced but was never placed.
pub fn compile(p: &Program<X86_64>) -> Result<LinkedProgram<u64>, CompilerMsg> {
    let mut encoder = Encoder::new(p);
    p.encode_data(&mut encoder.code.bytes, &mut encoder.sym_tab);
    for f in &p.funcs {
        let addr = encoder.code.bytes.len();
        encoder.sym_tab.insert(f.sym, addr as u64);
        for instr in &f.instrs {
            encoder.compile_instr(instr)?;
        }
    }
    for (pos, sym) in encoder.code.missing.drain(..) {
        let info = encoder.program.sym_info(sym);
        if info.external {
            encoder.sym_refs.entry(sym).or_default().push(pos);
        } else {
            // Build the error lazily: the happy path must not format a
            // message for every symbol that resolves fine.
            let addr = encoder
                .sym_tab
                .get(sym)
                .ok_or_else(|| CompilerMsg::from(format!("missing symbol {}", info.name)))?;
            encoder.code.bytes[pos..pos + 4].copy_from_slice(&addr_offset(pos, addr));
        }
    }
    let imports = p
        .external
        .iter()
        .map(|e| LibImport {
            name: e.file.clone(),
            syms: e
                .syms
                .iter()
                .map(|&s| SymImport {
                    name: p.sym_info(s).name.clone(),
                    // an import that is never referenced simply has no usages
                    usages: encoder.sym_refs.get(&s).cloned().unwrap_or_default(),
                })
                .collect(),
        })
        .collect();
    Ok(LinkedProgram {
        code: encoder.code.bytes,
        entry: p.entry.and_then(|e| encoder.sym_tab.get(e)),
        imports,
    })
}
type BInstr = crate::backend::Instr<X86_64>;
impl<'a> Encoder<'a> {
fn compile_instr(&mut self, instr: &BInstr) -> Result<(), CompilerMsg> {
match instr {
BInstr::Asm(asm) => {
self.code.extend(asm);
}
_ => todo!(),
}
Ok(())
}
pub fn new(program: &'a Program<X86_64>) -> Self {
Self {
code: Code::default(),
sym_tab: SymTable::new(program.sym_count()),
sym_refs: Default::default(),
program,
}
}
}
-280
View File
@@ -1,280 +0,0 @@
use super::*;
use crate::backend::Symbol;
// Result type returned by the fallible instruction encoders below.
type ERes = Result<(), CompilerMsg>;
/// Encoded machine code together with the symbol references that still
/// have to be patched in.
#[derive(Default)]
pub struct Code {
// raw instruction bytes emitted so far
pub(super) bytes: Vec<u8>,
// (byte position, symbol): 4-byte placeholders waiting for a symbol offset
pub(super) missing: Vec<(usize, Symbol)>,
}
impl Code {
/// Encodes `mov dst, src`.
///
/// Supported forms: reg <- reg, reg <- imm, reg <- mem, mem <- reg and
/// mem <- imm. Memory-to-memory moves are rejected.
///
/// # Errors
/// Returns an error when operand widths do not match, when the register
/// combination cannot be encoded because of REX constraints, when an
/// immediate does not fit, or when the form is unsupported.
pub fn mov(&mut self, dst: impl Into<RegMem>, src: impl Into<RegImmMem>) -> ERes {
let dst = dst.into();
let src = src.into();
match dst {
RegMem::Reg(mut dst) => match src {
// reg <- reg
RegImmMem::Reg(src) => {
if dst.width() != src.width() {
return Err("src and dst are not same width".into());
}
if dst.incompatible(&src) {
return Err("incompatible registers due to rex".into());
}
let width = dst.width();
self.prefix16(width);
self.rex(width, src, 0, dst);
// opcode 0x88 for 8-bit operands, 0x89 otherwise; src goes in ModRM.reg
self.bytes.push(0x88 | width.not8());
self.modrm(src, dst);
}
// reg <- imm
RegImmMem::Imm(src) => {
let src_width = src.width_unsigned()?;
if src_width > dst.width() {
return Err("immediate cannot fit in register".into());
}
self.prefix16(dst);
if dst.width() == Width::B64 && src_width <= Width::B32 && src.0 < 0 {
// use different op that sign extends for less bytes
self.bytes
.extend([rex(dst, 0, 0, dst), 0xc7, 0xc0 | dst.base()]);
self.imm(src, Width::B32);
} else {
// a value that fits in 32 bits is written through the 32-bit
// register form, so only a 4-byte immediate is emitted
if src_width <= Width::B32 {
dst = dst.lower64();
}
self.rex(dst, 0, 0, dst);
// 0xb0+r for 8-bit destinations, 0xb8+r otherwise
self.bytes.push(0xb0 | (dst.not8() << 3) | dst.base());
self.imm(src, dst.width());
}
}
// reg <- mem
RegImmMem::Mem(src) => {
if src.width != dst.width() {
return Err("register & memory sizes don't match".into());
}
if dst.high() && src.reg.gt8() {
return Err("registers incompatible (REX)".into());
}
self.prefix32(&src)?;
self.prefix16(dst);
self.rex(dst, dst, 0, src);
// opcode 0x8a for 8-bit operands, 0x8b otherwise
self.bytes.push(0x8a | dst.not8());
self.modrm(dst, src);
}
},
RegMem::Mem(dst) => match src {
// mem <- reg
RegImmMem::Reg(src) => {
if src.width() != dst.width {
return Err("register & memory sizes don't match".into());
}
if src.high() && dst.reg.gt8() {
return Err("registers incompatible (REX)".into());
}
self.prefix32(&dst)?;
self.prefix16(src);
self.rex(dst, src, 0, dst);
self.bytes.push(0x88 | src.not8());
self.modrm(src, dst);
}
// mem <- imm
RegImmMem::Imm(src) => {
// the immediate is encoded with at most 32 bits, even for 64-bit stores
let encode_width = dst.width.min(Width::B32);
let src_width = if dst.width == Width::B64 {
src.width_signed()
} else {
src.width_unsigned()
}?;
if src_width == Width::B64 {
return Err("cannot move 64 bit immediate into memory".into());
}
if src_width > dst.width {
return Err("source cannot fit in destination".into());
}
self.prefix32(&dst)?;
self.prefix16(encode_width);
self.rex(dst, 0, 0, dst);
// opcode 0xc6 for 8-bit stores, 0xc7 otherwise, with ModRM.reg = 0
self.bytes.push(0xc6 | encode_width.not8());
self.modrm(0, dst);
self.imm(src, encode_width);
}
RegImmMem::Mem(_) => return Err("cannot move memory to memory".into()),
},
}
Ok(())
}
/// Encodes `push` of a register or an immediate.
///
/// Registers must be 64 or 16 bit wide; immediates must fit in 32 bits.
///
/// # Errors
/// Returns an error for registers of any other width and for immediates
/// that need 64 bits.
///
/// # Panics
/// Memory operands are not implemented yet (`todo!()`).
pub fn push(&mut self, reg: impl Into<RegImmMem>) -> ERes {
    match reg.into() {
        RegImmMem::Reg(reg) => {
            match reg.width() {
                Width::B64 | Width::B16 => (),
                _ => return Err("register must be 64 or 16 bit".into()),
            }
            // Same layout as `pop`: optional 0x66 operand-size prefix for the
            // 16-bit form, REX.B (0x41) for r8-r15, then opcode 0x50+r.
            // Previously the 16-bit case emitted no bytes at all.
            self.prefix16(reg);
            if reg.gt8() {
                self.bytes.push(0x41);
            }
            self.bytes.push(0x50 | reg.base());
        }
        RegImmMem::Imm(imm) => {
            if imm.width_unsigned()? == Width::B64 {
                return Err("immediate must be 32 bit or less".into());
            }
            // 0x6a sign-extends its imm8, so the short form is only valid
            // when the value fits in a *signed* byte; everything else uses
            // the imm32 form.
            if imm.width_signed()? == Width::B8 {
                self.bytes.push(0x6a);
                self.bytes.push(imm.0 as u8);
            } else {
                self.bytes.push(0x68);
                self.bytes.extend((imm.0 as u32).to_le_bytes());
            }
        }
        RegImmMem::Mem(_) => todo!(),
    }
    Ok(())
}
/// Encodes `pop reg` for a 64- or 16-bit register.
///
/// # Errors
/// Returns an error for registers of any other width.
pub fn pop(&mut self, reg: Reg) -> ERes {
    if !matches!(reg.width(), Width::B64 | Width::B16) {
        return Err("register must be 64 or 16 bit".into());
    }
    self.prefix16(reg);
    // REX.B is only needed for r8-r15
    let rex_b: Option<u8> = reg.gt8().then_some(0x41);
    self.bytes.extend(rex_b);
    self.bytes.push(0x58 | reg.base());
    Ok(())
}
/// Encodes `lea dst, [sym]`, leaving a 4-byte placeholder that is patched
/// with the symbol's offset later.
pub fn lea(&mut self, dst: Reg, sym: Symbol) {
// REX prefix with W forced on (the literal 1) and R taken from `dst`
self.rex(1, dst, 0, 0);
self.bytes.push(0x8d);
// symbol operand: ModRM mod=00, rm=101 followed by the placeholder
self.modrm(dst, sym);
}
/// Encodes `int code`.
pub fn int(&mut self, code: u8) {
    self.bytes.extend_from_slice(&[0xcd, code]);
}

/// Encodes `syscall`.
pub fn syscall(&mut self) {
    self.bytes.extend_from_slice(&[0x0f, 0x05]);
}

/// Encodes a relative `call` to `sym` (opcode 0xe8 + 4-byte placeholder).
pub fn call(&mut self, sym: Symbol) {
    let opcode = 0xe8;
    self.bytes.push(opcode);
    self.sym_offset4(sym);
}

/// Encodes an indirect `call` through the memory slot at `sym`
/// (bytes 0xff 0x15 + 4-byte placeholder).
pub fn call_mem(&mut self, sym: Symbol) {
    self.bytes.extend_from_slice(&[0xff, 0x15]);
    self.sym_offset4(sym);
}

/// Encodes `ret`.
pub fn ret(&mut self) {
    let opcode = 0xc3;
    self.bytes.push(opcode);
}
/// Shared encoder for `add`/`sub` of an immediate to a register.
///
/// `ext` is the opcode extension placed in ModRM.reg (0 = add, 5 = sub).
///
/// # Errors
/// Returns "immediate overflow" when the immediate cannot be represented
/// for the destination width. Nothing is appended to the buffer in that
/// case: all validation happens before the first byte is emitted.
fn add_sub(&mut self, dst: Reg, src: impl Into<Imm>, ext: u8) -> ERes {
    let mut src = src.into();
    let mut width = src.width_signed()?;
    // immediates are encoded with at most 32 bits
    let dst_width = dst.width().min(Width::B32);
    if width > dst_width {
        // Too wide as a signed value: accept it only if it fits as an
        // unsigned value of the destination width, then reinterpret the
        // bit pattern as signed.
        width = src.width_unsigned()?;
        if dst.width() == Width::B64 || width > dst_width {
            return Err("immediate overflow".into());
        }
        src = src.reinterpret(dst_width);
        width = src.width_signed()?;
    }
    // Emit only after validation so an error leaves the buffer untouched.
    self.prefix16(dst_width);
    self.rex(dst, 0, 0, dst);
    if dst.width() == Width::B8 {
        self.bytes.push(0x80);
    } else if width == Width::B8 {
        // short form with a sign-extended imm8
        self.bytes.push(0x83);
    } else {
        self.bytes.push(0x81);
        width = dst_width;
    }
    self.modrm(ext, dst);
    self.imm(src, width);
    Ok(())
}
/// Encodes `add dst, src` with an immediate source (opcode extension 0).
pub fn add(&mut self, dst: Reg, src: impl Into<Imm>) -> ERes {
self.add_sub(dst, src, 0)
}
/// Encodes `sub dst, src` with an immediate source (opcode extension 5).
pub fn sub(&mut self, dst: Reg, src: impl Into<Imm>) -> ERes {
self.add_sub(dst, src, 5)
}
/// Emits the 0x66 prefix when the operand width is 16 bit.
fn prefix16(&mut self, width: impl Into<Width>) {
    if matches!(width.into(), Width::B16) {
        self.bytes.push(0x66);
    }
}
/// Emits the 0x67 prefix when the memory operand's base register is 32 bit.
///
/// # Errors
/// 8- and 16-bit base registers are rejected.
fn prefix32(&mut self, mem: &Mem) -> Result<(), CompilerMsg> {
    match mem.reg.width() {
        Width::B64 => Ok(()),
        Width::B32 => {
            self.bytes.push(0x67);
            Ok(())
        }
        Width::B8 | Width::B16 => Err("invalid register width".into()),
    }
}
/// Emits a REX prefix if any of its bits is set or an operand requires one.
fn rex(&mut self, w: impl RexW, r: impl RexBit, x: u8, b: impl RexBit) {
    let any_bit = w.rexw() || r.rex() || x.rex() || b.rex();
    let required = r.req() || b.req();
    if any_bit || required {
        self.bytes.push(rex(w, r, x, b));
    }
}
/// Emits the ModRM byte for `reg`/`rm`, followed by an SIB byte and a
/// displacement or symbol placeholder when the addressing form needs them.
fn modrm(&mut self, reg: impl ModRMReg, rm: impl ModRMRM) {
let addr = rm.addr();
// mod field: 00 = no displacement (also used for symbols), 01 = 8-bit
// displacement, 10 = 32-bit displacement, 11 = register direct
let mod_ = match addr {
EffAddr::Mem0 | EffAddr::Sym(_) => 0b00,
EffAddr::Mem8(_) => 0b01,
EffAddr::Mem32(_) => 0b10,
EffAddr::None => 0b11,
};
self.bytes
.push(((mod_ as u8) << 6) | (reg.val() << 3) | rm.rm());
// for memory operands rm = 100 means "SIB byte follows"
if !matches!(addr, EffAddr::None) && rm.rm() == 0b100 {
// SIB
self.bytes.push(0x24);
}
match addr {
EffAddr::Mem8(disp) => self.bytes.push(disp as u8),
EffAddr::Mem32(disp) => self.bytes.extend(disp.to_le_bytes()),
EffAddr::Sym(sym) => self.sym_offset4(sym),
_ => (),
}
}
/// Reserves four zero bytes for a 32-bit offset to `sym` and records the
/// position so it can be patched later.
fn sym_offset4(&mut self, sym: Symbol) {
    self.missing.push((self.bytes.len(), sym));
    self.bytes.extend_from_slice(&[0u8; 4]);
}
/// Appends `other` to this buffer, shifting its pending symbol references
/// by the current length.
pub fn extend(&mut self, other: &Code) {
    let base = self.bytes.len();
    for &(offset, sym) in &other.missing {
        self.missing.push((base + offset, sym));
    }
    self.bytes.extend_from_slice(&other.bytes);
}
/// Appends the low `width` bytes of `imm` in little-endian order.
fn imm(&mut self, imm: Imm, width: Width) {
    let le = imm.0.to_le_bytes();
    self.bytes.extend_from_slice(&le[..width.bytes()]);
}
}
/// Runs `f` against a fresh [`Code`] buffer and returns the buffer, or the
/// first error `f` reports.
pub fn encode(f: impl FnOnce(&mut Code) -> Result<(), CompilerMsg>) -> Result<Code, CompilerMsg> {
    let mut code = Code::default();
    match f(&mut code) {
        Ok(()) => Ok(code),
        Err(msg) => Err(msg),
    }
}
-30
View File
@@ -1,30 +0,0 @@
mod compile;
mod encode;
mod reg;
#[cfg(test)]
mod test;
mod types;
mod util;
use crate::{
arch::Arch,
backend::{LinkedProgram, Program},
io::CompilerMsg,
};
pub use compile::*;
pub use encode::*;
pub use reg::*;
pub use types::*;
use util::*;
/// Marker type selecting the x86_64 backend.
pub struct X86_64;
impl Arch for X86_64 {
const NAME: &str = "x86_64";
// raw assembly payloads are pre-encoded `Code` buffers
type Asm = Code;
type Addr = u64;
fn compile(p: &Program<Self>) -> Result<LinkedProgram<Self::Addr>, CompilerMsg> {
// a bare `compile` here names the free function re-exported from the
// `compile` module, not this associated function
compile(p)
}
}
-197
View File
@@ -1,197 +0,0 @@
/// An x86_64 general purpose register at a specific operand width.
#[derive(Clone, Copy, PartialEq)]
pub struct Reg {
// 4-bit register number (see the `def_regs!` table)
val: u8,
// true for the registers declared with `norex=` (ah, ch, dh, bh)
high: bool,
// operand width this name refers to
width: Width,
}
/// Operand width. Variants are ordered from narrowest to widest, so the
/// derived comparison operators compare sizes.
#[derive(Debug, Clone, Copy, PartialEq, PartialOrd)]
#[repr(u8)]
pub enum Width {
B8 = 0,
B16 = 1,
B32 = 2,
B64 = 3,
}
// Register table. Each row is: register number, then the 64/32/16/8-bit
// names. `norex=NAME` additionally declares the high-byte register sharing
// that number, and a trailing `!_` adds the row to `Reg::IMPORTANT`, the
// subset the tests iterate over.
def_regs! {
0b0000 : rax eax ax al,
0b0001 : rcx ecx cx cl !_,
0b0010 : rdx edx dx dl,
0b0011 : rbx ebx bx bl,
0b0100 : rsp esp sp spl norex=ah !_,
0b0101 : rbp ebp bp bpl norex=ch,
0b0110 : rsi esi si sil norex=dh !_,
0b0111 : rdi edi di dil norex=bh,
0b1000 : r8 r8d r8w r8b,
0b1001 : r9 r9d r9w r9b !_,
0b1010 : r10 r10d r10w r10b,
0b1011 : r11 r11d r11w r11b,
0b1100 : r12 r12d r12w r12b !_,
0b1101 : r13 r13d r13w r13b,
0b1110 : r14 r14d r14w r14b,
0b1111 : r15 r15d r15w r15b,
}
impl Reg {
    /// Builds a register from its number, width and high-byte flag.
    const fn new(val: u8, width: Width, high: bool) -> Self {
        Self { val, high, width }
    }

    /// Low three bits of the register number.
    pub fn base(&self) -> u8 {
        self.val & 0b111
    }

    /// checks if register is not one of the first 8 (0-7)
    pub fn gt8(&self) -> bool {
        self.val > 0b0111
    }

    /// checks if register is not one of the first 4 (0-3)
    pub fn gt4(&self) -> bool {
        self.val > 0b0011
    }

    /// Operand width of this register name.
    pub fn width(&self) -> Width {
        self.width
    }

    /// 0 for 8-bit registers, 1 for anything wider.
    pub fn not8(&self) -> u8 {
        self.width.not8()
    }

    /// Whether this is one of the high-byte registers.
    pub fn high(&self) -> bool {
        self.high
    }

    /// if self has 64 bit width, changes width to 32 bit
    pub fn lower64(&self) -> Self {
        Self {
            width: self.width.min(Width::B32),
            ..*self
        }
    }

    /// Whether encoding this register needs a REX prefix.
    pub fn requires_rex(&self) -> bool {
        let low_byte_above_bl = self.width == Width::B8 && !self.high && self.gt4();
        self.gt8() || self.width == Width::B64 || low_byte_above_bl
    }

    /// Whether `self` and `other` cannot appear in the same instruction:
    /// one is a high-byte register while the other requires REX.
    pub fn incompatible(&self, other: &Reg) -> bool {
        (self.high && other.requires_rex()) || (other.high && self.requires_rex())
    }
}
impl Width {
    /// Largest unsigned value representable in this width.
    pub const fn max_val(&self) -> u64 {
        match self {
            Self::B8 => u8::MAX as u64,
            Self::B16 => u16::MAX as u64,
            Self::B32 => u32::MAX as u64,
            Self::B64 => u64::MAX,
        }
    }

    /// The narrower of the two widths.
    pub fn min(self, other: Self) -> Self {
        if other < self { other } else { self }
    }

    /// Size of this width in bytes.
    pub const fn bytes(&self) -> usize {
        match self {
            Self::B8 => 1,
            Self::B16 => 2,
            Self::B32 => 4,
            Self::B64 => 8,
        }
    }

    /// greater than 8 bits
    pub const fn not8(&self) -> u8 {
        match self {
            Self::B8 => 0,
            _ => 1,
        }
    }
}
// Builds an array from `;`-terminated identifier groups, keeping only the
// groups that start with `!<token>`. Kept identifiers accumulate before the
// first `;` and are emitted as `[a, b, ...]` once the input is exhausted.
macro_rules! filter {
// group marked with `!`: keep its identifiers
($($filtered:ident)*; ! $_:tt $($item:ident)*; $($rest:tt)*) => {
filter!($($filtered)* $($item)*; $($rest)*)
};
// unmarked group: drop it
($($filtered:ident)*; $($item:ident)*; $($rest:tt)*) => {
filter!($($filtered)*; $($rest)*)
};
// nothing left: emit the array
($($filtered:ident)*;) => {
[$($filtered, )*]
};
}
use filter;
// Generates everything derived from the register table:
//   * one `pub const` per register name (plus the optional `norex=` name),
//   * `Reg::IMPORTANT` (test builds only): the rows marked with `!`,
//   * `Reg::parse`, a case-insensitive name lookup,
//   * a `Display` impl printing the register name.
macro_rules! def_regs {
($($val:literal : $B64:ident $B32:ident $B16:ident $B8:ident $(norex=$B8H:ident)? $(!$imp:tt)?,)*) => {
$(
#[allow(non_upper_case_globals)]
pub const $B64: Reg = Reg::new($val, Width::B64, false);
#[allow(non_upper_case_globals)]
pub const $B32: Reg = Reg::new($val, Width::B32, false);
#[allow(non_upper_case_globals)]
pub const $B16: Reg = Reg::new($val, Width::B16, false);
#[allow(non_upper_case_globals)]
pub const $B8 : Reg = Reg::new($val, Width::B8 , false);
$(
#[allow(non_upper_case_globals)]
pub const $B8H: Reg = Reg::new($val, Width::B8, true);
)?
)*
impl Reg {
// #[cfg(test)]
// pub const ALL: &[Reg] = &[
// $( $B64, $B32, $B16, $B8, $($B8H,)? )*
// ];
// every row is forwarded to `filter!`, which keeps the `!`-marked ones
#[cfg(test)]
pub const IMPORTANT: &[Reg] = &
filter!(; $($(!$imp)? $B64 $B32 $B16 $B8 $($B8H)?; )* )
;
pub fn parse(s: &str) -> Option<Self> {
Some(match s.to_lowercase().as_str() {
$(
stringify!($B64) => $B64,
stringify!($B32) => $B32,
stringify!($B16) => $B16,
stringify!($B8 ) => $B8,
$(
stringify!($B8H) => $B8H,
)?
)*
_ => return None,
})
}
}
impl std::fmt::Display for Reg {
#[allow(non_upper_case_globals)]
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
// the register constants are used as match patterns here
write!(f, "{}", match *self {
$(
$B64 => stringify!($B64),
$B32 => stringify!($B32),
$B16 => stringify!($B16),
$B8 => stringify!($B8),
$(
$B8H => stringify!($B8H),
)?
)*
_ => "UNKNOWN",
})
}
}
};
}
use def_regs;
use crate::arch::x86_64::Imm;
/// A register converts to its operand width.
impl From<Reg> for Width {
    fn from(value: Reg) -> Self {
        value.width()
    }
}
-54
View File
@@ -1,54 +0,0 @@
mod setup;
use setup::*;
/// Compares every supported `mov` operand combination against the cached
/// reference encodings.
#[test]
fn mov() {
    let c = &mut TestCtx::new("mov");
    // reg <- reg
    for (dst, src) in regs().flat_map(|d| regs().map(move |s| (d, s))) {
        eq(c, format!("mov {dst}, {src}"), |c| c.mov(dst, src));
    }
    // reg <- mem
    for (dst, src) in regs().flat_map(|d| mems().map(move |s| (d, s))) {
        eq(c, format!("mov {dst}, {src}"), |c| c.mov(dst, src));
    }
    // reg <- imm
    for (dst, src) in regs().flat_map(|d| imms().map(move |s| (d, s))) {
        eq(c, format!("mov {dst}, {src}"), |c| c.mov(dst, src));
    }
    // mem <- reg
    for (dst, src) in mems().flat_map(|d| regs().map(move |s| (d, s))) {
        eq(c, format!("mov {dst}, {src}"), |c| c.mov(dst, src));
    }
    // mem <- imm
    for (dst, src) in mems().flat_map(|d| imms().map(move |s| (d, s))) {
        eq(c, format!("mov {dst}, {src}"), |c| c.mov(dst, src));
    }
}
/// Compares `add`/`sub` with immediate operands against the cached
/// reference encodings.
#[test]
fn add_sub() {
    // Own cache file: sharing "mov" with the `mov` test made the two tests
    // (which may run in parallel) overwrite each other's cache on drop.
    let c = &mut TestCtx::new("add_sub");
    for dst in regs() {
        for src in imms() {
            eq(c, format!("add {dst}, {src}"), |c| c.add(dst, src));
        }
    }
    for dst in regs() {
        for src in imms() {
            eq(c, format!("sub {dst}, {src}"), |c| c.sub(dst, src));
        }
    }
}
-165
View File
@@ -1,165 +0,0 @@
use crate::arch::x86_64::*;
use std::{collections::HashMap, fs::OpenOptions, io::Write, process::Command};
// Displacements exercised for memory operands: zero plus the extremes of
// each signed width.
const DISPS: &[i32] = &[
0x0,
i8::MIN as i32,
i8::MAX as i32,
i16::MIN as i32,
i16::MAX as i32,
i32::MIN,
i32::MAX,
];
// Immediates exercised by the tests: signed extremes, then unsigned maxima
// and the first value past each of them.
const IMMS: &[i128] = &[
0x0,
i8::MIN as i128,
i8::MAX as i128,
i16::MIN as i128,
i16::MAX as i128,
i32::MIN as i128,
i32::MAX as i128,
i64::MIN as i128,
i64::MAX as i128,
u8::MAX as i128,
u8::MAX as i128 + 1,
u16::MAX as i128,
u16::MAX as i128 + 1,
u32::MAX as i128,
u32::MAX as i128 + 1,
// NOTE(review): i64::MAX is already listed above; following the pattern of
// the preceding entries this was possibly meant to be u64::MAX — confirm.
i64::MAX as i128,
];
// Every operand width, used for memory operand sizes.
const WIDTHS: &[Width] = &[Width::B8, Width::B16, Width::B32, Width::B64];
/// Iterates over the test immediates.
pub fn imms() -> impl Iterator<Item = i128> {
    IMMS.iter().copied()
}

/// Iterates over the registers selected for testing.
pub fn regs() -> impl Iterator<Item = Reg> {
    Reg::IMPORTANT.iter().copied()
}
pub fn mems() -> impl Iterator<Item = Mem> {
gen move {
for &reg in Reg::IMPORTANT {
for &disp in DISPS {
for &width in WIDTHS {
yield mem(reg, disp, width);
}
}
}
}
}
/// Per-test state: the encoder buffer under test plus a persistent cache of
/// reference results keyed by assembly text.
pub struct TestCtx {
// file the cache is loaded from and saved to
path: String,
// buffer handed to the encoder closure; cleared after each comparison
code: Code,
// assembly text -> reference bytes, or the assembler's error output
cache: HashMap<String, Result<Vec<u8>, String>>,
// set when a new entry was added, so the cache is rewritten on drop
changed: bool,
}
/// Asserts that `instr` encodes exactly what the reference assembler
/// produces for `asm`.
///
/// The reference result is taken from the cache, or computed with `nasm`
/// and cached on first use. Panics when one side succeeds and the other
/// fails, or when the bytes differ. The encoder buffer is cleared afterwards.
pub fn eq(
    ctx: &mut TestCtx,
    asm: impl AsRef<str>,
    instr: impl FnOnce(&mut Code) -> Result<(), CompilerMsg>,
) {
    let asm = asm.as_ref();
    if !ctx.cache.contains_key(asm) {
        ctx.changed = true;
        let res = nasm(asm);
        ctx.cache.insert(asm.to_string(), res);
    }
    let expected = &ctx.cache[asm];
    let code = &mut ctx.code;
    match (expected, instr(code)) {
        (Err(_), Err(_)) => (),
        (Ok(expected), Ok(_)) => {
            let res = &code.bytes[..];
            if expected != res {
                panic!("{asm}: expected {expected:x?}, got {res:x?}")
            }
        }
        (Err(e), Ok(_)) => {
            let res = &code.bytes[..];
            panic!("{asm}: should not have compiled:\n{e}\ngot: {res:x?}");
        }
        (Ok(expected), Err(e)) => {
            panic!(
                "{asm}: failed to compile: {}\nexpected: {expected:x?}",
                e.msg
            );
        }
    }
    code.bytes.clear();
}
fn nasm(input: &str) -> Result<Vec<u8>, String> {
let fin = "/tmp/69420nasm_in.asm";
let fout = "/tmp/69420nasm_out.o";
let input = "result:".to_string() + input;
write(fin, input.as_bytes());
run(["nasm", "-w+error", "-felf64", fin, &format!("-o{fout}")])?;
let output = run(["objdump", "--no-addresses", "-dw", "-Mintel", fout])?;
let mut iter = output.lines().skip_while(|l| !l.contains("result")).skip(1);
let res_line = iter.next().unwrap().trim();
let end = res_line.find("\t").unwrap();
let res_line = &res_line[..end];
let bytes = res_line
.trim()
.split(" ")
.map(|s| u8::from_str_radix(s, 16).unwrap())
.collect();
Ok(bytes)
}
/// Runs `input[0]` with the remaining elements as arguments.
///
/// Returns the captured stdout on success and the captured stderr when the
/// process exits unsuccessfully, including when it is terminated by a
/// signal. Output that is not valid UTF-8 is converted lossily.
///
/// # Panics
/// Panics if the process cannot be started.
fn run<const N: usize>(input: [&str; N]) -> Result<String, String> {
    let path = input[0];
    let output = Command::new(path)
        .args(&input[1..])
        .output()
        .expect("failed to run");
    let text = |bytes: Vec<u8>| String::from_utf8_lossy(&bytes).into_owned();
    // `success()` is false for signal-terminated children too, where
    // `code()` is `None` and unwrapping it would panic.
    if output.status.success() {
        Ok(text(output.stdout))
    } else {
        Err(text(output.stderr))
    }
}
/// Writes `binary` to `path`, creating or truncating the file, and syncs it
/// to disk. Panics on any I/O failure.
fn write(path: &str, binary: &[u8]) {
    let mut options = OpenOptions::new();
    options.write(true).create(true).truncate(true);
    let mut file = options.open(path).expect("Failed to create file");
    file.write_all(binary).expect("Failed to write to file");
    file.sync_all().expect("Failed to sync file");
}
const CACHE_PATH: &str = "test/nasm_cache";
impl TestCtx {
pub fn new(name: &str) -> Self {
let path = CACHE_PATH.to_string() + "/" + name;
let cache = match std::fs::read(&path) {
Ok(bytes) => bitcode::decode(&bytes).unwrap_or_default(),
Err(_) => Default::default(),
};
Self {
path,
code: Default::default(),
cache,
changed: Default::default(),
}
}
}
impl Drop for TestCtx {
fn drop(&mut self) {
if self.changed {
write(&self.path, &bitcode::encode(&self.cache));
}
}
}
-54
View File
@@ -1,54 +0,0 @@
use super::*;
// End-to-end test: builds a program with two functions, links it into an
// ELF file, runs it and checks stdout and the exit code.
#[test]
fn hello() -> Result<(), CompilerMsg> {
let mut program = Program::<X86_64>::default();
let text = b"Hello world!\n";
let text_sym = program.ro_data("hello_en", text);
let text2 = "世界、こんにちは!\n";
let text_sym2 = program.ro_data("hello_jp", text2);
// helper function: prints the second string and returns
let hello2 = program.func(
"hello2",
[Instr::Asm(encode(|c| {
// presumably Linux syscall 1 (write) to fd 1 (stdout) — the asserted
// stdout below is consistent with that
c.mov(ax, 1)?;
c.mov(di, 1)?;
c.lea(rsi, text_sym2);
c.mov(dx, text2.len() as u64)?;
c.syscall();
c.ret();
Ok(())
})?)],
);
let entry = program.func(
"main",
[Instr::Asm(encode(|c| {
// 39 is kept on the stack and popped into rdi right before the final
// syscall; the test asserts it as the exit code
c.mov(rdi, 39)?;
c.push(rdi)?;
c.mov(ax, 1)?;
c.mov(di, 1)?;
c.lea(rsi, text_sym);
c.mov(dx, text.len() as u64)?;
c.syscall();
c.call(hello2);
// presumably Linux syscall 0x3c (exit)
c.mov(ax, 0x3c)?;
c.pop(rdi)?;
c.syscall();
Ok(())
})?)],
);
program.entry = Some(entry);
let linked = program.compile().expect("failed to compile");
let binary = linked.to_elf();
let path = "test/bin/x86_64_test";
write(path, &binary);
println!("running...");
let mut cmd = Command::new(path);
let output = cmd.output().expect("failed to run");
let Some(code) = output.status.code() else {
panic!("no exit code");
};
let result: String = output.stdout.try_into().expect("non ascii output");
assert_eq!(result, "Hello world!\n世界、こんにちは!\n");
assert_eq!(code, 39);
Ok(())
}
-11
View File
@@ -1,11 +0,0 @@
mod linux;
mod util;
mod windows;
use crate::{
arch::x86_64::*,
backend::{Instr, Program},
io::CompilerMsg,
};
use std::process::Command;
use util::*;
-13
View File
@@ -1,13 +0,0 @@
use std::{fs::OpenOptions, io::Write, os::unix::fs::OpenOptionsExt};
/// Writes `binary` to `path` as a file created with mode `0o750`, so the
/// result can be executed, and syncs it to disk. Panics on any I/O failure.
pub fn write(path: &str, binary: &[u8]) {
    let mut options = OpenOptions::new();
    options.write(true).create(true).truncate(true).mode(0o750);
    let mut file = options.open(path).expect("Failed to create file");
    file.write_all(binary).expect("Failed to write to file");
    file.sync_all().expect("Failed to sync file");
}
-48
View File
@@ -1,48 +0,0 @@
use super::*;
// End-to-end test: builds a program that calls three KERNEL32 imports, links
// it into a PE file, runs it under wine and checks stdout and the exit code.
#[test]
fn hello() -> Result<(), CompilerMsg> {
let mut program = Program::<X86_64>::default();
let [get_std_handle, write_file, exit_process] =
program.external("KERNEL32.dll", ["GetStdHandle", "WriteFile", "ExitProcess"]);
let text = b"Hello world!\n";
let text_sym = program.ro_data("hello_en", text);
// NOTE(review): `written` is passed to WriteFile below, presumably as the
// output slot for the byte count, yet it is declared via `ro_data` — confirm
// the data is actually writable.
let written = program.ro_data("written", [0; 4]);
let entry = program.func(
"main",
[Instr::Asm(encode(|c| {
// NOTE(review): this adjusts `esp`, not `rsp`; a 32-bit register write
// clears the upper half of the 64-bit register — confirm this is intended.
c.sub(esp, 0x28)?;
// stdout
c.mov(ecx, -11)?;
c.call_mem(get_std_handle);
// write
c.mov(rcx, rax)?;
c.lea(rdx, text_sym);
c.mov(r8d, text.len() as u64)?;
c.lea(r9, written);
c.mov(mem(rsp, 0x20, Width::B32), 0)?;
c.call_mem(write_file);
// exit
c.mov(ecx, 39)?;
c.call_mem(exit_process);
Ok(())
})?)],
);
program.entry = Some(entry);
let linked = program.compile().expect("failed to compile");
let binary = linked.to_pe();
let path = "test/bin/x86_64_test.exe";
write(path, &binary);
let mut cmd = Command::new("wine");
cmd.arg(path);
let output = cmd.output().expect("failed to run");
let Some(code) = output.status.code() else {
panic!("no exit code");
};
let result: String = output.stdout.try_into().expect("non ascii output");
assert_eq!(result, "Hello world!\n");
assert_eq!(code, 39);
Ok(())
}
-2
View File
@@ -1,2 +0,0 @@
mod full;
mod asm;
-171
View File
@@ -1,171 +0,0 @@
use std::num::TryFromIntError;
use super::*;
/// A memory operand: `[reg + disp]` accessed with the given width.
#[derive(Clone, Copy)]
pub struct Mem {
/// Base register of the address.
pub reg: Reg,
/// Signed displacement added to the base register.
pub disp: i32,
/// Size of the access.
pub width: Width,
}
/// Any source operand: register, immediate or memory.
#[derive(Clone, Copy)]
pub enum RegImmMem {
Reg(Reg),
Imm(Imm),
Mem(Mem),
}
/// Any destination operand: register or memory.
#[derive(Clone, Copy)]
pub enum RegMem {
Reg(Reg),
Mem(Mem),
}
/// An immediate value. It is stored as `i128` so that the full signed and
/// unsigned 64-bit ranges are both representable.
#[derive(Clone, Copy, PartialEq, PartialOrd)]
pub struct Imm(pub i128);
/// Shorthand constructor for a [`Mem`] operand.
pub fn mem(reg: Reg, disp: i32, width: Width) -> Mem {
Mem { reg, disp, width }
}
impl Imm {
    /// Error reported when an immediate does not fit in 64 bits.
    pub fn overflow_msg() -> CompilerMsg {
        "immediate overflow".into()
    }

    /// Smallest width whose signed (two's complement) range contains the value.
    ///
    /// # Errors
    /// Fails when the value is outside the `i64` range.
    pub fn width_signed(&self) -> Result<Width, CompilerMsg> {
        let v = self.0;
        let width = if i8::try_from(v).is_ok() {
            Width::B8
        } else if i16::try_from(v).is_ok() {
            Width::B16
        } else if i32::try_from(v).is_ok() {
            Width::B32
        } else if i64::try_from(v).is_ok() {
            Width::B64
        } else {
            return Err(Self::overflow_msg());
        };
        Ok(width)
    }

    /// Smallest width whose unsigned maximum is at least the value's
    /// magnitude; negative values are classified by their absolute value.
    ///
    /// # Errors
    /// Fails when the magnitude exceeds `u64::MAX`.
    pub fn width_unsigned(&self) -> Result<Width, CompilerMsg> {
        let magnitude = self.0.unsigned_abs();
        let width = if u8::try_from(magnitude).is_ok() {
            Width::B8
        } else if u16::try_from(magnitude).is_ok() {
            Width::B16
        } else if u32::try_from(magnitude).is_ok() {
            Width::B32
        } else if u64::try_from(magnitude).is_ok() {
            Width::B64
        } else {
            return Err(Self::overflow_msg());
        };
        Ok(width)
    }

    /// Truncates the value to `width` and reads the result back as a signed
    /// number of that width.
    pub fn reinterpret(&self, width: Width) -> Self {
        let v = self.0;
        let signed = match width {
            Width::B64 => v as i64 as i128,
            Width::B32 => v as i32 as i128,
            Width::B16 => v as i16 as i128,
            Width::B8 => v as i8 as i128,
        };
        Self(signed)
    }
}
/// Converts an immediate to `u8`, failing when it is out of range.
impl TryFrom<Imm> for u8 {
    type Error = TryFromIntError;

    fn try_from(value: Imm) -> Result<Self, Self::Error> {
        u8::try_from(value.0)
    }
}
/// Formats the operand as assembly text, e.g. `DWORD [rsp + 0x20]`.
impl std::fmt::Display for Mem {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let reg = self.reg;
        let size = match self.width {
            Width::B64 => "QWORD",
            Width::B32 => "DWORD",
            Width::B16 => "WORD",
            Width::B8 => "BYTE",
        };
        let disp = signed_hex(i128::from(self.disp), true);
        write!(f, "{size} [{reg} {}]", disp)
    }
}
// fromrot
impl From<Reg> for RegImmMem {
fn from(value: Reg) -> Self {
Self::Reg(value)
}
}
impl From<Reg> for RegMem {
fn from(value: Reg) -> Self {
Self::Reg(value)
}
}
impl From<Mem> for RegImmMem {
fn from(value: Mem) -> Self {
Self::Mem(value)
}
}
impl From<Mem> for RegMem {
fn from(value: Mem) -> Self {
Self::Mem(value)
}
}
impl From<u64> for RegImmMem {
fn from(value: u64) -> Self {
Self::Imm(value.into())
}
}
impl From<i64> for RegImmMem {
fn from(value: i64) -> Self {
Self::Imm(value.into())
}
}
impl From<i32> for RegImmMem {
fn from(value: i32) -> Self {
Self::Imm(value.into())
}
}
impl From<i128> for RegImmMem {
fn from(value: i128) -> Self {
Self::Imm(value.into())
}
}
impl From<u64> for Imm {
fn from(value: u64) -> Self {
Self(value as i128)
}
}
impl From<i64> for Imm {
fn from(value: i64) -> Self {
Self(value as i128)
}
}
impl From<i32> for Imm {
fn from(value: i32) -> Self {
Self(value as i128)
}
}
impl From<i128> for Imm {
fn from(value: i128) -> Self {
Self(value)
}
}
// Formatting of an immediate is delegated to the inner `i128`.
// NOTE(review): `self.0.fmt(f)` resolves through whichever formatting trait
// is in scope in this module; the import is not visible here, so which trait
// each call uses should be confirmed.
impl std::fmt::Display for Imm {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
self.0.fmt(f)
}
}
impl std::fmt::Debug for Imm {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
self.0.fmt(f)
}
}
-175
View File
@@ -1,175 +0,0 @@
use crate::backend::Symbol;
use super::*;
/// Something that can be placed in the `rm` field of a ModRM byte.
pub trait ModRMRM {
/// The 3-bit value for the `rm` field.
fn rm(&self) -> u8;
/// The addressing form, which selects the `mod` field and any trailing bytes.
fn addr(&self) -> EffAddr;
}
/// Addressing form of a ModRM operand.
pub enum EffAddr {
/// Memory operand without displacement.
Mem0,
/// Memory operand with an 8-bit displacement.
Mem8(i8),
/// Memory operand with a 32-bit displacement.
Mem32(i32),
/// Symbol reference; a 4-byte placeholder is emitted and patched later.
Sym(Symbol),
/// Not a memory operand (register direct).
None,
}
/// Registers are encoded directly, without a memory addressing form.
impl ModRMRM for Reg {
    fn rm(&self) -> u8 {
        self.base()
    }

    fn addr(&self) -> EffAddr {
        EffAddr::None
    }
}

/// Memory operands use the shortest displacement form that fits.
impl ModRMRM for Mem {
    fn rm(&self) -> u8 {
        self.reg.base()
    }

    fn addr(&self) -> EffAddr {
        let disp = self.disp;
        if disp == 0 {
            // base encoding 101 has no displacement-free form, so an
            // explicit zero 8-bit displacement is used instead
            return if self.reg.base() == 0b101 {
                EffAddr::Mem8(0)
            } else {
                EffAddr::Mem0
            };
        }
        match i8::try_from(disp) {
            Ok(short) => EffAddr::Mem8(short),
            Err(_) => EffAddr::Mem32(disp),
        }
    }
}

/// A bare `i32` is a 32-bit displacement with `rm = 101`.
impl ModRMRM for i32 {
    fn rm(&self) -> u8 {
        0b101
    }

    fn addr(&self) -> EffAddr {
        EffAddr::Mem32(*self)
    }
}

/// A symbol is encoded with `rm = 101` and a patched 4-byte offset.
impl ModRMRM for Symbol {
    fn rm(&self) -> u8 {
        0b101
    }

    fn addr(&self) -> EffAddr {
        EffAddr::Sym(*self)
    }
}
// A raw `u8` is used as-is, e.g. for opcode extensions.
impl ModRMReg for u8 {
fn val(&self) -> u8 {
*self
}
}
// A register contributes its low three bits.
impl ModRMReg for Reg {
fn val(&self) -> u8 {
self.base()
}
}
/// Something that can be placed in the `reg` field of a ModRM byte.
pub trait ModRMReg {
/// The value for the `reg` field.
fn val(&self) -> u8;
}
/// Builds a REX prefix byte: `0100WRXB`.
#[inline(always)]
pub fn rex(w: impl RexW, r: impl RexBit, x: u8, b: impl RexBit) -> u8 {
    let flags = [(w.rexw(), 3), (r.rex(), 2), (x.rex(), 1), (b.rex(), 0)];
    flags
        .into_iter()
        .fold(0x40, |byte, (set, pos)| byte | bit(set, pos))
}
/// Returns a byte with bit `pos` set when `val` is true, and 0 otherwise.
#[inline(always)]
fn bit(val: bool, pos: u8) -> u8 {
    u8::from(val) << pos
}
/// An operand that can contribute one of the R/X/B bits of a REX prefix.
pub trait RexBit: Sized {
/// Whether the operand sets its REX bit.
fn rex(&self) -> bool;
/// Whether the operand forces a REX prefix even when no bit is set.
fn req(&self) -> bool {
false
}
}
// a raw value sets the bit when it is non-zero
impl RexBit for u8 {
fn rex(&self) -> bool {
*self != 0
}
}
impl RexBit for Reg {
// registers 8-15 need the extension bit
fn rex(&self) -> bool {
self.gt8()
}
// 8-bit, non-high registers numbered 4 and up need a REX prefix to be present
fn req(&self) -> bool {
self.gt4() && (self.width() == Width::B8) && !self.high()
}
}
// a memory operand follows its base register
impl RexBit for Mem {
fn rex(&self) -> bool {
self.reg.rex()
}
}
/// An operand that decides the W bit of a REX prefix.
pub trait RexW {
    /// Whether REX.W must be set.
    fn rexw(&self) -> bool;
}

/// Only 64-bit operands set REX.W.
impl RexW for Width {
    fn rexw(&self) -> bool {
        matches!(self, Width::B64)
    }
}

/// A register follows its width.
impl RexW for Reg {
    fn rexw(&self) -> bool {
        RexW::rexw(&self.width())
    }
}

/// A raw value sets REX.W only when it is exactly 1.
impl RexW for u8 {
    fn rexw(&self) -> bool {
        matches!(*self, 1)
    }
}

/// A memory operand follows its access width.
impl RexW for Mem {
    fn rexw(&self) -> bool {
        RexW::rexw(&self.width)
    }
}
/// Computes the little-endian 32-bit relative offset from the end of the
/// 4-byte field at `pos` to `addr`.
///
/// assumes the next instruction is directly after
pub fn addr_offset(pos: usize, addr: u64) -> [u8; 4] {
    let next_instr = (pos + 4) as i32;
    let target = addr as i32;
    (target - next_instr).to_le_bytes()
}
/// Display adapter printing a signed value as sign + hexadecimal magnitude.
pub struct SignedHex {
    /// The value to print.
    pub val: i128,
    /// When true the sign is printed as a binary operator (`"+ "` / `"- "`);
    /// otherwise only negative values get a `-` prefix.
    pub op: bool,
}

/// Wraps `val` for display; see [`SignedHex`].
pub fn signed_hex(val: i128, op: bool) -> SignedHex {
    SignedHex { val, op }
}

impl std::fmt::Display for SignedHex {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        // `unsigned_abs` cannot overflow, unlike `abs`, which fails for
        // `i128::MIN` because its magnitude is not representable as `i128`.
        let dsp = self.val.unsigned_abs();
        let sign = match (self.op, self.val < 0) {
            (true, true) => "- ",
            (true, false) => "+ ",
            (false, true) => "-",
            (false, false) => "",
        };
        write!(f, "{sign}0x{dsp:x}")
    }
}
-135
View File
@@ -1,135 +0,0 @@
use crate::backend::{LinkedProgram, container::encode::ByteEncoder};
/// The ELF64 file header, laid out field by field as written to the file.
#[repr(C)]
#[derive(Default)]
pub struct ELF64Header {
// magic number at the start of the file
magic: u32,
// 0x2 = 64 bit
class: u8,
// 0x1 = little endian
endianness: u8,
ei_version: u8,
// 0x0 = System V
os_abi: u8,
os_abi_ver: u8,
pad: [u8; 7],
// object file type, see `EType`
ty: u16,
// target machine, see `Arch::machine`
machine: u16,
e_version: u32,
// address execution starts at
entry: u64,
// file offsets of the program / section header tables
program_header_offset: u64,
section_header_offset: u64,
flags: u32,
// size of this header
header_size: u16,
// entry size and entry count of the program header table
program_header_entry_size: u16,
program_header_num: u16,
// entry size and entry count of the section header table
section_header_entry_size: u16,
section_header_num: u16,
section_header_str_idx: u16,
}
/// One entry of the ELF64 program header table.
#[repr(C)]
#[derive(Default)]
pub struct ProgramHeader {
// segment type (0x1 = LOAD)
ty: u32,
// permission bits
flags: u32,
// file offset the segment starts at
offset: u64,
// virtual / physical address the segment is placed at
vaddr: u64,
paddr: u64,
// size of the segment in the file and in memory
filesz: u64,
memsz: u64,
align: u64,
}
/// One entry of the ELF64 section header table. Only its size is used in the
/// code shown here; no sections are emitted.
#[repr(C)]
pub struct SectionHeader {
name_idx: u32,
ty: u32,
flags: u64,
addr: u64,
offset: u64,
size: u64,
link: u32,
info: u32,
addr_align: u64,
entry_size: u64,
}
/// Target machines that can be named in an ELF header.
pub enum Arch {
    X86_64,
    Riscv,
}

impl Arch {
    /// Value of the ELF header's `machine` field for this architecture.
    pub fn machine(&self) -> u16 {
        const MACHINE_X86_64: u16 = 0x3e;
        const MACHINE_RISCV: u16 = 0xf3;
        match *self {
            Self::Riscv => MACHINE_RISCV,
            Self::X86_64 => MACHINE_X86_64,
        }
    }
}
/// Values for the ELF header's type field. `create` uses `Dyn` for
/// position-independent output and `Exec` otherwise.
#[repr(u8)]
pub enum EType {
None = 0,
Rel = 1,
Exec = 2,
Dyn = 3,
Core = 4,
}
/// Wraps raw machine code in a minimal ELF64 executable image.
///
/// The file consists of the ELF header, a single LOAD program header and
/// then `program`. `start_offset` is the entry point's offset within
/// `program`.
// this is currently specialized for x86_64; obviously add params later
pub fn create(program: &[u8], start_offset: u64) -> Vec<u8> {
    let pie = true;
    let addr_start = if pie { 0 } else { 0x400000 };
    let page_size = 0x1000;
    let mut data = ByteEncoder::default();
    let header = data.reserve::<ELF64Header>();
    let program_header_offset = data.pos() as u64;
    let program_header = data.reserve::<ProgramHeader>();
    let program_pos = data.pos() as u64;
    data.extend(program);
    // The LOAD segment below starts at file offset 0, so it has to span the
    // headers as well as the code. Counting only the code would leave the
    // last `program_pos` bytes of the program outside the segment.
    let image_size = data.pos() as u64;
    data[header] = ELF64Header {
        magic: 0x7f_45_4c_46u32.swap_bytes(),
        class: 0x2,      // 64 bit
        endianness: 0x1, // little endian
        ei_version: 0x1,
        os_abi: 0x0, // system-v
        os_abi_ver: 0x0,
        pad: [0x0; 7],
        ty: if pie { EType::Dyn } else { EType::Exec } as u16,
        machine: Arch::X86_64.machine(),
        e_version: 0x1,
        // the code is mapped `program_pos` bytes into the segment
        entry: addr_start + program_pos + start_offset,
        program_header_offset,
        section_header_offset: 0x0,
        // C ABI (16 bit instruction align) + double precision floats
        // NOTE(review): that description reads like RISC-V header flags while
        // the machine written above is x86_64 — confirm these are wanted.
        flags: 0x1 | 0x4,
        header_size: size_of::<ELF64Header>() as u16,
        program_header_entry_size: size_of::<ProgramHeader>() as u16,
        program_header_num: 0x1,
        section_header_entry_size: size_of::<SectionHeader>() as u16,
        section_header_num: 0x0,
        section_header_str_idx: 0x0,
    };
    data[program_header] = ProgramHeader {
        ty: 0x1,      // LOAD
        flags: 0b101, // executable, readable
        offset: 0x0,
        vaddr: addr_start,
        paddr: 0x0,
        filesz: image_size,
        memsz: image_size,
        align: page_size,
    };
    data.data
}
impl LinkedProgram<u64> {
/// Packages the linked code as an ELF executable image.
///
/// # Panics
/// Panics with "no start" when the program has no entry point.
pub fn to_elf(&self) -> Vec<u8> {
create(&self.code, self.entry.expect("no start"))
}
}
-154
View File
@@ -1,154 +0,0 @@
use std::ops::{Index, IndexMut};
/// Append-only byte buffer used to lay out binary file formats.
///
/// Besides raw bytes it can reserve zeroed room for a `T` and hand back a
/// typed handle ([`Reserved`] / [`ReservedArr`]) which is later used to index
/// the buffer and fill the value in.
///
/// The typed accessors reinterpret buffer bytes as `T`. They are only
/// meaningful for plain-data types (integers, `#[repr(C)]` structs of
/// integers) for which every bit pattern is a valid value.
#[derive(Default)]
pub struct ByteEncoder {
    pub data: Vec<u8>,
}

impl ByteEncoder {
    /// Appends one byte.
    pub fn push(&mut self, byte: u8) {
        self.data.push(byte);
    }

    /// Appends the in-memory bytes of `val` and returns a handle to them.
    pub fn val<T>(&mut self, val: &T) -> Reserved<T> {
        let pos = self.pos();
        // SAFETY: `val` is a live reference, so `size_of::<T>()` bytes starting
        // at it are readable for the duration of this call.
        // NOTE(review): padding bytes inside `T` are uninitialized; only pass
        // types without padding.
        let bytes =
            unsafe { core::slice::from_raw_parts((val as *const T).cast::<u8>(), size_of::<T>()) };
        self.data.extend_from_slice(bytes);
        Reserved::new(pos)
    }

    /// Current length of the buffer, i.e. the offset the next byte gets.
    pub fn pos(&self) -> usize {
        self.data.len()
    }

    /// Zero-pads the buffer up to the next multiple of `align`.
    pub fn align(&mut self, align: usize) {
        let padded = self.data.len().next_multiple_of(align);
        self.data.resize(padded, 0);
    }

    /// Bounds-checked raw pointer to `size_of::<T>()` bytes at `index`.
    /// The pointer is NOT guaranteed to be aligned for `T`.
    fn ptr_at<T>(&mut self, index: usize) -> *mut T {
        let bytes = &mut self.data[index..index + size_of::<T>()];
        bytes.as_mut_ptr().cast::<T>()
    }

    /// Overwrites the bytes at `index` with `val`; any alignment is accepted.
    pub fn set_at<T>(&mut self, index: usize, val: T) {
        let ptr = self.ptr_at::<T>(index);
        // SAFETY: `ptr_at` bounds-checked the range and the write is unaligned.
        unsafe { ptr.write_unaligned(val) }
    }

    /// Reads a `T` at `index`, passes it through `edit` and writes the result back.
    pub fn edit_at<T>(&mut self, index: usize, edit: impl FnOnce(T) -> T) {
        let ptr = self.ptr_at::<T>(index);
        // SAFETY: `ptr_at` bounds-checked the range; both accesses are
        // unaligned. The bytes must form a valid `T` (caller contract).
        unsafe {
            let current = ptr.read_unaligned();
            ptr.write_unaligned(edit(current));
        }
    }

    /// Appends `size_of::<T>()` zero bytes and returns a handle to them.
    #[must_use]
    pub fn reserve<T>(&mut self) -> Reserved<T> {
        let pos = self.pos();
        self.pad(size_of::<T>());
        Reserved::new(pos)
    }

    /// Appends `amt` zero bytes.
    pub fn pad(&mut self, amt: usize) {
        let grown = self.data.len() + amt;
        self.data.resize(grown, 0);
    }

    /// Appends zeroed room for `len` values of `T` and returns a handle to it.
    #[must_use]
    pub fn reserve_arr<T>(&mut self, len: usize) -> ReservedArr<T> {
        let pos = self.pos();
        self.pad(size_of::<T>() * len);
        ReservedArr::new(pos, len)
    }

    /// Casts `bytes` to a `T` pointer, panicking if it is misaligned for `T`.
    /// References must be aligned, so indexing checks this before creating one.
    fn aligned<T>(bytes: &[u8]) -> *const T {
        let ptr = bytes.as_ptr();
        assert!(
            (ptr as usize) % core::mem::align_of::<T>() == 0,
            "ByteEncoder: slot is not aligned for the requested type (call `align` before reserving)"
        );
        ptr.cast::<T>()
    }

    /// Mutable counterpart of [`ByteEncoder::aligned`].
    fn aligned_mut<T>(bytes: &mut [u8]) -> *mut T {
        let ptr = bytes.as_mut_ptr();
        assert!(
            (ptr as usize) % core::mem::align_of::<T>() == 0,
            "ByteEncoder: slot is not aligned for the requested type (call `align` before reserving)"
        );
        ptr.cast::<T>()
    }
}

/// Handle to the position of one `T` inside a [`ByteEncoder`].
pub struct Reserved<T> {
    pos: usize,
    _pd: std::marker::PhantomData<T>,
}

// Implemented by hand so the handle is `Copy` even when `T` is not.
impl<T> Clone for Reserved<T> {
    fn clone(&self) -> Self {
        *self
    }
}

impl<T> Copy for Reserved<T> {}

/// Handle to the position of `len` consecutive `T`s inside a [`ByteEncoder`].
pub struct ReservedArr<T> {
    pos: usize,
    len: usize,
    _pd: std::marker::PhantomData<T>,
}

impl<T> Clone for ReservedArr<T> {
    fn clone(&self) -> Self {
        *self
    }
}

impl<T> Copy for ReservedArr<T> {}

impl<T> Reserved<T> {
    fn new(pos: usize) -> Self {
        Self {
            pos,
            _pd: std::marker::PhantomData,
        }
    }
}

impl<T> ReservedArr<T> {
    fn new(pos: usize, len: usize) -> Self {
        Self {
            pos,
            len,
            _pd: std::marker::PhantomData,
        }
    }
}

impl<T> Index<Reserved<T>> for ByteEncoder {
    type Output = T;

    /// # Panics
    /// Panics if the slot is out of bounds or not aligned for `T`.
    fn index(&self, index: Reserved<T>) -> &Self::Output {
        let bytes = &self.data[index.pos..index.pos + size_of::<T>()];
        let ptr = Self::aligned::<T>(bytes);
        // SAFETY: the range was bounds-checked by the slice above, alignment
        // was just asserted, and the borrow of `self` keeps the bytes alive.
        // The bytes must form a valid `T` (caller contract).
        unsafe { &*ptr }
    }
}

impl<T> IndexMut<Reserved<T>> for ByteEncoder {
    /// # Panics
    /// Panics if the slot is out of bounds or not aligned for `T`.
    fn index_mut(&mut self, index: Reserved<T>) -> &mut Self::Output {
        let bytes = &mut self.data[index.pos..index.pos + size_of::<T>()];
        let ptr = Self::aligned_mut::<T>(bytes);
        // SAFETY: as in `index`; the exclusive borrow of `self` makes the
        // returned reference unique.
        unsafe { &mut *ptr }
    }
}

impl<T> Index<ReservedArr<T>> for ByteEncoder {
    type Output = [T];

    /// # Panics
    /// Panics if the slots are out of bounds or not aligned for `T`.
    fn index(&self, index: ReservedArr<T>) -> &Self::Output {
        let bytes = &self.data[index.pos..index.pos + size_of::<T>() * index.len];
        let ptr = Self::aligned::<T>(bytes);
        // SAFETY: `bytes` covers exactly `index.len` values of `T`, alignment
        // was asserted, and the borrow of `self` keeps the bytes alive.
        unsafe { core::slice::from_raw_parts(ptr, index.len) }
    }
}

impl<T> IndexMut<ReservedArr<T>> for ByteEncoder {
    /// # Panics
    /// Panics if the slots are out of bounds or not aligned for `T`.
    fn index_mut(&mut self, index: ReservedArr<T>) -> &mut Self::Output {
        let bytes = &mut self.data[index.pos..index.pos + size_of::<T>() * index.len];
        let ptr = Self::aligned_mut::<T>(bytes);
        // SAFETY: as in `index`; the exclusive borrow of `self` makes the
        // returned slice unique.
        unsafe { core::slice::from_raw_parts_mut(ptr, index.len) }
    }
}

impl Extend<u8> for ByteEncoder {
    fn extend<T: IntoIterator<Item = u8>>(&mut self, iter: T) {
        self.data.extend(iter);
    }
}

impl<'a> Extend<&'a u8> for ByteEncoder {
    fn extend<T: IntoIterator<Item = &'a u8>>(&mut self, iter: T) {
        self.data.extend(iter);
    }
}
-5
View File
@@ -1,5 +0,0 @@
pub mod elf;
mod encode;
pub mod pe;
use encode::*;
-27
View File
@@ -1,27 +0,0 @@
/// The data-directory table that `pe::create` writes right after the optional
/// header. `#[repr(C)]` because the struct's bytes are copied into the file
/// as-is; `create` derives the entry count from
/// `size_of::<DataDirs>() / size_of::<DataDir>()`, so every field must stay a
/// `DataDir`. Only `import` is filled in by the visible code.
#[derive(Default)]
#[repr(C)]
pub struct DataDirs {
pub export: DataDir,
pub import: DataDir,
pub rsc: DataDir,
pub exception: DataDir,
pub cert: DataDir,
pub base_reloc: DataDir,
pub debug: DataDir,
pub arch: DataDir,
pub global_ptr: DataDir,
pub tls: DataDir,
pub load_config: DataDir,
pub bound_import: DataDir,
pub import_addr: DataDir,
pub delay_import_desc: DataDir,
pub clr_runtime_header: DataDir,
pub reserved: DataDir,
}
/// One data-directory entry: where a table starts and how many bytes it spans.
/// The default (all zero) is what `pe::create` writes for unused entries.
#[derive(Default)]
#[repr(C)]
pub struct DataDir {
pub virt_addr_rva: u32,
pub size: u32,
}
-65
View File
@@ -1,65 +0,0 @@
/// Leading "MZ" header of the image. `pe::create` sets `magic` to the bytes
/// `MZ`, zeroes `stuff`, and stores the file offset of the PE header in `lfanew`.
#[repr(C)]
pub struct MZHeader {
pub magic: u16,
// filler between the magic and `lfanew`; always written as zeros by `create`
pub stuff: [u16; 15 + 4 + 10],
pub lfanew: u32,
}
/// PE signature plus file header, written verbatim (`#[repr(C)]`) by
/// `pe::create` at the offset recorded in `MZHeader::lfanew`.
#[repr(C)]
pub struct PeHeader {
// the bytes "PE\0\0"
pub magic: u32,
pub machine: u16,
pub num_sections: u16,
pub time_date_stamp: u32,
pub sym_tab_ptr: u32,
pub num_symbols: u32,
// `create` stores size_of::<OptHeader64>() + size_of::<DataDirs>() here
pub opt_header_size: u16,
pub characteristics: u16,
}
/// Optional header for 64-bit images (`create` writes magic `0x20b`). It is
/// followed in the file by the `DataDirs` table, whose entry count goes into
/// `num_of_rva_and_sizes`. Field order and widths are the on-disk layout.
#[repr(C)]
pub struct OptHeader64 {
pub magic: u16,
pub major_linker_ver: u8,
pub minor_linker_ver: u8,
pub code_size: u32,
pub init_data_size: u32,
pub uninit_data_size: u32,
pub entry_addr: u32,
pub code_base: u32,
pub image_base: u64,
pub section_align: u32,
pub file_align: u32,
pub major_os_ver: u16,
pub minor_os_ver: u16,
pub major_image_ver: u16,
pub minor_image_ver: u16,
pub major_subsystem_ver: u16,
pub minor_subsystem_ver: u16,
pub win32_ver: u32,
pub image_size: u32,
pub headers_size: u32,
pub checksum: u32,
pub subsystem: u16,
pub dll_characteristics: u16,
pub stack_reserve_size: u64,
pub stack_commit_size: u64,
pub heap_reserve_size: u64,
pub heap_commit_size: u64,
pub loader_flags: u32,
pub num_of_rva_and_sizes: u32,
}
/// One section-table entry. `pe::create` emits exactly one, named `.text`.
#[repr(C)]
pub struct Section {
// NUL-padded to 8 bytes
pub name: [u8; 8],
pub virtual_size: u32,
pub virtual_addr: u32,
pub raw_data_size: u32,
// file offset at which the section's bytes start
pub raw_data_ptr: u32,
pub reloc_ptr: u32,
pub line_num_ptr: u32,
pub num_relocs: u16,
pub num_line_nums: u16,
pub characteristics: u32,
}
-94
View File
@@ -1,94 +0,0 @@
use crate::backend::{LibImport, pe::data_dir::DataDir};
use super::ByteEncoder;
/// Appends the import tables for `imports` to `data` and patches every
/// recorded usage inside the already-emitted code.
///
/// Layout: the import directory table (one entry per library plus a zeroed
/// terminator), then per library its name, lookup table, hint/name entries
/// and address table. Positions in `data` are stored directly as RVAs.
///
/// * `code_start` - position in `data` where the program's code begins;
///   usage offsets are relative to it.
///
/// Returns the data-directory entry covering the import directory table.
pub fn encode(data: &mut ByteEncoder, imports: &[LibImport], code_start: usize) -> DataDir {
    const ENTRY_SIZE: usize = size_of::<ImportLookupEntry>();

    data.align(4);
    let table_start = data.pos() as u32;
    let dir_table = data.reserve_arr::<ImportDirTable>(imports.len());
    // null entry to mark end
    data.pad(size_of::<ImportDirTable>());
    let table_end = data.pos() as u32;

    for (lib_idx, lib) in imports.iter().enumerate() {
        // name
        let name_rva = data.pos() as u32;
        data.extend(lib.name.as_bytes());
        data.push(0);

        // lookup table (zero-terminated), each entry pointing at a hint/name entry
        data.align(ENTRY_SIZE);
        let lookup_rva = data.pos();
        let lookup = data.reserve_arr::<ImportLookupEntry>(lib.syms.len());
        data.pad(ENTRY_SIZE);
        for (sym_idx, sym) in lib.syms.iter().enumerate() {
            let hint_rva = hint_name_entry(data, 0, &sym.name);
            data[lookup][sym_idx] = ImportLookupEntry::name(hint_rva);
        }

        // address table (zero-terminated): a copy of the lookup table
        data.align(ENTRY_SIZE);
        let address_rva = data.pos();
        for (sym_idx, sym) in lib.syms.iter().enumerate() {
            let slot = data.pos() as i32;
            for &usage in &sym.usages {
                // NOTE: sets relative offset rn
                let patch_at = code_start + usage;
                data.set_at::<i32>(patch_at, slot - patch_at as i32 - 4);
            }
            let copied = data[lookup][sym_idx];
            data.val(&copied);
        }
        data.pad(ENTRY_SIZE);

        // entry
        data[dir_table][lib_idx] = ImportDirTable {
            lookup_table_rva: lookup_rva as u32,
            time_date_stamp: 0,
            forwarder_chain: 0,
            name_rva,
            address_table_rva: address_rva as u32,
        };
    }

    DataDir {
        virt_addr_rva: table_start,
        size: table_end - table_start,
    }
}
/// One import-directory entry (one per imported library). `encode` reserves an
/// array of these followed by an all-zero entry that terminates the table.
#[repr(C)]
pub struct ImportDirTable {
pub lookup_table_rva: u32,
pub time_date_stamp: u32,
pub forwarder_chain: u32,
pub name_rva: u32,
pub address_table_rva: u32,
}
/// 64-bit import lookup / address table entry.
///
/// Bit 63 selects import by ordinal. Otherwise the low 31 bits hold the RVA of
/// a hint/name entry.
#[repr(C)]
#[derive(Clone, Copy)]
pub struct ImportLookupEntry(u64);

impl ImportLookupEntry {
    /// All-zero entry that terminates a table.
    pub const NULL: Self = Self(0);

    /// Import by name; `hint_name_table_rva` is the RVA of the hint/name entry.
    ///
    /// # Panics
    /// Panics if the RVA does not fit the 31-bit field (bits 30-0).
    pub fn name(hint_name_table_rva: u32) -> Self {
        assert!(
            hint_name_table_rva >> 31 == 0,
            "hint/name RVA must fit in 31 bits"
        );
        Self(u64::from(hint_name_table_rva))
    }

    /// Import by ordinal number.
    pub fn ordinal(ordinal: u16) -> Self {
        Self(u64::from(ordinal) | (1 << 63))
    }

    /// Little-endian on-disk encoding of the entry.
    pub fn bytes(&self) -> [u8; 8] {
        self.0.to_le_bytes()
    }
}
/// Appends a hint/name entry (little-endian `hint`, the NUL-terminated `name`,
/// then padding to an even position) and returns the position it starts at.
pub fn hint_name_entry(data: &mut ByteEncoder, hint: u16, name: &str) -> u32 {
    let entry_start = data.pos() as u32;
    data.extend(hint.to_le_bytes());
    data.extend(name.bytes().chain(std::iter::once(0)));
    data.align(2);
    entry_start
}
-114
View File
@@ -1,114 +0,0 @@
mod data_dir;
mod header;
mod import;
use super::*;
use crate::backend::LinkedProgram;
use data_dir::*;
use header::*;
/// Builds a 64-bit PE image (machine `0x8664`, optional-header magic `0x20b`)
/// with a single `.text` section that holds the program's code followed by its
/// import tables, and returns the file's bytes.
///
/// File order: MZ header, PE header, optional header, data directories, one
/// section-table entry, then the section contents.
///
/// # Panics
/// Panics if `program.entry` is `None`.
pub fn create(program: &LinkedProgram<u64>) -> Vec<u8> {
let mut data = ByteEncoder::default();
// With both alignments at 1 the section's virtual address equals its file
// offset (both are `hdr_size` below); `import::encode` stores buffer
// positions directly as RVAs.
let file_align = 1;
let section_align = 1;
let num_of_rva_and_sizes: u32 = (size_of::<DataDirs>() / size_of::<DataDir>()) as u32;
// reserved now, filled once the PE header position is known
let mz_header = data.reserve::<MZHeader>();
let pe_header_pos = data.pos();
data.val(&PeHeader {
magic: u32::from_ne_bytes(*b"PE\0\0"),
machine: 0x8664,
num_sections: 1,
time_date_stamp: 0,
sym_tab_ptr: 0,
num_symbols: 0,
opt_header_size: (size_of::<OptHeader64>() + size_of::<DataDirs>()) as u16,
// https://learn.microsoft.com/en-us/windows/win32/debug/pe-format
// executable | can handle >2GB addrs | debug info removed
characteristics: 0x2 | 0x20 | 0x0200,
});
data[mz_header] = MZHeader {
magic: u16::from_ne_bytes(*b"MZ"),
stuff: [0; _],
lfanew: pe_header_pos as u32,
};
// sizes are only known after the section has been emitted, so reserve first
let opt_header = data.reserve::<OptHeader64>();
let data_dirs = data.val(&DataDirs::default());
let code_sect = data.reserve::<Section>();
let hdr_size = data.pos() as u32;
// .text start
let text_start = data.pos() as u32;
let code_start = data.pos();
data.extend(&program.code);
if !program.imports.is_empty() {
// import tables are appended inside .text, right after the code
let import_rva = import::encode(&mut data, &program.imports, code_start);
data[data_dirs].import = import_rva;
}
let text_size = data.pos() as u32 - text_start;
// .text end
data[code_sect] = Section {
name: *b".text\0\0\0",
virtual_size: text_size,
virtual_addr: hdr_size.next_multiple_of(section_align),
raw_data_size: text_size.next_multiple_of(file_align),
raw_data_ptr: text_start,
reloc_ptr: 0,
line_num_ptr: 0,
num_relocs: 0,
num_line_nums: 0,
characteristics: 0x60000020,
};
let file_size = data.pos() as u32;
data[opt_header] = OptHeader64 {
magic: 0x20b,
major_linker_ver: 8,
minor_linker_ver: 0,
code_size: text_size.next_multiple_of(file_align),
init_data_size: 0,
uninit_data_size: 0,
// entry offset within the code, rebased to its position in the file
entry_addr: (code_start as u64 + program.entry.unwrap()) as u32,
code_base: text_start,
image_base: 0x400000,
section_align,
file_align,
major_os_ver: 4,
minor_os_ver: 0,
major_image_ver: 0,
minor_image_ver: 0,
major_subsystem_ver: 4,
minor_subsystem_ver: 0,
win32_ver: 0,
image_size: file_size.next_multiple_of(section_align),
headers_size: hdr_size.next_multiple_of(file_align),
checksum: 0,
subsystem: 3, // windows CLI app
dll_characteristics: 0x400,
stack_reserve_size: 0x100000,
stack_commit_size: 0x1000,
heap_reserve_size: 0x100000,
heap_commit_size: 0x1000,
loader_flags: 0,
num_of_rva_and_sizes,
};
data.data
}
impl LinkedProgram<u64> {
    /// Serializes this program as a PE file image; see [`create`].
    pub fn to_pe(&self) -> Vec<u8> {
        create(self)
    }
}
-15
View File
@@ -1,15 +0,0 @@
/// Integer type used for addresses of a target.
pub trait Addr: Clone + Copy {
    /// Converts a byte length into this address type with a plain `as` cast,
    /// i.e. the value is truncated when it does not fit.
    fn from_len(len: usize) -> Self;
}

// Both impls are the same cast, so they are generated from one template.
macro_rules! impl_addr {
    ($($int:ty),* $(,)?) => {
        $(
            impl Addr for $int {
                fn from_len(len: usize) -> Self {
                    len as $int
                }
            }
        )*
    };
}

impl_addr!(u64, u32);
-126
View File
@@ -1,126 +0,0 @@
mod addr;
mod symbol;
pub use addr::*;
pub use symbol::*;
use crate::{arch::Arch, backend::LinkedProgram, io::CompilerMsg};
/// A whole program in backend IR form for architecture `A`: read-only data
/// blobs, functions, external imports and the symbols that name them.
pub struct Program<A: Arch> {
pub ro_data: Vec<Data>,
pub funcs: Vec<Func<A>>,
pub entry: Option<Symbol>,
pub external: Vec<External>,
// indexed by `Symbol.0`; one entry is pushed per reserved symbol
sym_info: Vec<SymInfo>,
// number of symbols handed out so far; also the id of the next one
sym_count: usize,
}
/// A read-only data blob together with the symbol that refers to it.
pub struct Data {
pub bytes: Vec<u8>,
pub sym: Symbol,
}
/// A function body (instruction list) together with its symbol.
pub struct Func<A: Arch> {
pub instrs: Vec<Instr<A>>,
pub sym: Symbol,
}
/// Symbols imported from one external file.
pub struct External {
pub file: String,
pub syms: Vec<Symbol>,
}
/// Per-symbol metadata kept by `Program`.
pub struct SymInfo {
pub name: String,
// true for symbols created through `Program::external`
pub external: bool,
}
/// Backend IR instruction. Variable and function operands are plain indices
/// (`VarId` / `FnId`); `Asm` carries architecture-specific assembly.
pub enum Instr<A: Arch> {
Set { dst: VarId, src: Vec<u8> },
Call { dst: FnId, args: Vec<VarId> },
Copy { dst: VarId, src: VarId },
Asm(A::Asm),
}
/// Index identifying a variable.
pub type VarId = usize;
/// Index identifying a function.
pub type FnId = usize;
impl<A: Arch> Program<A> {
pub fn encode_data(&self, data: &mut Vec<u8>, sym_tab: &mut SymTable<A::Addr>) {
for d in &self.ro_data {
let addr = A::Addr::from_len(data.len());
data.extend(&d.bytes);
sym_tab.insert(d.sym, addr);
}
}
pub fn ro_data(&mut self, name: impl Into<String>, bytes: impl Into<Vec<u8>>) -> Symbol {
let bytes = bytes.into();
let sym = self.reserve(SymInfo {
name: name.into(),
external: false,
});
self.ro_data.push(Data { bytes, sym });
sym
}
pub fn func(&mut self, name: impl Into<String>, instrs: impl Into<Vec<Instr<A>>>) -> Symbol {
let instrs = instrs.into();
let sym = self.reserve(SymInfo {
name: name.into(),
external: false,
});
self.funcs.push(Func { instrs, sym });
sym
}
pub fn external<const LEN: usize>(
&mut self,
file: impl Into<String>,
names: [impl Into<String>; LEN],
) -> [Symbol; LEN] {
let syms = names.map(|s| {
self.reserve(SymInfo {
name: s.into(),
external: true,
})
});
self.external.push(External {
file: file.into(),
syms: syms.to_vec(),
});
syms
}
fn reserve(&mut self, info: SymInfo) -> Symbol {
let res = Symbol(self.sym_count);
self.sym_info.push(info);
self.sym_count += 1;
res
}
pub fn compile(&self) -> Result<LinkedProgram<A::Addr>, CompilerMsg> {
A::compile(self)
}
pub fn sym_count(&self) -> usize {
self.sym_count
}
pub fn sym_info(&self, sym: Symbol) -> &SymInfo {
&self.sym_info[sym.0]
}
}
// Written by hand so that `A` itself does not have to implement `Default`.
impl<A: Arch> Default for Program<A> {
    /// An empty program: no data, functions, imports, symbols or entry point.
    fn default() -> Self {
        Self {
            ro_data: Vec::new(),
            funcs: Vec::new(),
            entry: None,
            sym_count: 0,
            external: Vec::new(),
            sym_info: Vec::new(),
        }
    }
}
-15
View File
@@ -1,15 +0,0 @@
/// Opaque symbol id; the wrapped index is only visible to the parent module,
/// which uses it to index per-symbol tables.
#[derive(Debug, Clone, Copy, Hash, PartialEq, Eq)]
pub struct Symbol(pub(super) usize);
/// Maps symbols to addresses; one optional slot per symbol index.
pub struct SymTable<Addr>(Vec<Option<Addr>>);

impl<Addr: Clone + Copy> SymTable<Addr> {
    /// Creates a table with `len` empty slots.
    pub fn new(len: usize) -> Self {
        let slots = std::iter::repeat(None).take(len).collect();
        Self(slots)
    }

    /// Stores `addr` for `sym`; panics if `sym` is outside the table.
    pub fn insert(&mut self, sym: Symbol, addr: Addr) {
        let Self(slots) = self;
        slots[sym.0] = Some(addr);
    }

    /// Address recorded for `sym`, if any; panics if `sym` is outside the table.
    pub fn get(&self, sym: Symbol) -> Option<Addr> {
        let Self(slots) = self;
        slots[sym.0]
    }
}
-15
View File
@@ -1,15 +0,0 @@
/// Final machine code plus what a container writer needs: the entry offset
/// into `code` and the imports still to be resolved by the loader.
pub struct LinkedProgram<Addr> {
pub code: Vec<u8>,
// offset of the entry point within `code`; the container writers add the
// code's position in the file to it
pub entry: Option<Addr>,
pub imports: Vec<LibImport>,
}
/// Symbols imported from one library.
pub struct LibImport {
pub name: String,
pub syms: Vec<SymImport>,
}
/// One imported symbol and the places in the code that refer to it.
pub struct SymImport {
pub name: String,
// offsets into `LinkedProgram::code`; the PE import encoder writes a 4-byte
// relative displacement at each of them
pub usages: Vec<usize>,
}
-7
View File
@@ -1,7 +0,0 @@
mod container;
mod ir;
mod link;
pub use container::*;
pub use ir::*;
pub use link::*;
-142
View File
@@ -1,142 +0,0 @@
use std::path::{Path, PathBuf};
/// A region of one source file, in byte offsets. `end` is inclusive:
/// `Span::write` renders `text[start..=end]`.
#[derive(Debug, Clone, Copy)]
pub struct Span {
// index into `CompilerOutput::files`
pub file: usize,
pub start: usize,
pub end: usize,
}
/// A value together with the source span it came from. Dereferences to the
/// wrapped value.
pub struct Spanned<T> {
    pub inner: T,
    pub span: Span,
}

impl<T> std::ops::Deref for Spanned<T> {
    type Target = T;

    fn deref(&self) -> &Self::Target {
        let Self { inner, .. } = self;
        inner
    }
}

impl<T> std::ops::DerefMut for Spanned<T> {
    fn deref_mut(&mut self) -> &mut Self::Target {
        let Self { inner, .. } = self;
        inner
    }
}
/// A diagnostic: a message plus the source spans it refers to (possibly none).
#[derive(Debug)]
pub struct CompilerMsg {
pub spans: Vec<Span>,
pub msg: String,
}
/// Collects the diagnostics of a compiler run and the source files they refer
/// to (`Span::file` indexes into `files`).
#[derive(Default)]
pub struct CompilerOutput {
    pub errors: Vec<CompilerMsg>,
    pub files: Vec<PathBuf>,
}

impl CompilerOutput {
    /// Creates an empty output.
    pub fn new() -> Self {
        Self::default()
    }

    /// Records an error.
    pub fn error(&mut self, msg: impl Into<CompilerMsg>) {
        self.errors.push(msg.into());
    }

    /// Writes every error as `Error: <msg>` followed by a rendering of each of
    /// its spans.
    ///
    /// Source files are read on first use, so a file that no span refers to is
    /// never touched. A span whose file cannot be read, or whose file index is
    /// unknown, produces a one-line note instead of aborting the report.
    ///
    /// # Panics
    /// Panics if writing to `w` fails.
    pub fn write(&self, w: &mut impl std::io::Write) {
        // one lazily filled slot per registered file
        let mut sources: Vec<Option<std::io::Result<String>>> =
            self.files.iter().map(|_| None).collect();
        for error in &self.errors {
            writeln!(w, "Error: {}", error.msg).unwrap();
            for span in &error.spans {
                let slot = match sources.get_mut(span.file) {
                    Some(slot) => slot,
                    None => {
                        writeln!(w, "  (unknown source file #{})", span.file).unwrap();
                        continue;
                    }
                };
                let path = &self.files[span.file];
                match slot.get_or_insert_with(|| std::fs::read_to_string(path)) {
                    Ok(text) => span.write(w, text.as_str()).unwrap(),
                    Err(err) => {
                        writeln!(w, "  (could not read {}: {})", path.display(), err).unwrap()
                    }
                }
            }
        }
    }
}
impl Span {
    /// Renders the source line(s) this span touches, with the spanned text
    /// underlined.
    ///
    /// A span inside one line prints that line. A span across several lines
    /// prints its first and last line, separated by `   ...` when lines were
    /// skipped. `start` and `end` are byte offsets of characters in `text`,
    /// `end` inclusive. Nothing is printed when they do not address
    /// characters of `text` or when `start > end`.
    ///
    /// # Errors
    /// Returns any error produced by writing to `w`.
    pub fn write(&self, w: &mut impl std::io::Write, text: &str) -> std::io::Result<()> {
        // `end` is the first byte of the last spanned character; widen it to a
        // half-open range so multi-byte characters are cut on a boundary.
        let last_char = match text.get(self.end..).and_then(|rest| rest.chars().next()) {
            Some(c) => c,
            None => return Ok(()),
        };
        if self.start > self.end || !text.is_char_boundary(self.start) {
            return Ok(());
        }
        let end_excl = self.end + last_char.len_utf8();

        // 1-based line number and byte range (without '\n') of the line
        // containing `pos`. Works for a final line without trailing newline.
        let line_of = |pos: usize| {
            let before = &text[..pos];
            let line_start = before.rfind('\n').map_or(0, |nl| nl + 1);
            let line_end = text[pos..].find('\n').map_or(text.len(), |nl| pos + nl);
            let line_no = before.bytes().filter(|&b| b == b'\n').count() + 1;
            (line_no, line_start..line_end)
        };
        let (sline, srange) = line_of(self.start);
        let (eline, erange) = line_of(self.end);
        // the span may end on the '\n' itself; never underline past the line
        let hl_end = end_excl.min(erange.end);

        let underline = "\x1b[4:3m";
        let underline_color = "\x1b[58;5;1m";
        let end = "\x1b[0m";
        if sline == eline {
            writeln!(
                w,
                "  {sline:3} | {}{underline}{underline_color}{}{end}{}",
                &text[srange.start..self.start],
                &text[self.start..hl_end],
                &text[hl_end..srange.end]
            )?;
        } else {
            writeln!(
                w,
                "  {sline:3} | {}{underline}{underline_color}{}{end}",
                &text[srange.start..self.start],
                &text[self.start..srange.end],
            )?;
            if eline != sline + 1 {
                writeln!(w, "   ...")?;
            }
            writeln!(
                w,
                "  {eline:3} | {underline}{underline_color}{}{end}{}",
                &text[erange.start..hl_end],
                &text[hl_end..erange.end],
            )?;
        }
        Ok(())
    }
}
/// A bare message without any source location.
impl From<String> for CompilerMsg {
    fn from(msg: String) -> Self {
        let spans = Vec::new();
        Self { spans, msg }
    }
}

/// A bare message without any source location.
impl From<&str> for CompilerMsg {
    fn from(msg: &str) -> Self {
        Self::from(String::from(msg))
    }
}

/// A message attached to exactly one span.
impl<S: Into<String>> From<(S, Span)> for CompilerMsg {
    fn from((msg, span): (S, Span)) -> Self {
        let spans = vec![span];
        let msg = msg.into();
        Self { spans, msg }
    }
}
-51
View File
@@ -1,51 +0,0 @@
use std::ops::{Index, IndexMut};
/// Typed index into an [`IdVec<T>`].
pub struct Id<T> {
    idx: usize,
    _pd: std::marker::PhantomData<T>,
}

/// Append-only vector whose elements are addressed by the [`Id`] returned
/// when they were added.
pub struct IdVec<T> {
    vec: Vec<T>,
}

impl<T> IdVec<T> {
    /// Appends `val` and returns the id under which it can be looked up.
    pub fn add(&mut self, val: T) -> Id<T> {
        let idx = self.vec.len();
        self.vec.push(val);
        Id {
            idx,
            _pd: std::marker::PhantomData,
        }
    }
}

impl<T> Index<Id<T>> for IdVec<T> {
    type Output = T;

    fn index(&self, index: Id<T>) -> &Self::Output {
        let Id { idx, .. } = index;
        &self.vec[idx]
    }
}

impl<T> IndexMut<Id<T>> for IdVec<T> {
    fn index_mut(&mut self, index: Id<T>) -> &mut Self::Output {
        let Id { idx, .. } = index;
        &mut self.vec[idx]
    }
}

// Hand-written so that `T` does not need to implement `Default`.
impl<T> Default for IdVec<T> {
    fn default() -> Self {
        Self { vec: Vec::new() }
    }
}

// Hand-written so that ids are `Copy` even when `T` is not.
impl<T> Clone for Id<T> {
    fn clone(&self) -> Self {
        *self
    }
}

impl<T> Copy for Id<T> {}
-23
View File
@@ -1,23 +0,0 @@
mod id;
mod structs;
pub use id::*;
pub use structs::*;
/// The IR of a program: all namespaces plus the id of the root one.
pub struct Ir {
    pub root: Id<Namespace>,
    pub namespaces: IdVec<Namespace>,
}

impl Ir {
    /// Mutable access to the root namespace.
    pub fn root(&mut self) -> &mut Namespace {
        let root_id = self.root;
        &mut self.namespaces[root_id]
    }
}

impl Default for Ir {
    /// An IR holding a single, empty root namespace.
    fn default() -> Self {
        let mut namespaces = IdVec::default();
        let root = namespaces.add(Namespace::default());
        Self { root, namespaces }
    }
}
-38
View File
@@ -1,38 +0,0 @@
mod namespace;
pub use namespace::*;
use super::Id;
/// A function in the IR.
pub struct Fn {
pub body: Body,
}
/// A sequence of statements.
pub struct Body {
pub statements: Vec<Statement>,
}
/// A single statement; currently just a wrapper around its kind.
pub struct Statement {
ty: StatementTy,
}
/// The kinds of statement; all operands are variable ids.
pub enum StatementTy {
Define { target: VarId, val: VarId },
Assign { target: VarId, val: VarId },
Call { target: VarId, args: Vec<VarId> },
}
/// A variable: whether it is constant and the id of its type.
pub struct Var {
const_: bool,
ty: TypeId,
}
/// A type in the IR.
pub enum Type {
// NOTE(review): the u8 payload is presumably a bit width — confirm
Unsigned(u8),
Signed(u8),
Array(TypeId),
Ptr(TypeId),
Infer,
}
/// Id of a variable.
pub type VarId = u32;
/// Id of a type.
pub type TypeId = u32;
-11
View File
@@ -1,11 +0,0 @@
use super::*;
use std::collections::HashMap;
/// A namespace: named items, looked up by their name.
#[derive(Default)]
pub struct Namespace {
pub items: HashMap<String, Item>,
}
/// Something a name inside a namespace can refer to.
pub enum Item {
// refers to another namespace by id
Import(Id<Namespace>),
}
+4 -17
View File
@@ -1,21 +1,8 @@
#![cfg_attr(test, feature(gen_blocks))]
use crate::{io::CompilerOutput, parser_ir::parse_program};
mod arch;
mod backend;
mod io;
mod ir;
#![feature(try_trait_v2)]
#![feature(associated_type_defaults)]
#![feature(trait_alias)]
mod parser;
mod parser_ir;
fn main() {
let mut args = std::env::args();
let Some(path) = args.nth(1) else {
println!("file expected");
return;
};
let mut output = CompilerOutput::new();
let ir = parse_program(&path, &mut output);
output.write(&mut std::io::stdout());
parser::parse(include_str!("test.lang"));
}
+108
View File
@@ -0,0 +1,108 @@
use super::*;
/// Parsing state: the token cursor plus the list that collects diagnostics.
/// Dereferences to the cursor, so cursor methods can be called on it directly.
pub struct ParserCtx<'a> {
pub cursor: TokenCursor<'a>,
pub msgs: &'a mut Vec<CompilerMsg>,
}
impl<'a> ParserCtx<'a> {
    /// Creates a context reading from `cursor` and reporting into `msgs`.
    pub fn new(cursor: impl Into<TokenCursor<'a>>, msgs: &'a mut Vec<CompilerMsg>) -> Self {
        let cursor = cursor.into();
        Self { cursor, msgs }
    }

    /// Parses a `T` that needs no extra input data.
    pub fn parse<T: Parsable<Data = ()>>(&mut self) -> Option<Node<T>> {
        self.parse_with(())
    }

    /// Parses a `T`, recording any message the parser reports.
    ///
    /// Returns `None` when parsing has to stop (`Break`, `SubErr`). A
    /// `Continue` result records its message and yields a node without data.
    pub fn parse_with<T: Parsable>(&mut self, data: T::Data) -> Option<Node<T>> {
        match T::parse(self, data) {
            ParseResult::Node(node) => Some(node),
            ParseResult::Ok(value) => Some(Node { data: Some(value) }),
            ParseResult::Continue(msg) => {
                self.msgs.push(msg);
                Some(Node { data: None })
            }
            ParseResult::Break(msg) => {
                self.msgs.push(msg);
                None
            }
            ParseResult::SubErr => None,
        }
    }

    /// Consumes tokens up to and including the first one equal to `token`.
    /// Returns `false` if the input ended before it was found.
    pub fn seek(&mut self, token: impl Into<Token>) -> bool {
        let wanted = token.into();
        loop {
            match self.next() {
                Some(found) if found == wanted => return true,
                Some(_) => {}
                None => return false,
            }
        }
    }

    /// Consumes the next token, recording an error if the input has ended.
    pub fn expect_next(&mut self) -> Option<Token> {
        match self.cursor.next() {
            Some(token) => Some(token),
            None => {
                let at = self.cursor.prev_end();
                self.msgs
                    .push(CompilerMsg::new("Unexpected end of input", at));
                None
            }
        }
    }

    /// Looks at the next token without consuming it.
    pub fn expect_peek(&self) -> Result<&Token, CompilerMsg> {
        self.cursor.peek().ok_or_else(|| self.unexpected_end())
    }

    /// Succeeds when the cursor's `next_is_ref` accepts `token`; otherwise
    /// returns an "unexpected token" error naming the expected one.
    pub fn expect(&mut self, token: impl Into<Token>) -> Result<(), CompilerMsg> {
        let wanted = token.into();
        if !self.next_is_ref(&wanted) {
            return Err(self.unexpected(format!("token {wanted:?}")));
        }
        Ok(())
    }

    /// Error for running out of input, located at the cursor's `next_start()`.
    fn unexpected_end(&self) -> CompilerMsg {
        CompilerMsg::new("Unexpected end of input", self.next_start())
    }

    /// The next token, if any, without consuming it.
    pub fn peek(&self) -> Option<&Token> {
        self.cursor.peek()
    }

    /// Builds the error for finding something other than `expected` next.
    pub fn unexpected<'b>(&self, expected: impl std::fmt::Display) -> CompilerMsg {
        match self.peek_span() {
            Some((next, span)) => CompilerMsg::new(
                format!("Unexpected token {next:?}, expected {expected}"),
                span,
            ),
            None => self.unexpected_end(),
        }
    }
}
// Lets cursor methods be called directly on the context.
impl<'a> std::ops::Deref for ParserCtx<'a> {
    type Target = TokenCursor<'a>;

    fn deref(&self) -> &Self::Target {
        let Self { cursor, .. } = self;
        cursor
    }
}

impl<'a> std::ops::DerefMut for ParserCtx<'a> {
    fn deref_mut(&mut self) -> &mut Self::Target {
        let Self { cursor, .. } = self;
        cursor
    }
}
-38
View File
@@ -1,38 +0,0 @@
use super::Token;
use crate::io::Span;
/// A literal together with where it appeared in the source.
pub struct Lit {
pub ty: LitTy,
pub span: Span,
}
/// The kinds of literal.
#[derive(PartialEq)]
pub enum LitTy {
// kept as the source text; not converted to a numeric value here
Number(String),
Bool(bool),
String(String),
Unit,
}
impl From<LitTy> for Token {
fn from(value: LitTy) -> Self {
Self::Lit(value)
}
}
/// Prints the literal as source-like text; strings are wrapped in double
/// quotes without escaping.
impl std::fmt::Display for LitTy {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            Self::Unit => f.write_str("()"),
            Self::Bool(value) => f.write_str(if *value { "true" } else { "false" }),
            Self::Number(digits) => f.write_str(digits),
            Self::String(text) => {
                f.write_str("\"")?;
                f.write_str(text)?;
                f.write_str("\"")
            }
        }
    }
}
/// Displays exactly like the literal's `LitTy`; the span is not shown.
impl std::fmt::Display for Lit {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let Self { ty, .. } = self;
        ty.fmt(f)
    }
}
-102
View File
@@ -1,102 +0,0 @@
use std::borrow::Borrow;
use crate::io::{CompilerMsg, Span, Spanned};
mod lit;
mod token;
pub use lit::*;
pub use token::*;
/// Token stream with one token of lookahead.
pub struct Cursor<'a> {
// span of the token most recently returned by `next`
pub span: Span,
// the lookahead token, already pulled from `tokens`
next: Option<TokenInst>,
tokens: Tokens<'a>,
}
impl<'a> Cursor<'a> {
    /// Creates a cursor over `text` and loads the first token as lookahead.
    pub fn new(text: &'a str, file: usize) -> Self {
        let mut cursor = Self {
            span: Span {
                start: 0,
                end: 0,
                file,
            },
            next: None,
            tokens: Tokens::new(text, file),
        };
        // the slot was empty, so this only fills the lookahead
        cursor.next();
        cursor
    }

    /// Returns the lookahead token and refills the lookahead. `span` is
    /// updated to the returned token's span.
    pub fn next(&mut self) -> Option<Token> {
        let upcoming = self.tokens.next();
        let current = std::mem::replace(&mut self.next, upcoming)?;
        self.span = current.span;
        Some(current.inner)
    }

    /// Consumes the next token only if it equals `token`; reports whether it did.
    pub fn next_if(&mut self, token: impl Borrow<Token>) -> bool {
        let matched = self.peek().is_some_and(|t| t == token.borrow());
        if matched {
            self.next();
        }
        matched
    }

    /// The lookahead token, if any.
    pub fn peek(&self) -> Option<&Token> {
        match &self.next {
            Some(inst) => Some(&inst.inner),
            None => None,
        }
    }

    /// Like [`Cursor::next`], but end of input is an error.
    pub fn expect_next(&mut self) -> Result<Token, CompilerMsg> {
        match self.next() {
            Some(token) => Ok(token),
            None => Err(CompilerMsg::unexpected_eof()),
        }
    }

    /// Like [`Cursor::peek`], but end of input is an error.
    pub fn expect_peek(&self) -> Result<&Token, CompilerMsg> {
        match self.peek() {
            Some(token) => Ok(token),
            None => Err(CompilerMsg::unexpected_eof()),
        }
    }

    /// Consumes the next token and checks that it equals `token`.
    pub fn expect(&mut self, token: impl Borrow<Token>) -> Result<Token, CompilerMsg> {
        let wanted = token.borrow();
        match self.expect_next()? {
            found if found == *wanted => Ok(found),
            found => self.unexpected(found, &format!("'{wanted}'")),
        }
    }

    /// Builds the "unexpected token" error for `token`, located at the span of
    /// the most recently consumed token.
    pub fn unexpected<T>(&self, token: Token, expected: &str) -> Result<T, CompilerMsg> {
        let msg = CompilerMsg::unexpected_token(&token, self.span, expected);
        Err(msg)
    }

    /// Start offset of the lookahead token, or 0 when there is none.
    pub fn peek_start(&mut self) -> usize {
        match &self.next {
            Some(inst) => inst.span.start,
            None => 0,
        }
    }

    /// End offset of the most recently consumed token.
    pub fn cur_end(&mut self) -> usize {
        self.span.end
    }

    /// Index of the file being read.
    pub fn file(&mut self) -> usize {
        self.span.file
    }
}
impl CompilerMsg {
    /// Error for finding `token` at `span` where `expected` was required.
    pub fn unexpected_token(token: &Token, span: Span, expected: &str) -> Self {
        let msg = format!("Unexpected token '{token}', expected {expected}");
        Self {
            spans: vec![span],
            msg,
        }
    }

    /// Error for running out of input; carries no span.
    pub fn unexpected_eof() -> Self {
        Self {
            spans: Vec::new(),
            msg: String::from("unexpected end of file"),
        }
    }
}
-198
View File
@@ -1,198 +0,0 @@
use crate::parser::cursor::LitTy;
use super::{Span, Spanned};
use std::{iter::Peekable, str::CharIndices};
// Declares the `Token` enum, its `Display` impl and the keyword lookup
// `from_str` (see the `def_tokens` macro at the bottom of this file).
// `symbol` and `keyword` entries pair a variant with its source text; `other`
// entries are variants that carry data.
def_tokens! {
symbol {
Dot: ".",
Comma: ",",
Equal: "=",
Colon: ":",
Semicolon: ";",
Plus: "+",
Dash: "-",
Asterisk: "*",
Slash: "/",
OpenParen: "(",
CloseParen: ")",
OpenSquare: "[",
CloseSquare: "]",
OpenCurly: "{",
CloseCurly: "}",
Arrow: "->",
DoubleArrow: "=>",
PlusEqual: "+=",
DashEqual: "-=",
AsteriskEqual: "*=",
SlashEqual: "/=",
Hash: "#",
}
keyword {
Let: "let",
Import: "import",
Fn: "fn",
If: "if",
Loop: "loop",
While: "while",
For: "for",
Match: "match",
Break: "break",
Asm: "asm",
}
other {
Ident(String),
Lit(LitTy),
}
}
/// A token together with its source span.
pub type TokenInst = Spanned<Token>;

/// Lexer: an iterator turning source text into spanned tokens.
///
/// Spans use byte offsets; `end` is the offset of the token's last character
/// (inclusive), for every kind of token.
pub struct Tokens<'a> {
    file: usize,
    chars: Peekable<CharIndices<'a>>,
}

impl<'a> Tokens<'a> {
    /// Creates a lexer over `code`; `file` is copied into every span.
    pub fn new(code: &'a str, file: usize) -> Self {
        Self {
            file,
            chars: code.char_indices().peekable(),
        }
    }

    /// Appends the run of alphanumeric characters at the cursor to `text` and
    /// moves `end` to the last character consumed.
    fn eat_alphanumeric(&mut self, text: &mut String, end: &mut usize) {
        while let Some(&(at, c)) = self.chars.peek() {
            if !c.is_alphanumeric() {
                break;
            }
            text.push(c);
            *end = at;
            self.chars.next();
        }
    }
}

impl Iterator for Tokens<'_> {
    type Item = Spanned<Token>;

    fn next(&mut self) -> Option<Self::Item> {
        // Skip whitespace with a loop; recursing once per whitespace character
        // could exhaust the stack on long runs.
        let (i, c) = loop {
            let (i, c) = self.chars.next()?;
            if !c.is_whitespace() {
                break (i, c);
            }
        };
        let mut span = Span {
            start: i,
            end: i,
            file: self.file,
        };
        // Picks a two-character token when the next character matches one of
        // the listed ones, extending the span over it; otherwise `$def`.
        macro_rules! then {
            (_ => $def:expr, $($char:expr => $to:expr,)*) => {
                match self.chars.peek() {
                    $(Some(&(at, $char)) => {
                        self.chars.next();
                        span.end = at;
                        $to
                    },)*
                    _ => $def,
                }
            };
        }
        let inner = match c {
            '.' => Token::Dot,
            ',' => Token::Comma,
            '(' => Token::OpenParen,
            ')' => Token::CloseParen,
            '[' => Token::OpenSquare,
            ']' => Token::CloseSquare,
            '{' => Token::OpenCurly,
            '}' => Token::CloseCurly,
            '#' => Token::Hash,
            '+' => then! {
                _ => Token::Plus,
                '=' => Token::PlusEqual,
            },
            '-' => then! {
                _ => Token::Dash,
                '=' => Token::DashEqual,
                '>' => Token::Arrow,
            },
            '*' => then! {
                _ => Token::Asterisk,
                '=' => Token::AsteriskEqual,
            },
            '/' => then! {
                _ => Token::Slash,
                '=' => Token::SlashEqual,
            },
            ':' => Token::Colon,
            ';' => Token::Semicolon,
            '=' => then! {
                _ => Token::Equal,
                '>' => Token::DoubleArrow,
            },
            '0'..='9' => {
                let mut digits = c.to_string();
                self.eat_alphanumeric(&mut digits, &mut span.end);
                LitTy::Number(digits).into()
            }
            '"' => {
                // No escape sequences; an unterminated literal runs to the end
                // of input. The span includes the closing quote when present.
                let mut text = String::new();
                for (at, ch) in self.chars.by_ref() {
                    span.end = at;
                    if ch == '"' {
                        break;
                    }
                    text.push(ch);
                }
                LitTy::String(text).into()
            }
            _ => {
                // Any other character starts a word.
                // NOTE(review): `_` is not alphanumeric, so `foo_bar` lexes as
                // `foo` followed by `_bar` — confirm this is intended.
                let mut word = c.to_string();
                self.eat_alphanumeric(&mut word, &mut span.end);
                match word.as_str() {
                    "true" => LitTy::Bool(true).into(),
                    "false" => LitTy::Bool(false).into(),
                    _ => from_str(word),
                }
            }
        };
        Some(Spanned { inner, span })
    }
}
// Generates, from one table of tokens:
//  * `enum Token` with a unit variant per symbol and keyword and a tuple
//    variant per `other` entry,
//  * `from_str`, mapping a word to its keyword token or else to `Token::Ident`,
//  * `Display for Token`, printing the source text of the token.
macro_rules! def_tokens {
    {
        symbol {
            $($symbol:ident: $symbol_text:expr,)*
        }
        keyword {
            $($keyword:ident: $keyword_text:expr,)*
        }
        other {
            $($carrier:ident($payload:ty),)*
        }
    } => {
        #[derive(PartialEq)]
        pub enum Token {
            $($symbol,)*
            $($keyword,)*
            $($carrier($payload),)*
        }

        fn from_str(s: String) -> Token {
            match s.as_str() {
                $($keyword_text => Token::$keyword,)*
                _ => Token::Ident(s),
            }
        }

        impl std::fmt::Display for Token {
            fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
                match self {
                    // symbols may contain braces, so they are never used as a format string
                    $(Token::$symbol => f.write_str($symbol_text),)*
                    $(Token::$keyword => write!(f, $keyword_text),)*
                    $(Token::$carrier(value) => write!(f, "{}", value),)*
                }
            }
        }
    };
}
use def_tokens;
+37
View File
@@ -0,0 +1,37 @@
use super::*;
/// A diagnostic message attached to a region of the input.
#[derive(Debug)]
pub struct CompilerMsg {
    msg: String,
    span: CharSpan,
}

/// A region of the input, given by its first and last position.
#[derive(Debug, Clone, Copy)]
pub struct CharSpan {
    start: CharPos,
    end: CharPos,
}

impl CharPos {
    /// The span running from this position to `end`.
    pub fn to(self, end: CharPos) -> CharSpan {
        let start = self;
        CharSpan { start, end }
    }
}

impl CompilerMsg {
    /// Creates a message for `span`; a single position is accepted as well.
    pub fn new(msg: impl Into<String>, span: impl Into<CharSpan>) -> Self {
        let msg = msg.into();
        let span = span.into();
        Self { msg, span }
    }
}

/// A single position is the span that starts and ends there.
impl From<CharPos> for CharSpan {
    fn from(value: CharPos) -> Self {
        value.to(value)
    }
}
+15 -28
View File
@@ -1,32 +1,19 @@
mod cursor;
mod node;
mod nodes;
mod ctx;
mod token;
mod tree;
mod io;
use std::path::Path;
pub use ctx::*;
use token::*;
pub use tree::*;
pub use io::*;
use cursor::*;
pub use node::*;
pub use nodes::*;
use crate::io::CompilerOutput;
pub fn parse_file(path: impl AsRef<Path>, output: &mut CompilerOutput) -> Option<Body> {
let code = match std::fs::read_to_string(&path) {
Ok(code) => code,
Err(err) => {
output.error(format!("Failed to read input file: {err}"));
return None;
pub fn parse(file: &str) {
let mut msgs = Vec::new();
let mut parser = ParserCtx::new(file, &mut msgs);
if let Some(block) = parser.parse_with::<PBlock>(false) {
println!("{block:#?}");
} else {
println!("{msgs:?}");
}
};
let file = output.files.len();
output.files.push(path.as_ref().to_path_buf());
let mut ctx = ParseCtx::new(Cursor::new(&code, file));
let root = match ctx.parse() {
Ok(v) => v,
Err(msg) => {
output.error(msg);
return None;
}
};
Some(root)
}
-83
View File
@@ -1,83 +0,0 @@
use crate::{
io::{CompilerMsg, Span},
parser::{
Ident, Node,
cursor::{Cursor, Lit, LitTy, Token},
},
};
/// Parser state: the token cursor plus the start offset of the node that is
/// currently being parsed. Dereferences to the cursor.
pub struct ParseCtx<'a> {
    start: usize,
    cursor: Cursor<'a>,
}

impl<'a> ParseCtx<'a> {
    /// Creates a context reading from `cursor`.
    pub fn new(cursor: Cursor<'a>) -> Self {
        Self { start: 0, cursor }
    }

    /// Parses an `N` and boxes it.
    pub fn parse_box<N: Node>(&mut self) -> Result<Box<N>, CompilerMsg> {
        let node = self.parse_with(N::parse)?;
        Ok(Box::new(node))
    }

    /// Parses an `N` with its own `Node::parse`.
    pub fn parse<N: Node>(&mut self) -> Result<N, CompilerMsg> {
        self.parse_with(N::parse)
    }

    /// Runs `f` with `start` set to the start of the upcoming token and
    /// restores the enclosing node's `start` afterwards.
    pub fn parse_with<N: Node>(
        &mut self,
        f: impl FnOnce(&mut Self) -> Result<N, CompilerMsg>,
    ) -> Result<N, CompilerMsg> {
        let node_start = self.cursor.peek_start();
        let outer_start = std::mem::replace(&mut self.start, node_start);
        let result = f(self);
        self.start = outer_start;
        result
    }

    /// Builds an identifier located at the most recently consumed token.
    pub fn ident(&mut self, s: String) -> Ident {
        Ident {
            name: s,
            span: self.cursor.span,
        }
    }

    /// Builds a literal located at the most recently consumed token.
    pub fn lit(&mut self, ty: LitTy) -> Lit {
        Lit {
            ty,
            span: self.cursor.span,
        }
    }

    /// Span from the start of the current node to the end of the most
    /// recently consumed token.
    pub fn span(&mut self) -> Span {
        let end = self.cursor.cur_end();
        let file = self.cursor.file();
        Span {
            file,
            start: self.start,
            end,
        }
    }

    /// Parses `N`s separated by `sep` until `end`, which is consumed. An
    /// immediate `end` gives an empty list; a trailing separator is not accepted.
    pub fn list<N: Node>(&mut self, sep: Token, end: Token) -> Result<Vec<N>, CompilerMsg> {
        let mut items = Vec::new();
        if !self.next_if(&end) {
            loop {
                items.push(self.parse()?);
                if !self.next_if(&sep) {
                    break;
                }
            }
            self.expect(end)?;
        }
        Ok(items)
    }
}
// Lets cursor methods be called directly on the context.
impl<'a> std::ops::Deref for ParseCtx<'a> {
    type Target = Cursor<'a>;

    fn deref(&self) -> &Self::Target {
        let Self { cursor, .. } = self;
        cursor
    }
}

impl<'a> std::ops::DerefMut for ParseCtx<'a> {
    fn deref_mut(&mut self) -> &mut Self::Target {
        let Self { cursor, .. } = self;
        cursor
    }
}
-50
View File
@@ -1,50 +0,0 @@
use crate::parser::Node;
/// Formatting state handed to `Node::fmt`.
#[derive(Clone, Copy)]
pub struct DisplayCtx {
pub indent: usize,
}
/// Adapter pairing a node with a `DisplayCtx` so it can be used with `{}`.
pub struct NodeDsp<'a, N: Node> {
pub node: &'a N,
pub ctx: DisplayCtx,
}
impl<N: Node> std::fmt::Display for NodeDsp<'_, N> {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
self.node.fmt(f, self.ctx)
}
}
/// Adapter that displays a list of nodes separated by `", "`.
pub struct VecDsp<'a, N> {
    list: &'a Vec<N>,
    ctx: DisplayCtx,
}

impl<N: Node> std::fmt::Display for VecDsp<'_, N> {
    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
        let mut items = self.list.iter().peekable();
        while let Some(item) = items.next() {
            // every element but the last is followed by a separator
            if items.peek().is_some() {
                write!(f, "{}, ", item.dsp(self.ctx))?;
            } else {
                write!(f, "{}", item.dsp(self.ctx))?;
            }
        }
        Ok(())
    }
}
/// Extension trait giving `Vec<N>` a `dsp` method that mirrors `Node::dsp`.
pub trait VecDspT<N> {
    /// Wraps the list for display with the given context.
    fn dsp<'a, 'b>(&'a self, ctx: impl Into<DisplayCtx>) -> VecDsp<'b, N>
    where
        'a: 'b;
}

impl<N> VecDspT<N> for Vec<N> {
    fn dsp<'a, 'b>(&'a self, ctx: impl Into<DisplayCtx>) -> VecDsp<'b, N>
    where
        'a: 'b,
    {
        VecDsp {
            ctx: ctx.into(),
            list: self,
        }
    }
}
-17
View File
@@ -1,17 +0,0 @@
mod ctx;
mod dsp;
pub use ctx::*;
pub use dsp::*;
use crate::io::CompilerMsg;
/// A syntax-tree node: something that can be parsed from tokens and printed
/// back with a display context.
pub trait Node: Sized {
    /// Parses the node from the token stream.
    fn parse(ctx: &mut ParseCtx) -> Result<Self, CompilerMsg>;

    /// Writes the node using the given display context.
    fn fmt(&self, f: &mut std::fmt::Formatter, ctx: DisplayCtx) -> std::fmt::Result;

    /// Adapter implementing `Display` for this node under `ctx`.
    fn dsp(&self, ctx: DisplayCtx) -> NodeDsp<'_, Self> {
        let node = self;
        NodeDsp { node, ctx }
    }

    /// Like [`Node::dsp`], starting at indentation level 0.
    fn new_dsp(&self) -> NodeDsp<'_, Self> {
        let top_level = DisplayCtx { indent: 0 };
        self.dsp(top_level)
    }
}
-23
View File
@@ -1,23 +0,0 @@
use crate::{
arch::x86_64::Code,
parser::{Node, cursor::Token},
};
pub mod x86_64;
/// An inline assembly block, by target architecture.
pub enum AsmBlock {
    X86_64(Code),
}

impl Node for AsmBlock {
    /// Parses `{ <x86_64 code> }`.
    fn parse(ctx: &mut crate::parser::ParseCtx) -> Result<Self, crate::io::CompilerMsg> {
        ctx.expect(Token::OpenCurly)?;
        let code: Code = ctx.parse()?;
        ctx.expect(Token::CloseCurly)?;
        Ok(AsmBlock::X86_64(code))
    }

    /// Prints a placeholder; the instructions themselves are not shown.
    fn fmt(&self, f: &mut std::fmt::Formatter, ctx: crate::parser::DisplayCtx) -> std::fmt::Result {
        f.write_str("asm { ... }")
    }
}
-84
View File
@@ -1,84 +0,0 @@
use crate::{
arch::x86_64::*,
io::{CompilerMsg, Span},
parser::{
Node,
cursor::{LitTy, Token},
},
};
impl Node for Code {
    /// Parses instructions for as long as the next token is an identifier.
    /// Supported: `mov <reg>, <reg|imm>` and `int <imm>`.
    fn parse(ctx: &mut crate::parser::ParseCtx) -> Result<Self, crate::io::CompilerMsg> {
        let mut code = Code::default();
        loop {
            let mnemonic = match ctx.peek() {
                Some(Token::Ident(name)) => name.as_str(),
                _ => break,
            };
            match mnemonic {
                "mov" => {
                    ctx.next();
                    let dst = parse_reg(ctx)?;
                    ctx.expect(Token::Comma)?;
                    let src = parse_rmi(ctx)?;
                    code.mov(dst, src)?;
                }
                "int" => {
                    ctx.next();
                    let num = match ctx.expect_next()? {
                        Token::Lit(LitTy::Number(num)) => num,
                        _ => return Err("Expected an immediate".into()),
                    };
                    let vector = parse_imm(&num, ctx.span)?
                        .try_into()
                        .map_err(|_| CompilerMsg::from("Immediate must be a u8"))?;
                    code.int(vector);
                }
                unknown => {
                    // build the message before consuming the token it borrows from
                    let msg = format!("Unknown instruction {unknown}");
                    ctx.next();
                    let spans = vec![ctx.span];
                    return Err(CompilerMsg { msg, spans });
                }
            }
        }
        Ok(code)
    }

    fn fmt(&self, f: &mut std::fmt::Formatter, ctx: crate::parser::DisplayCtx) -> std::fmt::Result {
        todo!()
    }
}
/// Parses the text of a numeric literal into an [`Imm`].
///
/// Accepts an optional leading `-`, then an optional `0x` prefix selecting base 16
/// (base 10 otherwise). The magnitude must fit in a `u64`; on failure an
/// "invalid immediate" message carrying `span` is returned.
pub fn parse_imm(mut s: &str, span: Span) -> Result<Imm, CompilerMsg> {
    let sign: i128 = match s.strip_prefix('-') {
        Some(rest) => {
            s = rest;
            -1
        }
        None => 1,
    };
    let radix = match s.strip_prefix("0x") {
        Some(rest) => {
            s = rest;
            16
        }
        None => 10,
    };
    let magnitude = u64::from_str_radix(s, radix)
        .map_err(|_| CompilerMsg::from(("invalid immediate", span)))?;
    Ok(Imm((magnitude as i128) * sign))
}
/// Consumes one token and interprets it as a register or an immediate operand.
///
/// An identifier must name a register (`Reg::parse`); a number literal is parsed with
/// `parse_imm`. Anything else yields an "unexpected token" message.
pub fn parse_rmi(ctx: &mut crate::parser::ParseCtx) -> Result<RegImmMem, CompilerMsg> {
    let token = ctx.expect_next()?;
    let unexpected =
        || CompilerMsg::unexpected_token(&token, ctx.span, "a register or immediate");
    match &token {
        Token::Ident(name) => Reg::parse(name)
            .map(RegImmMem::Reg)
            .ok_or_else(unexpected),
        Token::Lit(LitTy::Number(digits)) => parse_imm(digits, ctx.span).map(RegImmMem::Imm),
        _ => Err(unexpected()),
    }
}
/// Consumes one token and interprets it as a register name.
///
/// Returns an "unexpected token" message when the token is not an identifier or the
/// identifier is not recognised by `Reg::parse`.
pub fn parse_reg(ctx: &mut crate::parser::ParseCtx) -> Result<Reg, CompilerMsg> {
    let token = ctx.expect_next()?;
    let unexpected = || CompilerMsg::unexpected_token(&token, ctx.span, "a register");
    match &token {
        Token::Ident(name) => Reg::parse(name).ok_or_else(unexpected),
        _ => Err(unexpected()),
    }
}
-58
View File
@@ -1,58 +0,0 @@
use super::*;
/// Sequence of expressions forming the contents of a block.
pub struct Body {
    /// Expressions in source order.
    pub items: Vec<Expr>,
    /// `false` when parsing stopped directly after the last expression (no separator
    /// followed it); `true` otherwise, including for an empty body.
    pub final_semicolon: bool,
    pub span: Span,
}

impl Node for Body {
    /// Parses expressions until end of input or a `}` is the next token. Expressions
    /// that need a semicolon must be followed by one unless they are last; extra
    /// semicolons are skipped.
    fn parse(ctx: &mut ParseCtx) -> Result<Self, CompilerMsg> {
        fn at_end(ctx: &mut ParseCtx) -> bool {
            ctx.peek().is_none_or(|t| *t == Token::CloseCurly)
        }

        let mut items = Vec::new();
        let mut final_semicolon = true;
        while !at_end(ctx) {
            let expr: Expr = ctx.parse()?;
            let separator_required = expr.needs_semicolon();
            items.push(expr);
            if at_end(ctx) {
                // The body ends right after an expression, without a separator.
                final_semicolon = false;
                break;
            }
            if separator_required {
                ctx.expect(Token::Semicolon)?;
            }
            while ctx.next_if(Token::Semicolon) {}
        }
        let span = ctx.span();
        Ok(Self {
            items,
            final_semicolon,
            span,
        })
    }

    /// Writes one expression per line, each prefixed by `ctx.indent` spaces. The last
    /// line gets `;` according to `final_semicolon`, earlier ones according to
    /// `needs_semicolon`.
    fn fmt(&self, f: &mut std::fmt::Formatter, ctx: DisplayCtx) -> std::fmt::Result {
        let pad = " ".repeat(ctx.indent);
        let count = self.items.len();
        for (idx, item) in self.items.iter().enumerate() {
            let with_semicolon = if idx + 1 == count {
                self.final_semicolon
            } else {
                item.needs_semicolon()
            };
            let terminator = if with_semicolon { ";" } else { "" };
            writeln!(f, "{}{}{}", pad, item.dsp(ctx), terminator)?;
        }
        Ok(())
    }
}
-273
View File
@@ -1,273 +0,0 @@
use crate::parser::VecDspT;
pub use super::*;
/// An expression together with the source span recorded when it was parsed.
pub struct Expr {
pub span: Span,
pub ty: ExprTy,
}
/// The different kinds of expression the parser produces.
pub enum ExprTy {
/// `{ ... }`
Block(Body),
/// `( expr )`
Group(Box<Expr>),
/// `of.field`
Member {
of: Box<Expr>,
field: Ident,
},
Ident(Ident),
Lit(Lit),
/// `-expr`
Negate(Box<Expr>),
/// `target(args, ...)`
Call {
target: Box<Expr>,
args: Vec<Expr>,
},
/// `target = val`
Assign {
target: Box<Expr>,
val: Box<Expr>,
},
/// `target : [ty] = val`, or `target : [ty] : val` when `const_` is true.
Define {
target: Box<Expr>,
ty: Option<Type>,
const_: bool,
val: Box<Expr>,
},
If {
cond: Box<Expr>,
body: Box<Expr>,
},
Loop {
body: Box<Expr>,
},
While {
cond: Box<Expr>,
body: Box<Expr>,
},
/// `import name`
Import(Ident),
Fn(Box<Func>),
Break,
Asm(AsmBlock),
}
impl Node for Expr {
// Parses one unit expression (`Self::unit`) and then repeatedly wraps it for every
// following `=`, `:`, `(` or `.` token; any other token ends the expression.
// NOTE(review): the right-hand side of `=` / `:` is parsed with `Self::unit` only, so a
// postfix token after it applies to the whole Assign/Define node — confirm intended.
fn parse(ctx: &mut ParseCtx) -> Result<Self, CompilerMsg> {
let mut res = Self::unit(ctx)?;
while let Some(next) = ctx.peek() {
let ty = match next {
// Assignment: `res = <unit>`
Token::Equal => {
ctx.next();
let target = Box::new(res);
let val = Box::new(ctx.parse_with(Self::unit)?);
ExprTy::Assign { target, val }
}
// Definition: `res : [type] (= | :) <unit>`
Token::Colon => {
ctx.next();
let target = Box::new(res);
let mut ty = None;
let next = ctx.expect_peek()?;
// A type is present unless the very next token is already `=` or `:`.
if !matches!(next, Token::Equal | Token::Colon) {
ty = Some(ctx.parse()?);
}
// `=` introduces a non-const value, a second `:` a const one.
let const_ = match ctx.expect_next()? {
Token::Equal => false,
Token::Colon => true,
t => ctx.unexpected(t, "an equals = or colon :")?,
};
let val = Box::new(ctx.parse_with(Self::unit)?);
ExprTy::Define {
target,
ty,
val,
const_,
}
}
// Call: `res(arg, ...)`
Token::OpenParen => {
ctx.next();
let target = Box::new(res);
let args = ctx.list(Token::Comma, Token::CloseParen)?;
ExprTy::Call { target, args }
}
// Member access: `res.field`
Token::Dot => {
ctx.next();
let of = Box::new(res);
let field = ctx.parse()?;
ExprTy::Member { of, field }
}
_ => break,
};
res = Self {
ty,
span: ctx.span(),
};
}
Ok(res)
}
// Formatting is defined on the expression kind.
fn fmt(&self, f: &mut std::fmt::Formatter, ctx: DisplayCtx) -> std::fmt::Result {
self.ty.fmt(f, ctx)
}
}
impl ExprTy {
    /// Writes the source form of this expression kind to `f`.
    ///
    /// `ctx.indent` is the indentation of the line the expression starts on. A
    /// non-empty block prints its items three columns deeper and then closes with `}`
    /// at that starting indentation, so nested blocks line up with the line that
    /// opened them.
    fn fmt(&self, f: &mut std::fmt::Formatter, mut ctx: DisplayCtx) -> std::fmt::Result {
        match self {
            Self::Ident(ident) => ident.fmt(f, ctx),
            Self::Group(expr) => write!(f, "({})", expr.dsp(ctx)),
            Self::Fn(func) => func.fmt(f, ctx),
            Self::Lit(lit) => write!(f, "{}", lit),
            Self::Negate(expr) => {
                write!(f, "-{}", expr.dsp(ctx))
            }
            Self::Call { target, args } => {
                write!(f, "{}({})", target.dsp(ctx), args.dsp(ctx))
            }
            Self::Assign { target, val } => {
                write!(f, "{} = {}", target.dsp(ctx), val.dsp(ctx))
            }
            Self::Define {
                target,
                ty,
                val,
                const_,
            } => {
                // Produces `x := v` / `x :: v` without a type, `x : T = v` with one.
                write!(f, "{} :", target.dsp(ctx))?;
                if let Some(ty) = ty {
                    write!(f, " {} ", ty.dsp(ctx))?;
                }
                write!(f, "{} {}", if *const_ { ":" } else { "=" }, val.dsp(ctx))
            }
            Self::Member { of, field } => {
                write!(f, "{}.{field}", of.dsp(ctx))
            }
            Self::If { cond, body } => {
                write!(f, "if {} {}", cond.dsp(ctx), body.dsp(ctx))
            }
            Self::While { cond, body } => {
                write!(f, "while {} {}", cond.dsp(ctx), body.dsp(ctx))
            }
            Self::Loop { body } => {
                write!(f, "loop {}", body.dsp(ctx))
            }
            Self::Block(body) => {
                write!(f, "{{")?;
                if !body.items.is_empty() {
                    // Remember where the opening line started before indenting the items.
                    let outer_indent = ctx.indent;
                    writeln!(f)?;
                    ctx.indent += 3;
                    body.fmt(f, ctx)?;
                    // The body ends with a newline, so the closing brace starts a fresh
                    // line and must be re-indented to match the opening line.
                    write!(f, "{}", " ".repeat(outer_indent))?;
                }
                write!(f, "}}")?;
                Ok(())
            }
            Self::Import(ident) => {
                write!(f, "import {ident}")
            }
            Self::Break => {
                write!(f, "break")
            }
            Self::Asm(asm) => asm.fmt(f, ctx),
        }
    }
}
impl Expr {
/// Formats this expression as the body of a construct: blocks are printed as-is,
/// anything else is prefixed with `=> `.
pub fn fmt_body(&self, f: &mut std::fmt::Formatter, ctx: DisplayCtx) -> std::fmt::Result {
match self.ty {
ExprTy::Block(_) => self.fmt(f, ctx),
_ => write!(f, "=> {}", self.dsp(ctx)),
}
}
// Parses a single expression without any trailing `=`, `:`, call or member suffix;
// those are handled by the loop in `Node::parse`.
fn unit(ctx: &mut ParseCtx) -> Result<Self, CompilerMsg> {
let ty = match ctx.expect_next()? {
Token::Dash => ExprTy::Negate(ctx.parse_box()?),
Token::Ident(s) => ExprTy::Ident(ctx.ident(s)),
Token::Lit(l) => ExprTy::Lit(ctx.lit(l)),
Token::Fn => ExprTy::Fn(ctx.parse_box()?),
Token::If => {
let cond = ctx.parse_box()?;
let body = Box::new(Self::body(ctx)?);
ExprTy::If { cond, body }
}
Token::While => {
let cond = ctx.parse_box()?;
let body = Box::new(Self::body(ctx)?);
ExprTy::While { cond, body }
}
Token::Loop => {
let body = ctx.parse_box()?;
ExprTy::Loop { body }
}
Token::OpenParen => {
// `()` is the unit literal; otherwise this is a parenthesised group.
if ctx.next_if(Token::CloseParen) {
ExprTy::Lit(Lit {
ty: LitTy::Unit,
span: ctx.span(),
})
} else {
let inner = ctx.parse_box()?;
ctx.expect(Token::CloseParen)?;
ExprTy::Group(inner)
}
}
Token::OpenCurly => {
let body = ctx.parse()?;
ctx.expect(Token::CloseCurly)?;
ExprTy::Block(body)
}
Token::Break => ExprTy::Break,
Token::Import => {
let ident = ctx.parse()?;
ExprTy::Import(ident)
}
Token::Asm => ExprTy::Asm(ctx.parse()?),
other => return ctx.unexpected(other, "an expression"),
};
Ok(Self {
ty,
span: ctx.span(),
})
}
/// True if this expression is a parenthesised group.
pub fn is_group(&self) -> bool {
matches!(self.ty, ExprTy::Group(_))
}
/// True if this expression is a `{ ... }` block.
pub fn is_block(&self) -> bool {
matches!(self.ty, ExprTy::Block(_))
}
/// Parses a mandatory `{ ... }` block expression.
pub fn block(ctx: &mut ParseCtx) -> Result<Expr, CompilerMsg> {
ctx.expect(Token::OpenCurly)?;
let id = ctx.parse()?;
ctx.expect(Token::CloseCurly)?;
Ok(Expr {
ty: ExprTy::Block(id),
span: ctx.span(),
})
}
/// Parses the body of `if` / `while` / `fn`: either `=> <expr>` or a `{ ... }` block.
pub fn body(ctx: &mut ParseCtx) -> Result<Expr, CompilerMsg> {
if ctx.next_if(Token::DoubleArrow) {
ctx.parse()
} else {
ctx.parse_with(Expr::block)
}
}
/// True if the textual form of this expression ends with a block's closing brace,
/// looking through the trailing sub-expression of loops, conditionals, negation,
/// assignments, definitions and functions.
pub fn ends_with_block(&self) -> bool {
match &self.ty {
ExprTy::Block(..) => true,
ExprTy::Loop { body }
| ExprTy::While { body, .. }
| ExprTy::If { body, .. }
| ExprTy::Negate(body)
| ExprTy::Assign { val: body, .. } => body.ends_with_block(),
ExprTy::Define { val: body, .. } => body.ends_with_block(),
ExprTy::Fn(f) => f.ends_with_block(),
_ => false,
}
}
/// An expression needs a terminating `;` exactly when it does not end with a block.
pub fn needs_semicolon(&self) -> bool {
!self.ends_with_block()
}
}
-49
View File
@@ -1,49 +0,0 @@
use super::*;
/// Function expression: parameter list, optional return type and body.
pub struct Func {
    args: Vec<Param>,
    ret: Option<Type>,
    body: Expr,
    span: Span,
}

impl Node for Func {
    /// Parses `(params) [-> type] body`; the leading `fn` token has already been
    /// consumed by the caller.
    fn parse(ctx: &mut ParseCtx) -> Result<Self, CompilerMsg> {
        ctx.expect(Token::OpenParen)?;
        let args = ctx.list(Token::Comma, Token::CloseParen)?;
        let ret = if ctx.next_if(Token::Arrow) {
            Some(ctx.parse()?)
        } else {
            None
        };
        let body = Expr::body(ctx)?;
        let span = ctx.span();
        Ok(Self {
            args,
            ret,
            body,
            span,
        })
    }

    /// Writes `fn(a, b) [-> ret ]` followed by the body in `fmt_body` form.
    fn fmt(&self, f: &mut std::fmt::Formatter, ctx: DisplayCtx) -> std::fmt::Result {
        f.write_str("fn(")?;
        for (idx, param) in self.args.iter().enumerate() {
            if idx != 0 {
                f.write_str(", ")?;
            }
            write!(f, "{}", param.dsp(ctx))?;
        }
        f.write_str(") ")?;
        if let Some(ret) = &self.ret {
            write!(f, "-> {} ", ret.dsp(ctx))?;
        }
        self.body.fmt_body(f, ctx)
    }
}

impl Func {
    /// True if the function's body ends with a block.
    pub fn ends_with_block(&self) -> bool {
        self.body.ends_with_block()
    }
}
-25
View File
@@ -1,25 +0,0 @@
use super::*;
/// An identifier and the source span it was read from.
pub struct Ident {
pub name: String,
pub span: Span,
}
impl Node for Ident {
// Consumes one token; succeeds only for `Token::Ident`, otherwise reports the token
// through `ctx.unexpected` as "an identifier" being expected.
fn parse(ctx: &mut super::ParseCtx) -> Result<Self, crate::io::CompilerMsg> {
match ctx.expect_next()? {
Token::Ident(ident) => Ok(ctx.ident(ident)),
t => ctx.unexpected(t, "an identifier"),
}
}
// Prints just the name; the display context is not used.
fn fmt(&self, f: &mut std::fmt::Formatter, _: DisplayCtx) -> std::fmt::Result {
write!(f, "{}", self.name)
}
}
impl std::fmt::Display for Ident {
    /// Displays the identifier as its bare name.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        // Fully-qualified call: method syntax (`self.name.fmt(f)`) only resolves when
        // `std::fmt::Display` has been imported, and `Ident` additionally has a
        // `Node::fmt` with a different signature, so naming the trait avoids both a
        // missing-import error and any ambiguity.
        std::fmt::Display::fmt(&self.name, f)
    }
}
-78
View File
@@ -1,78 +0,0 @@
use super::*;
/// A top-level item with the span recorded after parsing it.
pub struct Item {
    pub ty: ItemTy,
    pub span: Span,
}

/// The kinds of item that can appear.
pub enum ItemTy {
    /// `let name [: ty] = val`
    Let {
        name: Ident,
        ty: Option<Type>,
        val: Expr,
    },
    Fn(Func),
    Expr(Expr),
    /// `import name`
    Import(Ident),
}

impl Node for Item {
    /// Dispatches on the next token: `fn`, `let` and `import` introduce their
    /// respective items; everything else is parsed as an expression.
    fn parse(ctx: &mut ParseCtx) -> Result<Self, CompilerMsg> {
        let ty = match ctx.expect_peek()? {
            Token::Fn => {
                ctx.next();
                ItemTy::Fn(ctx.parse()?)
            }
            Token::Let => {
                ctx.next();
                Self::parse_let(ctx)?
            }
            Token::Import => {
                ctx.next();
                ItemTy::Import(ctx.parse()?)
            }
            _ => ItemTy::Expr(ctx.parse()?),
        };
        let span = ctx.span();
        Ok(Self { ty, span })
    }

    /// Prints the item in the same surface syntax that `parse` accepts.
    fn fmt(&self, f: &mut std::fmt::Formatter, ctx: DisplayCtx) -> std::fmt::Result {
        match &self.ty {
            ItemTy::Fn(func) => func.fmt(f, ctx),
            ItemTy::Let { name, ty, val } => {
                write!(f, "let {}", name.dsp(ctx))?;
                if let Some(ty) = ty {
                    write!(f, ": {}", ty.dsp(ctx))?;
                }
                write!(f, " = {}", val.dsp(ctx))
            }
            ItemTy::Expr(expr) => expr.fmt(f, ctx),
            ItemTy::Import(ident) => write!(f, "import {}", ident.dsp(ctx)),
        }
    }
}

impl Item {
    /// Parses the remainder of a `let` item after the keyword: `name [: ty] = val`.
    fn parse_let(ctx: &mut ParseCtx) -> Result<ItemTy, CompilerMsg> {
        let name = ctx.parse()?;
        let ty = if ctx.next_if(Token::Colon) {
            Some(ctx.parse()?)
        } else {
            None
        };
        ctx.expect(Token::Equal)?;
        let val = ctx.parse()?;
        Ok(ItemTy::Let { name, ty, val })
    }

    /// True if the item's textual form ends with a block; imports never do.
    pub fn ends_with_block(&self) -> bool {
        match &self.ty {
            ItemTy::Let { val: expr, .. } | ItemTy::Expr(expr) => expr.ends_with_block(),
            ItemTy::Fn(func) => func.ends_with_block(),
            ItemTy::Import(_) => false,
        }
    }

    /// An item needs a terminating `;` exactly when it does not end with a block.
    pub fn needs_semicolon(&self) -> bool {
        !self.ends_with_block()
    }
}
-18
View File
@@ -1,18 +0,0 @@
mod asm;
mod body;
mod expr;
mod func;
mod ident;
mod param;
mod struct_;
mod ty;
pub use asm::*;
pub use body::*;
pub use expr::*;
pub use func::*;
pub use ident::*;
pub use param::*;
pub use ty::*;
use super::{DisplayCtx, Lit, LitTy, Node, ParseCtx, Token};
use crate::io::{CompilerMsg, Span};
-25
View File
@@ -1,25 +0,0 @@
use super::*;
/// Function parameter: a name with an optional type annotation.
pub struct Param {
    name: Ident,
    ty: Option<Type>,
}

impl Node for Param {
    /// Parses `name` optionally followed by `: type`.
    fn parse(ctx: &mut ParseCtx) -> Result<Self, CompilerMsg> {
        let name = ctx.parse()?;
        let ty = if ctx.next_if(Token::Colon) {
            Some(ctx.parse()?)
        } else {
            None
        };
        Ok(Self { name, ty })
    }

    /// Prints `name` or `name: type`.
    fn fmt(&self, f: &mut std::fmt::Formatter, ctx: DisplayCtx) -> std::fmt::Result {
        self.name.fmt(f, ctx)?;
        match &self.ty {
            Some(ty) => write!(f, ": {}", ty.dsp(ctx)),
            None => Ok(()),
        }
    }
}
-8
View File
@@ -1,8 +0,0 @@
use super::*;
/// Struct declaration: a name and its fields. No parser or formatter is defined for it
/// in this file.
pub struct Struct {
name: String,
fields: Vec<Field>,
}
/// Placeholder for a struct field; currently carries no data.
pub struct Field {}
-20
View File
@@ -1,20 +0,0 @@
use super::*;
/// A type expression; currently only a bare identifier.
pub enum Type {
    Ident(Ident),
}

impl Node for Type {
    /// Consumes one token, which must be an identifier naming the type.
    fn parse(ctx: &mut ParseCtx) -> Result<Self, CompilerMsg> {
        match ctx.expect_next()? {
            Token::Ident(name) => Ok(Self::Ident(ctx.ident(name))),
            other => ctx.unexpected(other, "a type"),
        }
    }

    /// Prints the type's identifier.
    fn fmt(&self, f: &mut std::fmt::Formatter, ctx: DisplayCtx) -> std::fmt::Result {
        match self {
            Self::Ident(name) => name.fmt(f, ctx),
        }
    }
}
+71
View File
@@ -0,0 +1,71 @@
use std::{iter::Peekable, str::Chars};
/// Peekable character iterator over source text that tracks line/column positions.
#[derive(Clone)]
pub struct CharIter<'a> {
    iter: Peekable<Chars<'a>>,
    /// Position reported by [`CharIter::pos`]; refreshed on every `next` / `next_if`.
    pos: CharPos,
    /// Position the next character to be consumed will have.
    next_pos: CharPos,
}

/// Zero-based line and column of a character.
#[derive(Clone, Copy, Debug, Default)]
pub struct CharPos {
    line: usize,
    col: usize,
}

impl<'a> CharIter<'a> {
    /// Creates an iterator positioned at line 0, column 0 of `text`.
    pub fn new(text: &'a str) -> Self {
        let origin = CharPos::default();
        Self {
            iter: text.chars().peekable(),
            pos: origin,
            next_pos: origin,
        }
    }

    /// Consumes and returns the next character, updating the tracked positions.
    pub fn next(&mut self) -> Option<char> {
        let consumed = self.iter.next();
        self.advance(consumed)
    }

    /// Position bookkeeping shared by `next` and `next_if`.
    ///
    /// `pos` always becomes the previous `next_pos`, even when `c` is `None`;
    /// `next_pos` only moves when a character was actually consumed, wrapping to the
    /// start of the following line after `'\n'`.
    fn advance(&mut self, c: Option<char>) -> Option<char> {
        self.pos = self.next_pos;
        match c {
            Some('\n') => {
                self.next_pos.line += 1;
                self.next_pos.col = 0;
            }
            Some(_) => self.next_pos.col += 1,
            None => {}
        }
        c
    }

    /// Returns the next character without consuming it.
    pub fn peek(&mut self) -> Option<char> {
        self.iter.peek().copied()
    }

    /// Consumes the next character only if `f` accepts it.
    pub fn next_if(&mut self, f: impl FnOnce(&char) -> bool) -> Option<char> {
        let consumed = self.iter.next_if(f);
        self.advance(consumed)
    }

    /// Position recorded by the most recent `next` / `next_if` call.
    pub fn pos(&self) -> CharPos {
        self.pos
    }

    /// Consumes characters up to and including the first `until`, returning the text
    /// before it. Returns `None` if the input ends first (the input is still consumed).
    pub fn until(&mut self, until: char) -> Option<String> {
        let mut collected = String::new();
        loop {
            let c = self.next()?;
            if c == until {
                return Some(collected);
            }
            collected.push(c);
        }
    }
}

impl<'a> From<&'a str> for CharIter<'a> {
    fn from(value: &'a str) -> Self {
        CharIter::new(value)
    }
}
+24
View File
@@ -0,0 +1,24 @@
use super::*;
/// Reserved words recognised by the lexer.
#[derive(Debug, Clone, PartialEq)]
pub enum Keyword {
    Let,
    Fn,
}

impl Keyword {
    /// Returns the keyword spelled exactly `ident`, or `None` if it is not a keyword.
    pub fn parse(ident: &str) -> Option<Self> {
        match ident {
            "let" => Some(Self::Let),
            "fn" => Some(Self::Fn),
            _ => None,
        }
    }
}
// Lets a bare `Keyword` be passed wherever `impl Into<Token>` is accepted.
impl From<Keyword> for Token {
fn from(value: Keyword) -> Self {
Token::Keyword(value)
}
}
+4
View File
@@ -0,0 +1,4 @@
/// Literal values produced by the lexer.
#[derive(Debug, Clone, PartialEq)]
pub enum Literal {
/// Contents of a string literal (the text between the quotes).
String(String),
}
+134
View File
@@ -0,0 +1,134 @@
use super::io::*;
mod chr;
mod kw;
mod lit;
mod symbol;
pub use chr::*;
pub use kw::*;
pub use lit::*;
pub use symbol::*;
/// A lexical token.
#[derive(Debug, Clone, PartialEq)]
pub enum Token {
Lit(Literal),
Keyword(Keyword),
/// Any word that is not a keyword.
Ident(String),
Symbol(Symbol),
}
/// Token stream over a [`CharIter`] with one token of lookahead.
#[derive(Clone)]
pub struct TokenCursor<'a> {
iter: CharIter<'a>,
/// Symbol of the token most recently handed out, if that token was a symbol.
prev_sym: Option<Symbol>,
/// Lookahead: the token the next call to `next` / `next_span` will return.
next: Option<(Token, CharSpan)>,
/// `iter.pos()` sampled in `next_span` after skipping whitespace.
next_start: CharPos,
/// `iter.pos()` sampled at the start of `next_span`.
prev_end: CharPos,
}
impl<'a> TokenCursor<'a> {
/// Creates a cursor over `iter` and lexes the first token into the lookahead slot.
pub fn new(iter: impl Into<CharIter<'a>>) -> Self {
let mut s = Self {
next: None,
prev_sym: None,
iter: iter.into(),
next_start: CharPos::default(),
prev_end: CharPos::default(),
};
// Prime the lookahead; the value returned here is the initial `None` and is dropped.
s.next();
s
}
/// Returns the lookahead token and lexes a new one to replace it.
pub fn next(&mut self) -> Option<Token> {
self.next_span().map(|n| n.0)
}
/// Like `next`, but also returns the token's span.
pub fn next_span(&mut self) -> Option<(Token, CharSpan)> {
self.prev_end = self.iter.pos();
// Remember the symbol about to be handed out so `prev_sym` can report it.
self.prev_sym = self.next.as_ref().and_then(|n| match n.0 {
Token::Symbol(s) => Some(s),
_ => None,
});
while self.iter.next_if(|c| c.is_whitespace()).is_some() {}
self.next_start = self.iter.pos();
// Swap in the freshly lexed token and return the previous lookahead.
std::mem::replace(&mut self.next, Self::get_next(&mut self.iter))
}
// Skips whitespace and lexes one token, pairing it with the span from the position
// of its first character to the iterator position after lexing.
fn get_next(iter: &mut CharIter) -> Option<(Token, CharSpan)> {
while iter.next_if(|c| c.is_whitespace()).is_some() {}
if let Some(c) = iter.next() {
let start = iter.pos();
let val = Self::get_next_inner(iter, c);
let span = start.to(iter.pos());
val.map(|v| (v, span))
} else {
None
}
}
// Lexes the token that starts with the already-consumed character `c`.
fn get_next_inner(iter: &mut CharIter, c: char) -> Option<Token> {
// String literal: everything up to the closing quote. An unterminated string makes
// `until` return `None`, which yields no token.
if c == '"' {
return iter.until('"').map(|s| Token::Lit(Literal::String(s)));
}
if let Some(sym) = Symbol::parse(c, iter) {
return Some(Token::Symbol(sym));
}
// Otherwise a word: extends until whitespace or a character that starts a symbol.
let mut ident = c.to_string();
while let Some(c) = iter.next_if(|c| !c.is_whitespace() && Symbol::parse_char(*c).is_none())
{
ident.push(c);
}
Some(if let Some(kw) = Keyword::parse(&ident) {
Token::Keyword(kw)
} else {
Token::Ident(ident)
})
}
/// Returns the lookahead token without consuming it.
pub fn peek(&self) -> Option<&Token> {
self.peek_span().map(|v| v.0)
}
/// Returns the lookahead token and its span without consuming it.
pub fn peek_span(&self) -> Option<(&Token, CharSpan)> {
self.next.as_ref().map(|(t, s)| (t, *s))
}
/// Consumes and returns the lookahead token only if `f` accepts it.
pub fn next_if(&mut self, f: impl FnOnce(&Token) -> bool) -> Option<Token> {
if self.peek().is_some_and(f) {
self.next()
} else {
None
}
}
/// Consumes the lookahead token if it equals `token`; returns whether it did.
pub fn next_is(&mut self, token: impl Into<Token>) -> bool {
self.next_is_ref(&token.into())
}
/// Returns whether the lookahead token equals `token`, without consuming it.
pub fn peek_is(&mut self, token: impl Into<Token>) -> bool {
self.peek().is_some_and(|t| *t == token.into())
}
/// Same as `next_is`, taking the token by reference.
pub fn next_is_ref(&mut self, token: &Token) -> bool {
self.next_if(|t| t == token).is_some()
}
/// Position recorded by the last `next_span` call after it skipped whitespace.
pub fn next_start(&self) -> CharPos {
self.next_start
}
/// Position recorded at the start of the last `next_span` call.
pub fn prev_end(&self) -> CharPos {
self.prev_end
}
/// Symbol of the most recently returned token, or `None` if it was not a symbol.
pub fn prev_sym(&self) -> Option<Symbol> {
self.prev_sym
}
}
// Builds a cursor from anything convertible into a `CharIter` (e.g. `&str`).
impl<'a, T: Into<CharIter<'a>>> From<T> for TokenCursor<'a> {
fn from(value: T) -> Self {
Self::new(value.into())
}
}
+83
View File
@@ -0,0 +1,83 @@
use super::*;
/// Punctuation tokens recognised by the lexer.
#[derive(Debug, Clone, Copy, PartialEq)]
pub enum Symbol {
    // 1 char
    OpenParen,
    CloseParen,
    OpenCurly,
    CloseCurly,
    Plus,
    Minus,
    Slash,
    Asterisk,
    Equal,
    Colon,
    Semicolon,
    SingleQuote,
    Comma,
    // 2 chars
    Arrow,
    DoubleArrow,
}

impl Symbol {
    /// Lexes the symbol starting with the already-consumed character `c`, consuming a
    /// second character from `iter` when it completes a two-character symbol.
    pub fn parse(c: char, iter: &mut CharIter) -> Option<Self> {
        let first = Self::parse_char(c)?;
        Some(first.parse_rest(iter))
    }

    /// Maps a single character to its one-character symbol, if it is one.
    pub fn parse_char(c: char) -> Option<Self> {
        const SINGLE_CHAR: [(char, Symbol); 13] = [
            ('(', Symbol::OpenParen),
            (')', Symbol::CloseParen),
            ('{', Symbol::OpenCurly),
            ('}', Symbol::CloseCurly),
            ('+', Symbol::Plus),
            ('-', Symbol::Minus),
            ('/', Symbol::Slash),
            ('*', Symbol::Asterisk),
            ('=', Symbol::Equal),
            (':', Symbol::Colon),
            (';', Symbol::Semicolon),
            ('\'', Symbol::SingleQuote),
            (',', Symbol::Comma),
        ];
        SINGLE_CHAR
            .iter()
            .find(|(ch, _)| *ch == c)
            .map(|&(_, symbol)| symbol)
    }

    /// Upgrades `-` / `=` to `->` / `=>` when the next character is `>`, consuming it;
    /// otherwise returns `self` and leaves `iter` untouched.
    pub fn parse_rest(self, iter: &mut CharIter) -> Self {
        let combined = match (self, iter.peek()) {
            (Symbol::Minus, Some('>')) => Symbol::Arrow,
            (Symbol::Equal, Some('>')) => Symbol::DoubleArrow,
            _ => return self,
        };
        iter.next();
        combined
    }

    /// Source spelling of the symbol.
    pub fn str(&self) -> &'static str {
        match self {
            Self::OpenParen => "(",
            Self::CloseParen => ")",
            Self::OpenCurly => "{",
            Self::CloseCurly => "}",
            Self::Plus => "+",
            Self::Minus => "-",
            Self::Slash => "/",
            Self::Asterisk => "*",
            Self::Equal => "=",
            Self::Colon => ":",
            Self::Semicolon => ";",
            Self::SingleQuote => "'",
            Self::Comma => ",",
            Self::Arrow => "->",
            Self::DoubleArrow => "=>",
        }
    }
}
// Lets a bare `Symbol` be passed wherever `impl Into<Token>` is accepted.
impl From<Symbol> for Token {
fn from(value: Symbol) -> Self {
Token::Symbol(value)
}
}
+30
View File
@@ -0,0 +1,30 @@
use super::*;
/// A parsed sequence of statements, either brace-delimited or running to end of input.
#[derive(Debug)]
pub struct PBlock {
    statements: Vec<Node<PStatement>>,
    /// True when there is a final statement that is NOT followed by a separator, i.e.
    /// the block ends in a trailing expression.
    return_last: bool,
}

impl Parsable for PBlock {
    /// Whether the block is wrapped in `{` / `}`; without braces it extends to end of
    /// input.
    type Data = bool;

    /// Parses statements separated by `;`. Repeated separators are accepted, and no
    /// separator is required after a statement whose last token was `}`.
    fn parse(ctx: &mut ParserCtx, curlies: bool) -> ParseResult<Self> {
        let end = if curlies {
            ctx.expect(Symbol::OpenCurly)?;
            Some(Symbol::CloseCurly)
        } else {
            None
        };
        let res = ctx.parse_list(
            end,
            SepCheck::new(Symbol::Semicolon)
                .dup(true)
                .skip_if(|ctx, _| ctx.prev_sym().is_some_and(|s| s == Symbol::CloseCurly)),
        )?;
        // `last_sep` reports that the list ended right after a separator, which is the
        // opposite of "the last statement is the block's value" — so it must be
        // negated. An empty block has nothing to return.
        let return_last = !res.last_sep && !res.nodes.is_empty();
        ParseResult::Ok(Self {
            statements: res.nodes,
            return_last,
        })
    }
}
+52
View File
@@ -0,0 +1,52 @@
use super::*;
/// Parsed expression.
#[derive(Debug)]
pub enum PExpr {
Ident(String),
Lit(Literal),
/// `{ ... }`
Block(Node<PBlock>),
/// `( expr )`
Group(BNode<PExpr>),
/// `()`
Unit,
}
impl Parsable for PExpr {
// Currently an expression is exactly one unit expression.
fn parse(ctx: &mut ParserCtx, _: ()) -> ParseResult<Self> {
Self::parse_unit(ctx)
}
}
impl PExpr {
// Parses a literal, identifier, `()`, parenthesised group or `{ ... }` block, chosen
// by the next token; any other token is reported as unexpected.
fn parse_unit(ctx: &mut ParserCtx) -> ParseResult<Self> {
ParseResult::Ok(match ctx.expect_peek()? {
Token::Lit(lit) => {
// Clone before advancing: `lit` borrows the lookahead token held by `ctx`.
let res = PExpr::Lit(lit.clone());
ctx.next();
res
}
Token::Ident(ident) => {
let res = PExpr::Ident(ident.to_string());
ctx.next();
res
}
Token::Symbol(symbol) => match symbol {
Symbol::OpenParen => {
// Consume the `(`.
ctx.expect_next()?;
if ctx.next_is(Symbol::CloseParen) {
PExpr::Unit
} else {
let inner = ctx.parse();
// If the inner expression failed, call `seek` for the closing paren before
// reporting a sub-error (presumably to resynchronise — see `seek`).
let Some(inner) = inner else {
ctx.seek(Symbol::CloseParen);
return ParseResult::SubErr;
};
ctx.expect(Symbol::CloseParen)?;
PExpr::Group(inner.bx())
}
}
// The `{` is left for `PBlock::parse`, which is called with `curlies = true`.
Symbol::OpenCurly => PExpr::Block(ctx.parse_with(true)?),
_ => return ctx.unexpected("expression").res(),
},
_ => return ctx.unexpected("expression").res(),
})
}
}
+25
View File
@@ -0,0 +1,25 @@
use super::*;
#[derive(Debug)]
pub struct PFunc {
name: Node<PIdent>,
args: Vec<Node<PVarDef>>,
body: Node<PExpr>,
}
impl Parsable for PFunc {
type Data = ();
fn parse(ctx: &mut ParserCtx, _: Self::Data) -> ParseResult<Self> {
ctx.expect(Keyword::Fn)?;
let name = ctx.parse()?;
ctx.expect(Symbol::OpenParen)?;
let args = ctx.parse_list(Some(Symbol::CloseParen), SepCheck::new(Symbol::Comma))?;
let body = ctx.parse()?;
ParseResult::Ok(Self {
name,
args: args.nodes,
body,
})
}
}
+25
View File
@@ -0,0 +1,25 @@
use super::*;
use std::ops::Deref;
/// Parsed identifier; dereferences to the underlying name.
#[derive(Debug)]
pub struct PIdent(String);

impl Deref for PIdent {
    type Target = String;

    fn deref(&self) -> &String {
        &self.0
    }
}

impl Parsable for PIdent {
    /// Consumes the next token if it is an identifier; otherwise leaves it in place
    /// and reports it as unexpected.
    fn parse(ctx: &mut ParserCtx, _: ()) -> ParseResult<Self> {
        let Token::Ident(name) = ctx.expect_peek()? else {
            return ctx.unexpected("identifier").res();
        };
        // Copy the name out before advancing past the token it is borrowed from.
        let name = name.clone();
        ctx.next();
        ParseResult::Ok(PIdent(name))
    }
}
+128
View File
@@ -0,0 +1,128 @@
use super::*;
/// Result of `ParserCtx::parse_list`.
pub struct ListRes<T> {
/// Parsed elements in source order.
pub nodes: Vec<Node<T>>,
/// True when the list ended right after a successful between-step (e.g. a trailing
/// separator); false when it ended right after an element or was empty.
pub last_sep: bool,
}
/// Hook run between two list elements, typically to consume a separator.
pub trait BetweenFn<T> {
/// Runs after `prev` was parsed. Returning `true` makes `parse_list` abort;
/// `false` lets it continue with the next element.
fn run(&mut self, ctx: &mut ParserCtx, prev: &Node<T>) -> bool;
}
impl ParserCtx<'_> {
/// Parses a list of `T` until `end` is consumed, or until end of input when `end` is
/// `None`, running `between` after every element that is not directly followed by
/// the end.
///
/// Returns `None` when an element or the between-step fails and `seek` for the end
/// token does not report success (always the case when `end` is `None`).
pub fn parse_list<T: Parsable<Data = ()>>(
&mut self,
end: Option<impl Into<Token>>,
mut between: impl BetweenFn<T>,
) -> Option<ListRes<T>> {
let end = end.map(|t| t.into());
let mut nodes = Vec::new();
let mut last_sep = false;
// Error recovery: call `seek` with the end token; if it reports true, stop the loop
// and return what was collected so far, otherwise give up on the whole list.
macro_rules! abort {
() => {
if end.is_some_and(|t| self.seek(t)) {
break;
} else {
return None;
}
};
}
// Leaves the loop when the end token is next (consuming it), or, without an end
// token, when the input is exhausted.
macro_rules! check_end {
() => {
if end.as_ref().is_some_and(|t| self.next_is_ref(t))
|| (end.is_none() && self.peek().is_none())
{
break;
}
};
}
loop {
check_end!();
last_sep = false;
nodes.push(match self.parse() {
Some(node) => node,
None => abort!(),
});
check_end!();
// `between` returns true on failure.
if between.run(self, nodes.last().unwrap()) {
abort!();
}
last_sep = true;
}
Some(ListRes { nodes, last_sep })
}
}
/// Between-step that requires a fixed separator token between list elements.
pub struct SepCheck {
    /// The separator token.
    pub sep: Token,
    /// Whether additional consecutive separators are consumed as well.
    pub dup: bool,
}

impl<T> BetweenFn<T> for SepCheck {
    /// Consumes the next token and checks that it is the separator.
    ///
    /// Returns `true` (abort) when there is no next token or it is not the separator;
    /// in the latter case a message is pushed onto `ctx.msgs`. Returns `false` after a
    /// separator, having also consumed any repeated separators when `dup` is set.
    fn run(&mut self, ctx: &mut ParserCtx, _prev: &Node<T>) -> bool {
        match ctx.expect_next() {
            None => return true,
            Some(token) if token != self.sep => {
                ctx.msgs
                    .push(ctx.unexpected(format!("Expected {:?}", self.sep)));
                return true;
            }
            Some(_) => {}
        }
        if self.dup {
            while ctx.next_is_ref(&self.sep) {}
        }
        false
    }
}

impl SepCheck {
    /// Creates a check for `sep` that does not accept repeated separators.
    pub fn new(sep: impl Into<Token>) -> Self {
        let sep = sep.into();
        Self { sep, dup: false }
    }

    /// Builder-style setter for `dup`.
    pub fn dup(self, dup: bool) -> Self {
        Self { dup, ..self }
    }
}
/// Predicate deciding, from the parser state and the previously parsed node, whether
/// the wrapped between-step should be skipped.
pub trait SkipFn<T> = Fn(&mut ParserCtx, &Node<T>) -> bool;
// NOTE: the original author flagged this wrapper as unsatisfactory (while considering
// `SepCheck` fine); treat it as a candidate for redesign.
/// Between-step adapter that bypasses `inner` whenever `f` returns true.
pub struct SkipIf<T, F: SkipFn<T>, B: BetweenFn<T>> {
f: F,
inner: B,
// Ties the otherwise unused element type `T` to the struct.
_pd: std::marker::PhantomData<T>,
}
impl<T, F: SkipFn<T>, B: BetweenFn<T>> SkipIf<T, F, B> {
/// Wraps the between-step `run` so that it is skipped when `f` returns true.
pub fn new(f: F, run: B) -> Self {
Self {
f,
inner: run,
_pd: std::marker::PhantomData,
}
}
}
impl<T, F: SkipFn<T>, B: BetweenFn<T>> BetweenFn<T> for SkipIf<T, F, B> {
    /// Reports success (`false`) without running the inner step when the predicate
    /// holds; otherwise returns whatever the inner step returns.
    fn run(&mut self, ctx: &mut ParserCtx, prev: &Node<T>) -> bool {
        let skip = (self.f)(ctx, prev);
        !skip && self.inner.run(ctx, prev)
    }
}
/// Combinator methods available on every [`BetweenFn`].
pub trait BetweenFnUtil<T>: BetweenFn<T> + Sized {
/// Wraps `self` in a [`SkipIf`] that bypasses it whenever `f` returns true.
fn skip_if<F: SkipFn<T>>(self, f: F) -> SkipIf<T, F, Self>;
}
// Blanket implementation: every `BetweenFn` gets `skip_if`.
impl<B: BetweenFn<T>, T> BetweenFnUtil<T> for B {
fn skip_if<F: SkipFn<T>>(self, f: F) -> SkipIf<T, F, Self> {
SkipIf::new(f, self)
}
}
+21
View File
@@ -0,0 +1,21 @@
mod block;
mod expr;
mod func;
mod ident;
mod list;
mod node;
mod statement;
mod ty;
mod vardef;
pub use block::*;
pub use expr::*;
pub use func::*;
pub use ident::*;
pub use list::*;
pub use node::*;
pub use statement::*;
pub use ty::*;
pub use vardef::*;
use super::*;
+63
View File
@@ -0,0 +1,63 @@
use super::*;
/// Wrapper around a parsed value; `data` is `None` when there is no value (the `Debug`
/// impl prints that case as `{error}`).
pub struct Node<T> {
pub data: Option<T>,
}
/// Boxed [`Node`], for recursive tree types.
pub type BNode<T> = Box<Node<T>>;
/// Outcome of `Parsable::parse`.
pub enum ParseResult<T> {
/// Parsed value.
Ok(T),
/// An already-wrapped node, e.g. forwarded from a nested parse.
Node(Node<T>),
// NOTE(review): how Continue and Break differ is decided by the consumer of
// ParseResult, which is not shown here.
Continue(CompilerMsg),
/// Failure carrying a message; produced by `?` on `Result<_, CompilerMsg>` and by
/// `CompilerMsg::res`.
Break(CompilerMsg),
/// Failure without a message of its own; produced by `?` on a `None`.
SubErr,
}
/// A type that can be parsed from a [`ParserCtx`].
pub trait Parsable: Sized {
/// Extra argument passed to `parse`; defaults to `()`.
type Data = ();
fn parse(ctx: &mut ParserCtx, data: Self::Data) -> ParseResult<Self>;
}
impl<T> Node<T> {
/// Moves the node into a `Box`.
pub fn bx(self) -> Box<Self> {
Box::new(self)
}
/// Applies `f` to the contained value, if any, keeping an empty node empty.
pub fn map<U>(self, f: impl FnOnce(T) -> U) -> Node<U> {
Node {
data: self.data.map(f),
}
}
}
use std::convert::Infallible;
// Allows `?` on an `Option` inside functions returning `ParseResult`: `None` becomes
// `ParseResult::SubErr`.
impl<T> std::ops::FromResidual<Option<Infallible>> for ParseResult<T> {
fn from_residual(residual: Option<Infallible>) -> Self {
match residual {
None => ParseResult::SubErr,
}
}
}
// Allows `?` on a `Result<_, CompilerMsg>` inside functions returning `ParseResult`:
// the error becomes `ParseResult::Break`.
impl<T> std::ops::FromResidual<Result<Infallible, CompilerMsg>> for ParseResult<T> {
fn from_residual(residual: Result<Infallible, CompilerMsg>) -> Self {
match residual {
Err(msg) => ParseResult::Break(msg),
}
}
}
impl CompilerMsg {
/// Wraps this message in `ParseResult::Break`.
pub fn res<T>(self) -> ParseResult<T> {
ParseResult::Break(self)
}
}
impl<T: std::fmt::Debug> std::fmt::Debug for Node<T> {
    /// Prints the contained value transparently, or the literal `{error}` when the
    /// node holds no data.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match &self.data {
            Some(inner) => inner.fmt(f),
            None => f.write_str("{error}"),
        }
    }
}
+27
View File
@@ -0,0 +1,27 @@
use super::*;
/// Parsed statement.
#[derive(Debug)]
pub enum PStatement {
Expr(PExpr),
/// `let <vardef> = <expr>`
Let(Node<PVarDef>, Node<PExpr>),
Fn(PFunc),
}
impl Parsable for PStatement {
// Dispatches on the next token: `let` and `fn` start their own statement forms,
// anything else is parsed as an expression statement.
fn parse(ctx: &mut ParserCtx, _: ()) -> ParseResult<Self> {
let res = match ctx.expect_peek()? {
Token::Keyword(kw) => match kw {
Keyword::Let => {
ctx.next();
let name = ctx.parse()?;
ctx.expect(Symbol::Equal)?;
let body = ctx.parse()?;
Self::Let(name, body)
}
// The `fn` keyword is not consumed here; `PFunc::parse` expects it itself.
Keyword::Fn => return ParseResult::Node(ctx.parse()?.map(PStatement::Fn)),
},
_ => return ParseResult::Node(ctx.parse()?.map(PStatement::Expr)),
};
ParseResult::Ok(res)
}
}
+12
View File
@@ -0,0 +1,12 @@
use super::*;
/// Parsed type; currently just a name.
#[derive(Debug)]
pub struct PType {
name: Node<PIdent>,
}
impl Parsable for PType {
// A type is parsed as a single identifier.
fn parse(ctx: &mut ParserCtx, _: Self::Data) -> ParseResult<Self> {
ParseResult::Ok(Self { name: ctx.parse()? })
}
}
+18
View File
@@ -0,0 +1,18 @@
use super::*;
/// Parsed variable definition: a name with an optional `: type` annotation.
#[derive(Debug)]
pub struct PVarDef {
    name: Node<PIdent>,
    ty: Option<Node<PType>>,
}

impl Parsable for PVarDef {
    /// Parses `name`, then a type if a `:` follows.
    fn parse(ctx: &mut ParserCtx, _: Self::Data) -> ParseResult<Self> {
        let name = ctx.parse()?;
        let ty = if ctx.next_is(Symbol::Colon) {
            Some(ctx.parse()?)
        } else {
            None
        };
        ParseResult::Ok(Self { name, ty })
    }
}
-97
View File
@@ -1,97 +0,0 @@
use std::{
collections::{HashMap, HashSet},
path::Path,
};
use crate::{
io::{CompilerMsg, CompilerOutput, Span},
ir::Ir,
parser::{self, ExprTy, Ident, Node, parse_file},
};
/// File extension appended to a module name to locate its source file.
const EXTENSION: &str = ".lang";

/// Parses the program whose entry file is `path`, following imports transitively.
///
/// Every module is looked up as `<name>.lang` in the entry file's directory. Each
/// parsed file is printed to stdout and scanned for definitions and imports;
/// duplicate definitions are reported through `output`.
///
/// Returns `None` if the entry path has no UTF-8 file stem, if any file fails to
/// parse, or if any error was recorded in `output`.
pub fn parse_program(path: impl AsRef<Path>, output: &mut CompilerOutput) -> Option<Ir> {
    let path = path.as_ref();
    // The entry path comes from the caller, so report a bad one as a compiler error
    // instead of panicking.
    let Some(entry) = path.file_stem().and_then(|stem| stem.to_str()) else {
        output.error(CompilerMsg {
            msg: format!("Invalid entry file path {path:?}"),
            spans: Vec::new(),
        });
        return None;
    };
    // A path with a file stem always has a parent (possibly empty); the fallback only
    // guards against a panic.
    let dir = path.parent().unwrap_or_else(|| Path::new(""));
    let mut imports = Imports::default();
    imports.add(entry);
    while let Some(next) = imports.new.pop() {
        imports.done.insert(next.clone());
        let path = dir.join(next + EXTENSION);
        println!("=== {path:?}");
        let body = parse_file(path, output)?;
        print!("{}", body.new_dsp());
        let defs = scan(&mut imports, &body, output);
        for (name, spans) in &defs.duplicates {
            output.error(CompilerMsg {
                msg: format!("Multiple definitions found for {name}"),
                spans: spans.clone(),
            });
        }
    }
    if !output.errors.is_empty() {
        return None;
    }
    let ir = Ir::default();
    Some(ir)
}
/// Collects the top-level definitions of `body` and queues its imports.
///
/// Only const definitions whose target is a plain identifier and `import`
/// expressions are recorded; a const definition with any other target is reported
/// through `output`. Imported names are also added to `imports`.
pub fn scan(imports: &mut Imports, body: &parser::Body, output: &mut CompilerOutput) -> Defs {
    let mut defs = Defs::default();
    for item in body.items.iter() {
        match &item.ty {
            ExprTy::Define {
                target,
                const_: true,
                ..
            } => {
                if let ExprTy::Ident(name) = &target.ty {
                    defs.add(name);
                } else {
                    output.error(("Invalid left hand side of definition", target.span));
                }
            }
            ExprTy::Import(import) => {
                imports.add(&import.name);
                defs.add(import);
            }
            _ => {}
        }
    }
    defs
}
/// Table of names defined in one file, with bookkeeping for duplicates.
#[derive(Default)]
pub struct Defs {
    /// First definition of each name: its sequential id and span.
    map: HashMap<String, (usize, Span)>,
    /// For every name defined more than once, the spans of all its definitions
    /// (the first one included).
    duplicates: HashMap<String, Vec<Span>>,
    /// Id handed to the next new name.
    next_id: usize,
}

impl Defs {
    /// Records a definition of `ident`. The first definition of a name gets a fresh
    /// id; later ones are only added to `duplicates`.
    pub fn add(&mut self, ident: &Ident) {
        match self.map.get(&ident.name) {
            Some(&(_, first_span)) => {
                self.duplicates
                    .entry(ident.name.clone())
                    .or_insert_with(|| vec![first_span])
                    .push(ident.span);
            }
            None => {
                let id = self.next_id;
                self.map.insert(ident.name.clone(), (id, ident.span));
                self.next_id += 1;
            }
        }
    }
}
/// Work list of modules to parse.
#[derive(Default)]
pub struct Imports {
    /// Modules that have already been taken off the work list.
    done: HashSet<String>,
    /// Modules still waiting to be parsed.
    new: Vec<String>,
}

impl Imports {
    /// Queues `name` unless it has already been processed or is already queued.
    pub fn add(&mut self, name: &str) {
        let already_done = self.done.contains(name);
        let already_queued = self.new.iter().any(|queued| queued == name);
        if !(already_done || already_queued) {
            self.new.push(name.to_owned());
        }
    }
}
+8
View File
@@ -0,0 +1,8 @@
let x = "test";
let y = "test";
fn test(x: u32) "hello";
fn test3() {
arst
}
fn test2() "hello";
Binary file not shown.
Binary file not shown.
-5
View File
@@ -1,5 +0,0 @@
asm {
mov eax, 1
mov ebx, 39
int 0x80
}
-17
View File
@@ -1,17 +0,0 @@
x : u32 = 3;
while true {
print("hello");
print(x);
other.thing();
thing();
break;
}
y :: true;
if y => print("hello");
thing :: fn() {
}
import other;
Binary file not shown.
-5
View File
@@ -1,5 +0,0 @@
thing :: fn() {
print("hello from other");
}
import main;