Compare commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
6cc81d7a5c | ||
|
|
85eacd783d | ||
|
|
4fe4b50c8b | ||
|
|
026aec8565 | ||
|
|
113f3d4d9c | ||
|
|
d66f8f02b7 | ||
|
|
4e06e474ea | ||
|
|
84e184518f | ||
|
|
1e39675c29 | ||
|
|
bdeb0d821c | ||
|
|
550d58d6f4 | ||
|
|
fa2a6db2e2 | ||
|
|
571ff70fa1 | ||
|
|
715a50b1fa | ||
|
|
397176759d | ||
|
|
51bdc5c684 | ||
|
|
e2ebf5c681 | ||
|
|
7280f7b071 | ||
|
|
e199620856 | ||
|
|
663e6648ca | ||
|
|
ceebcdc0e3 | ||
|
|
7004cdbfe2 | ||
|
|
433c3114d5 | ||
|
|
b03f755252 | ||
|
|
91f5db6950 | ||
|
|
ddf63ad817 | ||
|
|
bc922a6086 | ||
|
|
ea305909a0 | ||
|
|
e4acaf40aa | ||
|
|
6bc502d284 | ||
|
|
c17122679e | ||
|
|
c9add923be | ||
|
|
a086fa6590 | ||
|
|
66710370bf | ||
|
|
69cd249671 | ||
|
|
ba706ebb73 | ||
|
|
a3f934be21 | ||
|
|
ef35509c98 | ||
|
|
4587f687b9 | ||
|
|
0ac7c5cc02 | ||
|
|
978bac88ed | ||
|
|
380a0f977a | ||
|
|
473ddab0d4 | ||
|
|
c2a8c50a6d | ||
|
|
1d568f8ce3 | ||
|
|
d864adfd05 | ||
|
|
b3f77076d4 | ||
|
|
2f91e454dd | ||
|
|
e5ae506a84 | ||
|
|
83edad0cd8 | ||
|
|
f702f47714 | ||
|
|
2582e8c87e | ||
|
|
229b026573 | ||
|
|
29316e6353 | ||
|
|
bdf08ce52c | ||
|
|
edabc22431 |
@@ -0,0 +1,68 @@
|
||||
mod setup;
|
||||
use setup::*;
|
||||
|
||||
/// Runs every `mov` operand pairing through `eq`, which compares the encoder
/// against the reference encoding for the same assembly text.
#[test]
fn mov() {
    let ctx = &mut TestCtx::new("mov");

    // register <- register
    for dst in regs() {
        for src in regs() {
            let asm = format!("mov {dst}, {src}");
            eq(ctx, asm, |code| code.mov(dst, src));
        }
    }

    // register <- memory
    for dst in regs() {
        for src in mems() {
            let asm = format!("mov {dst}, {src}");
            eq(ctx, asm, |code| code.mov(dst, src));
        }
    }

    // register <- immediate
    for dst in regs() {
        for src in imms() {
            let asm = format!("mov {dst}, {src}");
            eq(ctx, asm, |code| code.mov(dst, src));
        }
    }

    // memory <- register
    for dst in mems() {
        for src in regs() {
            let asm = format!("mov {dst}, {src}");
            eq(ctx, asm, |code| code.mov(dst, src));
        }
    }

    // memory <- immediate
    for dst in mems() {
        for src in imms() {
            let asm = format!("mov {dst}, {src}");
            eq(ctx, asm, |code| code.mov(dst, src));
        }
    }
}
|
||||
|
||||
/// Runs the covered `add` / `sub` operand pairings through `eq`.
#[test]
fn add_sub() {
    let ctx = &mut TestCtx::new("add_sub");

    // add: register <- immediate
    for dst in regs() {
        for src in imms() {
            let asm = format!("add {dst}, {src}");
            eq(ctx, asm, |code| code.add(dst, src));
        }
    }

    // add: register <- register
    for dst in regs() {
        for src in regs() {
            let asm = format!("add {dst}, {src}");
            eq(ctx, asm, |code| code.add(dst, src));
        }
    }

    // add: memory <- immediate
    for dst in mems() {
        for src in imms() {
            let asm = format!("add {dst}, {src}");
            eq(ctx, asm, |code| code.add(dst, src));
        }
    }

    // sub: register <- immediate
    for dst in regs() {
        for src in imms() {
            let asm = format!("sub {dst}, {src}");
            eq(ctx, asm, |code| code.sub(dst, src));
        }
    }
}
|
||||
Generated
+110
-1
@@ -3,5 +3,114 @@
|
||||
version = 4
|
||||
|
||||
[[package]]
|
||||
name = "v2"
|
||||
name = "arrayvec"
|
||||
version = "0.7.6"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "7c02d123df017efcdfbd739ef81735b36c5ba83ec3c59c80a9d7ecc718f92e50"
|
||||
|
||||
[[package]]
|
||||
name = "bitcode"
|
||||
version = "0.6.9"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "0a6ed1b54d8dc333e7be604d00fa9262f4635485ffea923647b6521a5fff045d"
|
||||
dependencies = [
|
||||
"arrayvec",
|
||||
"bitcode_derive",
|
||||
"bytemuck",
|
||||
"glam",
|
||||
"serde",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "bitcode_derive"
|
||||
version = "0.6.9"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "238b90427dfad9da4a9abd60f3ec1cdee6b80454bde49ed37f1781dd8e9dc7f9"
|
||||
dependencies = [
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"syn",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "bytemuck"
|
||||
version = "1.25.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "c8efb64bd706a16a1bdde310ae86b351e4d21550d98d056f22f8a7f7a2183fec"
|
||||
|
||||
[[package]]
|
||||
name = "glam"
|
||||
version = "0.33.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "898f5a568a84989b6c0f8caa50a93074b97dbdc58fc6d9543157bb4562758933"
|
||||
|
||||
[[package]]
|
||||
name = "lang"
|
||||
version = "0.1.0"
|
||||
dependencies = [
|
||||
"bitcode",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "proc-macro2"
|
||||
version = "1.0.106"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "8fd00f0bb2e90d81d1044c2b32617f68fcb9fa3bb7640c23e9c748e53fb30934"
|
||||
dependencies = [
|
||||
"unicode-ident",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "quote"
|
||||
version = "1.0.45"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "41f2619966050689382d2b44f664f4bc593e129785a36d6ee376ddf37259b924"
|
||||
dependencies = [
|
||||
"proc-macro2",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "serde"
|
||||
version = "1.0.228"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "9a8e94ea7f378bd32cbbd37198a4a91436180c5bb472411e48b5ec2e2124ae9e"
|
||||
dependencies = [
|
||||
"serde_core",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "serde_core"
|
||||
version = "1.0.228"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "41d385c7d4ca58e59fc732af25c3983b67ac852c1a25000afe1175de458b67ad"
|
||||
dependencies = [
|
||||
"serde_derive",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "serde_derive"
|
||||
version = "1.0.228"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "d540f220d3187173da220f885ab66608367b6574e925011a9353e4badda91d79"
|
||||
dependencies = [
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"syn",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "syn"
|
||||
version = "2.0.117"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "e665b8803e7b1d2a727f4023456bbbbe74da67099c585258af0ad9c5013b9b99"
|
||||
dependencies = [
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"unicode-ident",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "unicode-ident"
|
||||
version = "1.0.24"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "e6e4313cd5fcd3dad5cafa179702e2b244f760991f45397d14d4ebf38247da75"
|
||||
|
||||
+4
-1
@@ -1,6 +1,9 @@
|
||||
[package]
|
||||
name = "v2"
|
||||
name = "lang"
|
||||
version = "0.1.0"
|
||||
edition = "2024"
|
||||
|
||||
[dependencies]
|
||||
|
||||
[dev-dependencies]
|
||||
bitcode = "0.6.9"
|
||||
|
||||
@@ -0,0 +1,13 @@
|
||||
use crate::{
|
||||
backend::{Addr, LinkedProgram, Program},
|
||||
io::CompilerMsg,
|
||||
};
|
||||
|
||||
pub mod x86_64;
|
||||
|
||||
pub trait Arch: Sized {
|
||||
const NAME: &str;
|
||||
type Asm;
|
||||
type Addr: Addr;
|
||||
fn compile(p: &Program<Self>) -> Result<LinkedProgram<Self::Addr>, CompilerMsg>;
|
||||
}
|
||||
@@ -0,0 +1,83 @@
|
||||
use std::collections::HashMap;
|
||||
|
||||
use super::*;
|
||||
use crate::backend::{LibImport, LinkedProgram, SymImport, SymTable, Symbol};
|
||||
use util::*;
|
||||
|
||||
pub struct Encoder<'a> {
|
||||
pub code: Code,
|
||||
pub sym_tab: SymTable<u64>,
|
||||
pub sym_refs: HashMap<Symbol, Vec<usize>>,
|
||||
pub program: &'a Program<X86_64>,
|
||||
}
|
||||
|
||||
pub fn compile(p: &Program<X86_64>) -> Result<LinkedProgram<u64>, CompilerMsg> {
|
||||
let mut encoder = Encoder::new(p);
|
||||
|
||||
p.encode_data(&mut encoder.code.bytes, &mut encoder.sym_tab);
|
||||
|
||||
for f in &p.funcs {
|
||||
let addr = encoder.code.bytes.len();
|
||||
encoder.sym_tab.insert(f.sym, addr as u64);
|
||||
for instr in &f.instrs {
|
||||
encoder.compile_instr(instr)?;
|
||||
}
|
||||
}
|
||||
|
||||
for (pos, sym) in encoder.code.missing.drain(..) {
|
||||
let info = encoder.program.sym_info(sym);
|
||||
if info.external {
|
||||
encoder.sym_refs.entry(sym).or_default().push(pos);
|
||||
} else {
|
||||
let addr = encoder
|
||||
.sym_tab
|
||||
.get(sym)
|
||||
.ok_or(CompilerMsg::from(format!("missing symbol {}", info.name)))?;
|
||||
encoder.code.bytes[pos..pos + 4].copy_from_slice(&addr_offset(pos, addr))
|
||||
}
|
||||
}
|
||||
|
||||
let imports = p
|
||||
.external
|
||||
.iter()
|
||||
.map(|e| LibImport {
|
||||
name: e.file.clone(),
|
||||
syms: e
|
||||
.syms
|
||||
.iter()
|
||||
.map(|&s| SymImport {
|
||||
name: p.sym_info(s).name.clone(),
|
||||
usages: encoder.sym_refs.entry(s).or_default().clone(),
|
||||
})
|
||||
.collect(),
|
||||
})
|
||||
.collect();
|
||||
|
||||
Ok(LinkedProgram {
|
||||
code: encoder.code.bytes,
|
||||
entry: p.entry.and_then(|e| encoder.sym_tab.get(e)),
|
||||
imports,
|
||||
})
|
||||
}
|
||||
|
||||
type BInstr = crate::backend::Instr<X86_64>;
|
||||
impl<'a> Encoder<'a> {
|
||||
fn compile_instr(&mut self, instr: &BInstr) -> Result<(), CompilerMsg> {
|
||||
match instr {
|
||||
BInstr::Asm(asm) => {
|
||||
self.code.extend(asm);
|
||||
}
|
||||
_ => todo!(),
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub fn new(program: &'a Program<X86_64>) -> Self {
|
||||
Self {
|
||||
code: Code::default(),
|
||||
sym_tab: SymTable::new(program.sym_count()),
|
||||
sym_refs: Default::default(),
|
||||
program,
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,303 @@
|
||||
use super::*;
|
||||
use crate::backend::Symbol;
|
||||
|
||||
type ERes = Result<(), CompilerMsg>;
|
||||
|
||||
/// machine code
|
||||
#[derive(Default)]
|
||||
pub struct Code {
|
||||
pub(super) bytes: Vec<u8>,
|
||||
pub(super) missing: Vec<(usize, Symbol)>,
|
||||
}
|
||||
|
||||
impl Code {
|
||||
pub fn mov(&mut self, dst: impl RegMem, src: impl Into<RegMemImm>) -> ERes {
|
||||
let src = src.into();
|
||||
match dst.kind() {
|
||||
RegMemKind::Reg(mut dst) => match src {
|
||||
RegMemImm::Reg(src) => {
|
||||
if dst.width() != src.width() {
|
||||
return Err("src and dst are not same width".into());
|
||||
}
|
||||
self.prefix16(dst);
|
||||
self.rex(dst, src, 0, dst)?;
|
||||
self.bytes.push(0x88 | dst.not8());
|
||||
self.modrm(src, dst);
|
||||
}
|
||||
RegMemImm::Imm(src) => {
|
||||
let src_width = src.width_unsigned()?;
|
||||
if src_width > dst.width() {
|
||||
return Err("immediate cannot fit in register".into());
|
||||
}
|
||||
self.prefix16(dst);
|
||||
if dst.width() == Width::B64 && src_width <= Width::B32 && src.0 < 0 {
|
||||
// use different op that sign extends for less bytes
|
||||
self.bytes
|
||||
.extend([rex(dst, 0, 0, dst), 0xc7, 0xc0 | dst.base()]);
|
||||
self.imm(src, Width::B32);
|
||||
} else {
|
||||
if src_width <= Width::B32 {
|
||||
dst = dst.lower64();
|
||||
}
|
||||
self.rex(dst, 0, 0, dst)?;
|
||||
self.bytes.push(0xb0 | (dst.not8() << 3) | dst.base());
|
||||
self.imm(src, dst.width());
|
||||
}
|
||||
}
|
||||
RegMemImm::Mem(src) => {
|
||||
if src.width != dst.width() {
|
||||
return Err("register & memory sizes don't match".into());
|
||||
}
|
||||
self.prefix32(src)?;
|
||||
self.prefix16(dst);
|
||||
self.rex(dst, dst, 0, src)?;
|
||||
self.bytes.push(0x8a | dst.not8());
|
||||
self.modrm(dst, src);
|
||||
}
|
||||
},
|
||||
RegMemKind::Mem(dst) => match src {
|
||||
RegMemImm::Reg(src) => {
|
||||
if src.width() != dst.width {
|
||||
return Err("register & memory sizes don't match".into());
|
||||
}
|
||||
self.prefix32(dst)?;
|
||||
self.prefix16(src);
|
||||
self.rex(dst, src, 0, dst)?;
|
||||
self.bytes.push(0x88 | src.not8());
|
||||
self.modrm(src, dst);
|
||||
}
|
||||
RegMemImm::Imm(src) => {
|
||||
let encode_width = dst.width.min(Width::B32);
|
||||
let src_width = if dst.width == Width::B64 {
|
||||
src.width_signed()
|
||||
} else {
|
||||
src.width_unsigned()
|
||||
}?;
|
||||
if src_width == Width::B64 {
|
||||
return Err("cannot move 64 bit immediate into memory".into());
|
||||
}
|
||||
if src_width > dst.width {
|
||||
return Err("source cannot fit in destination".into());
|
||||
}
|
||||
self.prefix32(dst)?;
|
||||
self.prefix16(encode_width);
|
||||
self.rex(dst, 0, 0, dst)?;
|
||||
self.bytes.push(0xc6 | encode_width.not8());
|
||||
self.modrm(0, dst);
|
||||
self.imm(src, encode_width);
|
||||
}
|
||||
RegMemImm::Mem(_) => return Err("cannot move memory to memory".into()),
|
||||
},
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub fn push(&mut self, reg: impl Into<RegMemImm>) -> ERes {
|
||||
match reg.into() {
|
||||
RegMemImm::Reg(reg) => match reg.width() {
|
||||
Width::B64 => {
|
||||
if reg.gt8() {
|
||||
self.bytes.push(0x41);
|
||||
}
|
||||
self.bytes.push(0x50 | reg.base());
|
||||
}
|
||||
Width::B16 => todo!(),
|
||||
_ => return Err("register must be 64 or 16 bit".into()),
|
||||
},
|
||||
RegMemImm::Imm(imm) => match imm.width_unsigned()? {
|
||||
Width::B8 => {
|
||||
self.bytes.push(0x6a);
|
||||
self.bytes.push(imm.0 as u8);
|
||||
}
|
||||
Width::B16 | Width::B32 => {
|
||||
self.bytes.push(0x68);
|
||||
self.bytes.extend((imm.0 as u32).to_le_bytes());
|
||||
}
|
||||
Width::B64 => return Err("immediate must be 32 bit or less".into()),
|
||||
},
|
||||
RegMemImm::Mem(mem) => todo!(),
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub fn pop(&mut self, reg: Reg) -> ERes {
|
||||
match reg.width() {
|
||||
Width::B64 | Width::B16 => (),
|
||||
_ => return Err("register must be 64 or 16 bit".into()),
|
||||
}
|
||||
self.prefix16(reg);
|
||||
if reg.gt8() {
|
||||
self.bytes.push(0x41);
|
||||
}
|
||||
self.bytes.push(0x58 | reg.base());
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Encodes `lea dst, [sym]`, leaving a 4-byte placeholder for the symbol offset.
///
/// The REX prefix is always requested with the W bit set, whatever the width
/// of `dst` is.
pub fn lea(&mut self, dst: Reg, sym: Symbol) -> ERes {
    self.rex(1, dst, 0, 0).map(|()| {
        self.bytes.push(0x8d);
        self.modrm(dst, sym);
    })
}
|
||||
|
||||
pub fn int(&mut self, code: u8) {
|
||||
self.bytes.extend([0xcd, code])
|
||||
}
|
||||
|
||||
pub fn syscall(&mut self) {
|
||||
self.bytes.extend([0x0f, 0x05])
|
||||
}
|
||||
|
||||
/// Encodes a `call` (opcode `0xe8`) followed by a 4-byte placeholder that is
/// later patched with the offset of `sym`.
pub fn call(&mut self, sym: Symbol) {
    const CALL_REL32: u8 = 0xe8;
    self.bytes.push(CALL_REL32);
    self.sym_offset4(sym);
}
|
||||
|
||||
/// Encodes an indirect call through memory (`ff 15`) followed by a 4-byte
/// placeholder that is later patched with the offset of `sym`.
pub fn call_mem(&mut self, sym: Symbol) {
    self.bytes.extend_from_slice(&[0xff, 0x15]);
    self.sym_offset4(sym);
}
|
||||
|
||||
pub fn ret(&mut self) {
|
||||
self.bytes.push(0xc3);
|
||||
}
|
||||
|
||||
fn add_sub(&mut self, dst: impl RegMem, src: impl Into<RegMemImm>, ext: u8) -> ERes {
|
||||
match src.into() {
|
||||
RegMemImm::Reg(src) => {
|
||||
if src.width() != dst.width() {
|
||||
return Err("incompatible widths".into());
|
||||
}
|
||||
self.prefix32(dst)?;
|
||||
self.prefix16(src);
|
||||
self.rex(dst, src, 0, dst)?;
|
||||
self.bytes.push(src.not8());
|
||||
self.modrm(src, dst);
|
||||
}
|
||||
RegMemImm::Imm(mut src) => {
|
||||
let mut imm_width = src.width_signed()?;
|
||||
let dst_width = dst.width().min(Width::B32);
|
||||
|
||||
if imm_width > dst_width {
|
||||
imm_width = src.width_unsigned()?;
|
||||
if dst.width() == Width::B64 || imm_width > dst_width {
|
||||
return Err("immediate overflow".into());
|
||||
}
|
||||
src = src.reinterpret(dst_width);
|
||||
imm_width = src.width_signed()?;
|
||||
}
|
||||
let code = if dst.width() == Width::B8 {
|
||||
0x80
|
||||
} else if imm_width == Width::B8 {
|
||||
0x83
|
||||
} else {
|
||||
imm_width = dst_width;
|
||||
0x81
|
||||
};
|
||||
|
||||
self.prefix32(dst)?;
|
||||
self.prefix16(dst_width);
|
||||
self.rex(dst, 0, 0, dst)?;
|
||||
self.bytes.push(code);
|
||||
self.modrm(ext, dst);
|
||||
self.imm(src, imm_width);
|
||||
}
|
||||
RegMemImm::Mem(src) => {
|
||||
let RegMemKind::Reg(dst) = dst.kind() else {
|
||||
return Err("cannot add memory to memory".into());
|
||||
};
|
||||
if src.width() != dst.width() {
|
||||
return Err("incompatible widths".into());
|
||||
}
|
||||
self.prefix32(src)?;
|
||||
self.prefix16(dst);
|
||||
self.rex(dst, dst, 0, src)?;
|
||||
self.bytes.push(0x2 | dst.not8());
|
||||
self.modrm(dst, src);
|
||||
}
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Encodes `add dst, src`.
pub fn add(&mut self, dst: impl RegMem, src: impl Into<RegMemImm>) -> ERes {
    const ADD_EXT: u8 = 0;
    self.add_sub(dst, src, ADD_EXT)
}
|
||||
|
||||
/// Encodes `sub dst, src`.
pub fn sub(&mut self, dst: impl RegMem, src: impl Into<RegMemImm>) -> ERes {
    const SUB_EXT: u8 = 5;
    self.add_sub(dst, src, SUB_EXT)
}
|
||||
|
||||
fn prefix16(&mut self, width: impl Into<Width>) {
|
||||
if width.into() == Width::B16 {
|
||||
self.bytes.push(0x66);
|
||||
}
|
||||
}
|
||||
|
||||
fn prefix32(&mut self, mem: impl MaybeMem) -> Result<(), CompilerMsg> {
|
||||
let Some(mem) = mem.mem() else {
|
||||
return Ok(());
|
||||
};
|
||||
match mem.reg.width() {
|
||||
Width::B8 | Width::B16 => return Err("invalid register width".into()),
|
||||
Width::B32 => self.bytes.push(0x67),
|
||||
Width::B64 => (),
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Emits a REX prefix when any of the operands needs one.
///
/// # Errors
/// Returns an error when one operand requires a REX prefix while the other
/// cannot be encoded with one.
fn rex(&mut self, w: impl RexW, r: impl RexBit, x: u8, b: impl RexBit) -> ERes {
    let conflicting = (r.req() && b.req_no()) || (r.req_no() && b.req());
    if conflicting {
        return Err("registers incompatible (REX)".into());
    }
    let needed = w.rexw() || r.rex() || x.rex() || b.rex() || r.req() || b.req();
    if needed {
        let prefix = rex(w, r, x, b);
        self.bytes.push(prefix);
    }
    Ok(())
}
|
||||
|
||||
fn modrm(&mut self, reg: impl ModRMReg, rm: impl ModRMRM) {
|
||||
let addr = rm.addr();
|
||||
let mod_ = match addr {
|
||||
EffAddr::Mem0 | EffAddr::Sym(_) => 0b00,
|
||||
EffAddr::Mem8(_) => 0b01,
|
||||
EffAddr::Mem32(_) => 0b10,
|
||||
EffAddr::None => 0b11,
|
||||
};
|
||||
self.bytes
|
||||
.push(((mod_ as u8) << 6) | (reg.val() << 3) | rm.rm());
|
||||
if !matches!(addr, EffAddr::None) && rm.rm() == 0b100 {
|
||||
// SIB
|
||||
self.bytes.push(0x24);
|
||||
}
|
||||
match addr {
|
||||
EffAddr::Mem8(disp) => self.bytes.push(disp as u8),
|
||||
EffAddr::Mem32(disp) => self.bytes.extend(disp.to_le_bytes()),
|
||||
EffAddr::Sym(sym) => self.sym_offset4(sym),
|
||||
_ => (),
|
||||
}
|
||||
}
|
||||
|
||||
/// inserts a 32 bit offset from a symbol
|
||||
fn sym_offset4(&mut self, sym: Symbol) {
|
||||
let pos = self.bytes.len();
|
||||
self.bytes.extend([0; 4]);
|
||||
self.missing.push((pos, sym));
|
||||
}
|
||||
|
||||
/// Appends `other` to this code, shifting its pending symbol references by
/// the current length so they keep pointing at their placeholders.
pub fn extend(&mut self, other: &Code) {
    let base = self.bytes.len();
    self.bytes.extend_from_slice(&other.bytes);
    for &(offset, sym) in &other.missing {
        self.missing.push((base + offset, sym));
    }
}
|
||||
|
||||
/// Appends the low `width` bytes of `imm` in little-endian order.
fn imm(&mut self, imm: Imm, width: Width) {
    let le = imm.0.to_le_bytes();
    let count = width.bytes();
    self.bytes.extend(&le[..count]);
}
|
||||
}
|
||||
|
||||
pub fn encode(f: impl FnOnce(&mut Code) -> Result<(), CompilerMsg>) -> Result<Code, CompilerMsg> {
|
||||
let mut code = Code::default();
|
||||
f(&mut code)?;
|
||||
Ok(code)
|
||||
}
|
||||
@@ -0,0 +1,28 @@
|
||||
mod compile;
|
||||
mod encode;
|
||||
#[cfg(test)]
|
||||
mod test;
|
||||
mod types;
|
||||
mod util;
|
||||
|
||||
use crate::{
|
||||
arch::Arch,
|
||||
backend::{LinkedProgram, Program},
|
||||
io::CompilerMsg,
|
||||
};
|
||||
|
||||
pub use compile::*;
|
||||
pub use encode::*;
|
||||
pub use types::*;
|
||||
use util::*;
|
||||
|
||||
pub struct X86_64;
|
||||
|
||||
impl Arch for X86_64 {
|
||||
const NAME: &str = "x86_64";
|
||||
type Asm = Code;
|
||||
type Addr = u64;
|
||||
fn compile(p: &Program<Self>) -> Result<LinkedProgram<Self::Addr>, CompilerMsg> {
|
||||
compile(p)
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,80 @@
|
||||
mod setup;
|
||||
use setup::*;
|
||||
|
||||
/// Runs every `mov` operand pairing through `eq`, which compares the encoder
/// against NASM's encoding of the same assembly text.
#[test]
fn mov() {
    let ctx = &mut TestCtx::new("mov");

    // register <- register
    for dst in regs() {
        for src in regs() {
            let asm = format!("mov {dst}, {src}");
            eq(ctx, asm, |code| code.mov(dst, src));
        }
    }

    // register <- memory
    for dst in regs() {
        for src in mems() {
            let asm = format!("mov {dst}, {src}");
            eq(ctx, asm, |code| code.mov(dst, src));
        }
    }

    // register <- immediate
    for dst in regs() {
        for src in imms() {
            let asm = format!("mov {dst}, {src}");
            eq(ctx, asm, |code| code.mov(dst, src));
        }
    }

    // memory <- register
    for dst in mems() {
        for src in regs() {
            let asm = format!("mov {dst}, {src}");
            eq(ctx, asm, |code| code.mov(dst, src));
        }
    }

    // memory <- immediate
    for dst in mems() {
        for src in imms() {
            let asm = format!("mov {dst}, {src}");
            eq(ctx, asm, |code| code.mov(dst, src));
        }
    }
}
|
||||
|
||||
/// Runs the covered `add` / `sub` operand pairings through `eq`.
#[test]
fn add_sub() {
    let ctx = &mut TestCtx::new("add_sub");

    // add: register <- immediate
    for dst in regs() {
        for src in imms() {
            let asm = format!("add {dst}, {src}");
            eq(ctx, asm, |code| code.add(dst, src));
        }
    }

    // add: register <- register
    for dst in regs() {
        for src in regs() {
            let asm = format!("add {dst}, {src}");
            eq(ctx, asm, |code| code.add(dst, src));
        }
    }

    // add: register <- memory
    for dst in regs() {
        for src in mems() {
            let asm = format!("add {dst}, {src}");
            eq(ctx, asm, |code| code.add(dst, src));
        }
    }

    // add: memory <- immediate
    for dst in mems() {
        for src in imms() {
            let asm = format!("add {dst}, {src}");
            eq(ctx, asm, |code| code.add(dst, src));
        }
    }

    // add: memory <- register
    for dst in mems() {
        for src in regs() {
            let asm = format!("add {dst}, {src}");
            eq(ctx, asm, |code| code.add(dst, src));
        }
    }

    // sub: register <- immediate
    for dst in regs() {
        for src in imms() {
            let asm = format!("sub {dst}, {src}");
            eq(ctx, asm, |code| code.sub(dst, src));
        }
    }
}
|
||||
@@ -0,0 +1,185 @@
|
||||
use crate::arch::x86_64::*;
|
||||
use std::{collections::HashMap, fs::OpenOptions, io::Write, process::Command};
|
||||
|
||||
const DISPS: &[i32] = &[
|
||||
0x0,
|
||||
i8::MIN as i32,
|
||||
i8::MAX as i32,
|
||||
i16::MIN as i32,
|
||||
i16::MAX as i32,
|
||||
i32::MIN,
|
||||
i32::MAX,
|
||||
];
|
||||
|
||||
const IMMS: &[i128] = &[
|
||||
0x0,
|
||||
i8::MIN as i128,
|
||||
i8::MAX as i128,
|
||||
i16::MIN as i128,
|
||||
i16::MAX as i128,
|
||||
i32::MIN as i128,
|
||||
i32::MAX as i128,
|
||||
i64::MIN as i128,
|
||||
i64::MAX as i128,
|
||||
u8::MAX as i128,
|
||||
u8::MAX as i128 + 1,
|
||||
u16::MAX as i128,
|
||||
u16::MAX as i128 + 1,
|
||||
u32::MAX as i128,
|
||||
u32::MAX as i128 + 1,
|
||||
i64::MAX as i128,
|
||||
];
|
||||
|
||||
const WIDTHS: &[Width] = &[Width::B8, Width::B16, Width::B32, Width::B64];
|
||||
|
||||
pub fn imms() -> impl Iterator<Item = i128> {
|
||||
IMMS.iter().cloned()
|
||||
}
|
||||
|
||||
pub fn regs() -> impl Iterator<Item = Reg> {
|
||||
Reg::IMPORTANT.iter().cloned()
|
||||
}
|
||||
|
||||
pub fn mems() -> impl Iterator<Item = Mem> {
|
||||
gen move {
|
||||
for ® in Reg::IMPORTANT {
|
||||
for &disp in DISPS {
|
||||
for &width in WIDTHS {
|
||||
yield mem(reg, disp, width);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub struct TestCtx {
|
||||
path: String,
|
||||
code: Code,
|
||||
cache: HashMap<String, Result<Vec<u8>, String>>,
|
||||
changed: bool,
|
||||
}
|
||||
|
||||
#[track_caller]
|
||||
pub fn eq(
|
||||
ctx: &mut TestCtx,
|
||||
asm: impl AsRef<str>,
|
||||
instr: impl Fn(&mut Code) -> Result<(), CompilerMsg>,
|
||||
) {
|
||||
let asm = asm.as_ref();
|
||||
let (mut res, cache) = eq_(ctx, asm, &instr);
|
||||
if res.is_err() && cache {
|
||||
ctx.cache.remove(asm);
|
||||
res = eq_(ctx, asm, &instr).0;
|
||||
}
|
||||
if let Err(err) = res {
|
||||
panic!("{err}");
|
||||
}
|
||||
}
|
||||
|
||||
#[track_caller]
|
||||
pub fn eq_(
|
||||
ctx: &mut TestCtx,
|
||||
asm: &str,
|
||||
instr: impl FnOnce(&mut Code) -> Result<(), CompilerMsg>,
|
||||
) -> (Result<(), String>, bool) {
|
||||
let (expected, cache) = if let Some(val) = ctx.cache.get(asm) {
|
||||
(val, true)
|
||||
} else {
|
||||
ctx.changed = true;
|
||||
let res = nasm(asm);
|
||||
ctx.cache.insert(asm.to_string(), res);
|
||||
(ctx.cache.get(asm).unwrap(), false)
|
||||
};
|
||||
let code = &mut ctx.code;
|
||||
let res = instr(code);
|
||||
let res = match (expected, res) {
|
||||
(Ok(expected), Err(e)) => Err(format!(
|
||||
"{asm}: failed to compile: {}\nexpected: {expected:x?}",
|
||||
e.msg
|
||||
)),
|
||||
(Err(e), Ok(_)) => {
|
||||
let res = &code.bytes[..];
|
||||
Err(format!(
|
||||
"{asm}: should not have compiled:\n{e}\ngot: {res:x?}"
|
||||
))
|
||||
}
|
||||
(Err(_), Err(_)) => Ok(()),
|
||||
(Ok(expected), Ok(_)) => {
|
||||
let res = &code.bytes[..];
|
||||
if expected != res {
|
||||
Err(format!("{asm}: expected {expected:x?}, got {res:x?}"))
|
||||
} else {
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
};
|
||||
ctx.code.bytes.clear();
|
||||
(res, cache)
|
||||
}
|
||||
|
||||
fn nasm(input: &str) -> Result<Vec<u8>, String> {
|
||||
let fin = "/tmp/69420nasm_in.asm";
|
||||
let fout = "/tmp/69420nasm_out.o";
|
||||
let input = "result:".to_string() + input;
|
||||
write(fin, input.as_bytes());
|
||||
run(["nasm", "-w+error", "-felf64", fin, &format!("-o{fout}")])?;
|
||||
let output = run(["objdump", "--no-addresses", "-dw", "-Mintel", fout])?;
|
||||
let mut iter = output.lines().skip_while(|l| !l.contains("result")).skip(1);
|
||||
let res_line = iter.next().unwrap().trim();
|
||||
let end = res_line.find("\t").unwrap();
|
||||
let res_line = &res_line[..end];
|
||||
let bytes = res_line
|
||||
.trim()
|
||||
.split(" ")
|
||||
.map(|s| u8::from_str_radix(s, 16).unwrap())
|
||||
.collect();
|
||||
Ok(bytes)
|
||||
}
|
||||
|
||||
/// Runs `input[0]` with the remaining elements as arguments.
///
/// Returns the captured stdout when the process exits successfully and the
/// captured stderr otherwise, including when it was terminated by a signal.
/// Output that is not valid UTF-8 is converted lossily instead of panicking.
///
/// # Panics
/// Panics when `input` is empty or the process cannot be started.
fn run<const N: usize>(input: [&str; N]) -> Result<String, String> {
    let program = input[0];
    let output = Command::new(program)
        .args(&input[1..])
        .output()
        .unwrap_or_else(|err| panic!("failed to run {program}: {err}"));
    if output.status.success() {
        Ok(String::from_utf8_lossy(&output.stdout).into_owned())
    } else {
        Err(String::from_utf8_lossy(&output.stderr).into_owned())
    }
}
|
||||
|
||||
/// Writes `binary` to `path`, replacing any previous content, and syncs it to disk.
///
/// Missing parent directories are created first, so writing into a cache
/// directory that does not exist yet works.
///
/// # Panics
/// Panics when the directory or file cannot be created or written.
fn write(path: &str, binary: &[u8]) {
    if let Some(parent) = std::path::Path::new(path).parent() {
        if !parent.as_os_str().is_empty() {
            std::fs::create_dir_all(parent).expect("Failed to create parent directory");
        }
    }
    let mut file = OpenOptions::new()
        .create(true)
        .write(true)
        .truncate(true)
        .open(path)
        .expect("Failed to create file");
    file.write_all(binary).expect("Failed to write to file");
    file.sync_all().expect("Failed to sync file");
}
|
||||
|
||||
const CACHE_PATH: &str = "test/nasm_cache";
|
||||
|
||||
impl TestCtx {
|
||||
pub fn new(name: &str) -> Self {
|
||||
let path = CACHE_PATH.to_string() + "/" + name;
|
||||
let cache = match std::fs::read(&path) {
|
||||
Ok(bytes) => bitcode::decode(&bytes).unwrap_or_default(),
|
||||
Err(_) => Default::default(),
|
||||
};
|
||||
Self {
|
||||
path,
|
||||
code: Default::default(),
|
||||
cache,
|
||||
changed: Default::default(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl Drop for TestCtx {
|
||||
fn drop(&mut self) {
|
||||
if self.changed {
|
||||
write(&self.path, &bitcode::encode(&self.cache));
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,54 @@
|
||||
use super::*;
|
||||
|
||||
/// Builds a two-function "hello world" ELF, runs it, and checks its output
/// and exit code.
#[test]
fn hello() -> Result<(), CompilerMsg> {
    let mut program = Program::<X86_64>::default();
    let text = b"Hello world!\n";
    let text_sym = program.ro_data("hello_en", text);
    let text2 = "世界、こんにちは!\n";
    let text_sym2 = program.ro_data("hello_jp", text2);

    // NOTE(review): the 16-bit moves below (`ax`, `di`, `dx`) leave the upper
    // bits of the full registers untouched, so the syscalls rely on those bits
    // already being zero — confirm, or switch to the 32-bit registers.
    let hello2 = program.func(
        "hello2",
        [Instr::Asm(encode(|c| {
            c.mov(ax, 1)?;
            c.mov(di, 1)?;
            // `lea` is fallible; do not drop its error
            c.lea(rsi, text_sym2)?;
            c.mov(dx, text2.len() as u64)?;
            c.syscall();
            c.ret();
            Ok(())
        })?)],
    );
    let entry = program.func(
        "main",
        [Instr::Asm(encode(|c| {
            // keep the expected exit code on the stack across the calls
            c.mov(rdi, 39)?;
            c.push(rdi)?;
            c.mov(ax, 1)?;
            c.mov(di, 1)?;
            c.lea(rsi, text_sym)?;
            c.mov(dx, text.len() as u64)?;
            c.syscall();
            c.call(hello2);
            c.mov(ax, 0x3c)?;
            c.pop(rdi)?;
            c.syscall();
            Ok(())
        })?)],
    );
    program.entry = Some(entry);
    let linked = program.compile().expect("failed to compile");
    let binary = linked.to_elf();
    let path = "test/bin/x86_64_test";
    write(path, &binary);

    println!("running...");
    let mut cmd = Command::new(path);
    let output = cmd.output().expect("failed to run");
    let Some(code) = output.status.code() else {
        panic!("no exit code");
    };
    // the expected output contains non-ASCII text, so only UTF-8 validity is required
    let result: String = output.stdout.try_into().expect("non UTF-8 output");
    assert_eq!(result, "Hello world!\n世界、こんにちは!\n");
    assert_eq!(code, 39);
    Ok(())
}
|
||||
@@ -0,0 +1,11 @@
|
||||
mod linux;
|
||||
mod util;
|
||||
mod windows;
|
||||
|
||||
use crate::{
|
||||
arch::x86_64::*,
|
||||
backend::{Instr, Program},
|
||||
io::CompilerMsg,
|
||||
};
|
||||
use std::process::Command;
|
||||
use util::*;
|
||||
@@ -0,0 +1,13 @@
|
||||
use std::{fs::OpenOptions, io::Write, os::unix::fs::OpenOptionsExt};
|
||||
|
||||
/// Writes `binary` to `path` as an executable file (mode `0o750` when the
/// file is created), replacing any previous content, and syncs it to disk.
///
/// Missing parent directories are created first, so the output directory does
/// not have to exist beforehand.
///
/// # Panics
/// Panics when the directory or file cannot be created or written.
pub fn write(path: &str, binary: &[u8]) {
    if let Some(parent) = std::path::Path::new(path).parent() {
        if !parent.as_os_str().is_empty() {
            std::fs::create_dir_all(parent).expect("Failed to create parent directory");
        }
    }
    let mut file = OpenOptions::new()
        .create(true)
        .write(true)
        .truncate(true)
        .mode(0o750)
        .open(path)
        .expect("Failed to create file");
    file.write_all(binary).expect("Failed to write to file");
    file.sync_all().expect("Failed to sync file");
}
|
||||
@@ -0,0 +1,48 @@
|
||||
use super::*;
|
||||
|
||||
/// Builds a "hello world" PE that calls into KERNEL32, runs it under `wine`,
/// and checks its output and exit code.
#[test]
fn hello() -> Result<(), CompilerMsg> {
    let mut program = Program::<X86_64>::default();
    let [get_std_handle, write_file, exit_process] =
        program.external("KERNEL32.dll", ["GetStdHandle", "WriteFile", "ExitProcess"]);
    let text = b"Hello world!\n";
    let text_sym = program.ro_data("hello_en", text);
    // NOTE(review): WriteFile stores the byte count through this pointer, yet
    // the slot is declared with `ro_data` — confirm the section is writable.
    let written = program.ro_data("written", [0; 4]);
    let entry = program.func(
        "main",
        [Instr::Asm(encode(|c| {
            // Reserve shadow space. This must adjust the full 64-bit `rsp`: a
            // 32-bit `sub esp` would zero the upper half of the stack pointer.
            c.sub(rsp, 0x28)?;
            // stdout
            c.mov(ecx, -11)?;
            c.call_mem(get_std_handle);
            // write
            c.mov(rcx, rax)?;
            c.lea(rdx, text_sym)?;
            c.mov(r8d, text.len() as u64)?;
            c.lea(r9, written)?;
            // fifth argument (lpOverlapped) is a pointer: clear all 8 bytes
            c.mov(mem(rsp, 0x20, Width::B64), 0)?;
            c.call_mem(write_file);
            // exit
            c.mov(ecx, 39)?;
            c.call_mem(exit_process);
            Ok(())
        })?)],
    );
    program.entry = Some(entry);
    let linked = program.compile().expect("failed to compile");

    let binary = linked.to_pe();
    let path = "test/bin/x86_64_test.exe";
    write(path, &binary);

    let mut cmd = Command::new("wine");
    cmd.arg(path);
    let output = cmd.output().expect("failed to run");
    let Some(code) = output.status.code() else {
        panic!("no exit code");
    };
    let result: String = output.stdout.try_into().expect("non UTF-8 output");
    assert_eq!(result, "Hello world!\n");
    assert_eq!(code, 39);
    Ok(())
}
|
||||
@@ -0,0 +1,2 @@
|
||||
mod full;
|
||||
mod asm;
|
||||
@@ -0,0 +1,257 @@
|
||||
use super::*;
|
||||
use crate::backend::Symbol;
|
||||
|
||||
pub trait RegMem: RexBit + RexW + ModRMRM + Copy + MaybeMem {
|
||||
fn width(&self) -> Width;
|
||||
fn kind(self) -> RegMemKind;
|
||||
}
|
||||
|
||||
#[derive(Clone, Copy)]
|
||||
pub enum RegMemKind {
|
||||
Reg(Reg),
|
||||
Mem(Mem),
|
||||
}
|
||||
|
||||
#[derive(Clone, Copy)]
|
||||
pub enum RegMemImm {
|
||||
Reg(Reg),
|
||||
Imm(Imm),
|
||||
Mem(Mem),
|
||||
}
|
||||
|
||||
pub trait MaybeMem {
|
||||
fn mem(&self) -> Option<Mem>;
|
||||
}
|
||||
|
||||
impl RegMem for Reg {
|
||||
fn width(&self) -> Width {
|
||||
self.width()
|
||||
}
|
||||
fn kind(self) -> RegMemKind {
|
||||
RegMemKind::Reg(self)
|
||||
}
|
||||
}
|
||||
|
||||
impl MaybeMem for Reg {
|
||||
fn mem(&self) -> Option<Mem> {
|
||||
None
|
||||
}
|
||||
}
|
||||
|
||||
impl RegMem for Mem {
|
||||
fn width(&self) -> Width {
|
||||
self.width
|
||||
}
|
||||
fn kind(self) -> RegMemKind {
|
||||
RegMemKind::Mem(self)
|
||||
}
|
||||
}
|
||||
|
||||
impl MaybeMem for Mem {
|
||||
fn mem(&self) -> Option<Mem> {
|
||||
Some(*self)
|
||||
}
|
||||
}
|
||||
|
||||
// fromrot
|
||||
impl From<Reg> for RegMemImm {
|
||||
fn from(value: Reg) -> Self {
|
||||
Self::Reg(value)
|
||||
}
|
||||
}
|
||||
|
||||
impl From<Reg> for RegMemKind {
|
||||
fn from(value: Reg) -> Self {
|
||||
Self::Reg(value)
|
||||
}
|
||||
}
|
||||
|
||||
impl From<Mem> for RegMemImm {
|
||||
fn from(value: Mem) -> Self {
|
||||
Self::Mem(value)
|
||||
}
|
||||
}
|
||||
|
||||
impl From<Mem> for RegMemKind {
|
||||
fn from(value: Mem) -> Self {
|
||||
Self::Mem(value)
|
||||
}
|
||||
}
|
||||
|
||||
impl From<u64> for RegMemImm {
|
||||
fn from(value: u64) -> Self {
|
||||
Self::Imm(value.into())
|
||||
}
|
||||
}
|
||||
|
||||
impl From<i64> for RegMemImm {
|
||||
fn from(value: i64) -> Self {
|
||||
Self::Imm(value.into())
|
||||
}
|
||||
}
|
||||
|
||||
impl From<i32> for RegMemImm {
|
||||
fn from(value: i32) -> Self {
|
||||
Self::Imm(value.into())
|
||||
}
|
||||
}
|
||||
|
||||
impl From<i128> for RegMemImm {
|
||||
fn from(value: i128) -> Self {
|
||||
Self::Imm(value.into())
|
||||
}
|
||||
}
|
||||
|
||||
pub trait ModRMRM {
|
||||
fn rm(&self) -> u8;
|
||||
fn addr(&self) -> EffAddr;
|
||||
}
|
||||
|
||||
pub enum EffAddr {
|
||||
Mem0,
|
||||
Mem8(i8),
|
||||
Mem32(i32),
|
||||
Sym(Symbol),
|
||||
None,
|
||||
}
|
||||
|
||||
impl ModRMRM for Reg {
|
||||
fn rm(&self) -> u8 {
|
||||
self.base()
|
||||
}
|
||||
fn addr(&self) -> EffAddr {
|
||||
EffAddr::None
|
||||
}
|
||||
}
|
||||
|
||||
impl ModRMRM for Mem {
|
||||
fn rm(&self) -> u8 {
|
||||
self.reg.base()
|
||||
}
|
||||
fn addr(&self) -> EffAddr {
|
||||
const I8_MIN: i32 = i8::MIN as i32;
|
||||
const I8_MAX: i32 = i8::MAX as i32;
|
||||
let disp = self.disp;
|
||||
match disp {
|
||||
0 => {
|
||||
if self.reg.base() == 0b101 {
|
||||
EffAddr::Mem8(0)
|
||||
} else {
|
||||
EffAddr::Mem0
|
||||
}
|
||||
}
|
||||
I8_MIN..=I8_MAX => EffAddr::Mem8(disp as i8),
|
||||
_ => EffAddr::Mem32(disp),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl ModRMRM for i32 {
|
||||
fn rm(&self) -> u8 {
|
||||
0b101
|
||||
}
|
||||
fn addr(&self) -> EffAddr {
|
||||
EffAddr::Mem32(*self)
|
||||
}
|
||||
}
|
||||
|
||||
impl ModRMRM for Symbol {
|
||||
fn rm(&self) -> u8 {
|
||||
0b101
|
||||
}
|
||||
|
||||
fn addr(&self) -> EffAddr {
|
||||
EffAddr::Sym(*self)
|
||||
}
|
||||
}
|
||||
|
||||
impl ModRMReg for u8 {
|
||||
fn val(&self) -> u8 {
|
||||
*self
|
||||
}
|
||||
}
|
||||
|
||||
impl ModRMReg for Reg {
|
||||
fn val(&self) -> u8 {
|
||||
self.base()
|
||||
}
|
||||
}
|
||||
|
||||
pub trait ModRMReg {
|
||||
fn val(&self) -> u8;
|
||||
}
|
||||
|
||||
#[inline(always)]
|
||||
pub fn rex(w: impl RexW, r: impl RexBit, x: u8, b: impl RexBit) -> u8 {
|
||||
0b0100_0000 | bit(w.rexw(), 3) | bit(r.rex(), 2) | bit(x.rex(), 1) | bit(b.rex(), 0)
|
||||
}
|
||||
|
||||
#[inline(always)]
|
||||
fn bit(val: bool, pos: u8) -> u8 {
|
||||
(val as u8) << pos
|
||||
}
|
||||
|
||||
pub trait RexBit: Sized {
|
||||
fn rex(&self) -> bool;
|
||||
fn req(&self) -> bool {
|
||||
false
|
||||
}
|
||||
fn req_no(&self) -> bool {
|
||||
false
|
||||
}
|
||||
}
|
||||
|
||||
impl RexBit for u8 {
|
||||
fn rex(&self) -> bool {
|
||||
*self != 0
|
||||
}
|
||||
}
|
||||
|
||||
impl RexBit for Reg {
|
||||
fn rex(&self) -> bool {
|
||||
self.gt8()
|
||||
}
|
||||
fn req(&self) -> bool {
|
||||
self.gt4() && (self.width() == Width::B8) && !self.high()
|
||||
}
|
||||
fn req_no(&self) -> bool {
|
||||
self.high()
|
||||
}
|
||||
}
|
||||
|
||||
impl RexBit for Mem {
|
||||
fn rex(&self) -> bool {
|
||||
self.reg.rex()
|
||||
}
|
||||
fn req(&self) -> bool {
|
||||
self.reg.gt8()
|
||||
}
|
||||
}
|
||||
|
||||
pub trait RexW {
|
||||
fn rexw(&self) -> bool;
|
||||
}
|
||||
|
||||
impl RexW for Width {
|
||||
fn rexw(&self) -> bool {
|
||||
*self == Width::B64
|
||||
}
|
||||
}
|
||||
|
||||
impl RexW for Reg {
|
||||
fn rexw(&self) -> bool {
|
||||
self.width().rexw()
|
||||
}
|
||||
}
|
||||
|
||||
impl RexW for u8 {
|
||||
fn rexw(&self) -> bool {
|
||||
*self == 1
|
||||
}
|
||||
}
|
||||
|
||||
impl RexW for Mem {
|
||||
fn rexw(&self) -> bool {
|
||||
self.width.rexw()
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,85 @@
|
||||
use super::Width;
|
||||
use crate::io::CompilerMsg;
|
||||
use std::num::TryFromIntError;
|
||||
|
||||
#[derive(Clone, Copy, PartialEq, PartialOrd)]
|
||||
pub struct Imm(pub i128);
|
||||
|
||||
impl Imm {
|
||||
pub fn overflow_msg() -> CompilerMsg {
|
||||
"immediate overflow".into()
|
||||
}
|
||||
|
||||
pub fn width_signed(&self) -> Result<Width, CompilerMsg> {
|
||||
Ok(match self.0 {
|
||||
-0x80..=0x7f => Width::B8,
|
||||
-0x8000..=0x7fff => Width::B16,
|
||||
-0x8000_0000..=0x7fff_ffff => Width::B32,
|
||||
-0x8000_0000_0000_0000..=0x7fff_ffff_ffff_ffff => Width::B64,
|
||||
_ => return Err(Self::overflow_msg()),
|
||||
})
|
||||
}
|
||||
|
||||
pub fn width_unsigned(&self) -> Result<Width, CompilerMsg> {
|
||||
Ok(match self.0 {
|
||||
-0xff..=0xff => Width::B8,
|
||||
-0xffff..=0xffff => Width::B16,
|
||||
-0xffff_ffff..=0xffff_ffff => Width::B32,
|
||||
-0xffff_ffff_ffff_ffff..=0xffff_ffff_ffff_ffff => Width::B64,
|
||||
_ => return Err(Self::overflow_msg()),
|
||||
})
|
||||
}
|
||||
|
||||
pub fn reinterpret(&self, width: Width) -> Self {
|
||||
Self(match width {
|
||||
Width::B8 => self.0 as i8 as i128,
|
||||
Width::B16 => self.0 as i16 as i128,
|
||||
Width::B32 => self.0 as i32 as i128,
|
||||
Width::B64 => self.0 as i64 as i128,
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
impl TryFrom<Imm> for u8 {
|
||||
type Error = TryFromIntError;
|
||||
|
||||
fn try_from(value: Imm) -> Result<Self, Self::Error> {
|
||||
value.0.try_into()
|
||||
}
|
||||
}
|
||||
|
||||
impl From<u64> for Imm {
|
||||
fn from(value: u64) -> Self {
|
||||
Self(value as i128)
|
||||
}
|
||||
}
|
||||
|
||||
impl From<i64> for Imm {
|
||||
fn from(value: i64) -> Self {
|
||||
Self(value as i128)
|
||||
}
|
||||
}
|
||||
|
||||
impl From<i32> for Imm {
|
||||
fn from(value: i32) -> Self {
|
||||
Self(value as i128)
|
||||
}
|
||||
}
|
||||
|
||||
impl From<i128> for Imm {
|
||||
fn from(value: i128) -> Self {
|
||||
Self(value)
|
||||
}
|
||||
}
|
||||
|
||||
impl std::fmt::Display for Imm {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
self.0.fmt(f)
|
||||
}
|
||||
}
|
||||
|
||||
impl std::fmt::Debug for Imm {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
self.0.fmt(f)
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,27 @@
|
||||
use crate::arch::x86_64::util::signed_hex;
|
||||
|
||||
use super::*;
|
||||
|
||||
#[derive(Clone, Copy)]
|
||||
pub struct Mem {
|
||||
pub reg: Reg,
|
||||
pub disp: i32,
|
||||
pub width: Width,
|
||||
}
|
||||
|
||||
pub fn mem(reg: Reg, disp: i32, width: Width) -> Mem {
|
||||
Mem { reg, disp, width }
|
||||
}
|
||||
|
||||
impl std::fmt::Display for Mem {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
let Mem { reg, disp, width } = *self;
|
||||
let size = match width {
|
||||
Width::B8 => "BYTE",
|
||||
Width::B16 => "WORD",
|
||||
Width::B32 => "DWORD",
|
||||
Width::B64 => "QWORD",
|
||||
};
|
||||
write!(f, "{size} [{reg} {}]", signed_hex(disp as i128, true))
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,11 @@
|
||||
mod arg;
|
||||
mod imm;
|
||||
mod mem;
|
||||
mod reg;
|
||||
mod width;
|
||||
|
||||
pub use arg::*;
|
||||
pub use imm::*;
|
||||
pub use mem::*;
|
||||
pub use reg::*;
|
||||
pub use width::*;
|
||||
@@ -0,0 +1,184 @@
|
||||
use super::Width;
|
||||
|
||||
#[derive(Clone, Copy, PartialEq)]
|
||||
pub struct Reg {
|
||||
val: u8,
|
||||
high: bool,
|
||||
width: Width,
|
||||
}
|
||||
|
||||
def_regs! {
|
||||
0b0000 : rax eax ax al,
|
||||
0b0001 : rcx ecx cx cl !_,
|
||||
0b0010 : rdx edx dx dl,
|
||||
0b0011 : rbx ebx bx bl,
|
||||
|
||||
0b0100 : rsp esp sp spl norex=ah !_,
|
||||
0b0101 : rbp ebp bp bpl norex=ch !_,
|
||||
0b0110 : rsi esi si sil norex=dh !_,
|
||||
0b0111 : rdi edi di dil norex=bh,
|
||||
|
||||
0b1000 : r8 r8d r8w r8b,
|
||||
0b1001 : r9 r9d r9w r9b !_,
|
||||
0b1010 : r10 r10d r10w r10b,
|
||||
0b1011 : r11 r11d r11w r11b,
|
||||
0b1100 : r12 r12d r12w r12b !_,
|
||||
0b1101 : r13 r13d r13w r13b,
|
||||
0b1110 : r14 r14d r14w r14b,
|
||||
0b1111 : r15 r15d r15w r15b,
|
||||
}
|
||||
|
||||
impl Reg {
|
||||
pub fn base(&self) -> u8 {
|
||||
self.val & 0b111
|
||||
}
|
||||
/// checks if register is not one of the first 8 (0-7)
|
||||
pub fn gt8(&self) -> bool {
|
||||
self.val >= 0b1000
|
||||
}
|
||||
pub fn gt4(&self) -> bool {
|
||||
self.val >= 0b0100
|
||||
}
|
||||
|
||||
pub fn width(&self) -> Width {
|
||||
self.width
|
||||
}
|
||||
|
||||
pub fn not8(&self) -> u8 {
|
||||
self.width.not8()
|
||||
}
|
||||
|
||||
pub fn high(&self) -> bool {
|
||||
self.high
|
||||
}
|
||||
|
||||
/// if self has 64 bit width, changes width to 32 bit
|
||||
pub fn lower64(&self) -> Self {
|
||||
let mut new = *self;
|
||||
new.width = new.width.min(Width::B32);
|
||||
new
|
||||
}
|
||||
|
||||
pub fn requires_rex(&self) -> bool {
|
||||
self.gt8()
|
||||
|| self.width == Width::B64
|
||||
|| (self.gt4() && self.width == Width::B8 && !self.high)
|
||||
}
|
||||
|
||||
pub fn incompatible(&self, other: &Reg) -> bool {
|
||||
(self.requires_rex() && other.high) || (self.high && other.requires_rex())
|
||||
}
|
||||
|
||||
const fn new(val: u8, width: Width, high: bool) -> Self {
|
||||
Self { val, high, width }
|
||||
}
|
||||
}
|
||||
|
||||
impl Width {
|
||||
pub const fn max_val(&self) -> u64 {
|
||||
match self {
|
||||
Self::B64 => u64::MAX,
|
||||
Self::B32 => u32::MAX as u64,
|
||||
Self::B16 => u16::MAX as u64,
|
||||
Self::B8 { .. } => u8::MAX as u64,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn min(self, other: Self) -> Self {
|
||||
if self <= other { self } else { other }
|
||||
}
|
||||
|
||||
pub const fn bytes(&self) -> usize {
|
||||
match self {
|
||||
Self::B64 => 8,
|
||||
Self::B32 => 4,
|
||||
Self::B16 => 2,
|
||||
Self::B8 { .. } => 1,
|
||||
}
|
||||
}
|
||||
|
||||
/// greater than 8 bits
|
||||
pub const fn not8(&self) -> u8 {
|
||||
!matches!(self, Self::B8) as u8
|
||||
}
|
||||
}
|
||||
|
||||
macro_rules! filter {
|
||||
($($filtered:ident)*; ! $_:tt $($item:ident)*; $($rest:tt)*) => {
|
||||
filter!($($filtered)* $($item)*; $($rest)*)
|
||||
};
|
||||
($($filtered:ident)*; $($item:ident)*; $($rest:tt)*) => {
|
||||
filter!($($filtered)*; $($rest)*)
|
||||
};
|
||||
($($filtered:ident)*;) => {
|
||||
[$($filtered, )*]
|
||||
};
|
||||
}
|
||||
use filter;
|
||||
|
||||
macro_rules! def_regs {
|
||||
($($val:literal : $B64:ident $B32:ident $B16:ident $B8:ident $(norex=$B8H:ident)? $(!$imp:tt)?,)*) => {
|
||||
$(
|
||||
#[allow(non_upper_case_globals)]
|
||||
pub const $B64: Reg = Reg::new($val, Width::B64, false);
|
||||
#[allow(non_upper_case_globals)]
|
||||
pub const $B32: Reg = Reg::new($val, Width::B32, false);
|
||||
#[allow(non_upper_case_globals)]
|
||||
pub const $B16: Reg = Reg::new($val, Width::B16, false);
|
||||
#[allow(non_upper_case_globals)]
|
||||
pub const $B8 : Reg = Reg::new($val, Width::B8 , false);
|
||||
$(
|
||||
#[allow(non_upper_case_globals)]
|
||||
pub const $B8H: Reg = Reg::new($val, Width::B8, true);
|
||||
)?
|
||||
)*
|
||||
|
||||
impl Reg {
|
||||
// #[cfg(test)]
|
||||
// pub const ALL: &[Reg] = &[
|
||||
// $( $B64, $B32, $B16, $B8, $($B8H,)? )*
|
||||
// ];
|
||||
|
||||
#[cfg(test)]
|
||||
pub const IMPORTANT: &[Reg] = &
|
||||
filter!(; $($(!$imp)? $B64 $B32 $B16 $B8 $($B8H)?; )* )
|
||||
;
|
||||
|
||||
pub fn parse(s: &str) -> Option<Self> {
|
||||
Some(match s.to_lowercase().as_str() {
|
||||
$(
|
||||
stringify!($B64) => $B64,
|
||||
stringify!($B32) => $B32,
|
||||
stringify!($B16) => $B16,
|
||||
stringify!($B8 ) => $B8,
|
||||
$(
|
||||
stringify!($B8H) => $B8H,
|
||||
)?
|
||||
)*
|
||||
_ => return None,
|
||||
})
|
||||
}
|
||||
}
|
||||
impl std::fmt::Display for Reg {
|
||||
#[allow(non_upper_case_globals)]
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
write!(f, "{}", match *self {
|
||||
$(
|
||||
$B64 => stringify!($B64),
|
||||
$B32 => stringify!($B32),
|
||||
$B16 => stringify!($B16),
|
||||
$B8 => stringify!($B8),
|
||||
$(
|
||||
$B8H => stringify!($B8H),
|
||||
)?
|
||||
)*
|
||||
_ => "UNKNOWN",
|
||||
})
|
||||
}
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
use def_regs;
|
||||
|
||||
use crate::arch::x86_64::Imm;
|
||||
@@ -0,0 +1,22 @@
|
||||
use super::*;
|
||||
|
||||
#[derive(Debug, Clone, Copy, PartialEq, PartialOrd)]
|
||||
#[repr(u8)]
|
||||
pub enum Width {
|
||||
B8 = 0,
|
||||
B16 = 1,
|
||||
B32 = 2,
|
||||
B64 = 3,
|
||||
}
|
||||
|
||||
impl From<Reg> for Width {
|
||||
fn from(value: Reg) -> Self {
|
||||
value.width()
|
||||
}
|
||||
}
|
||||
|
||||
impl From<Mem> for Width {
|
||||
fn from(value: Mem) -> Self {
|
||||
value.width
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,26 @@
|
||||
/// Computes the 4-byte little-endian relative displacement from a
/// displacement field located at `pos` to the target address `addr`.
///
/// The displacement is measured from the end of the 4-byte field
/// (`pos + 4`), i.e. this assumes the next instruction starts directly after
/// the field.
///
/// All arithmetic is performed modulo 2^32 so that debug and release builds
/// agree; callers are responsible for making sure the real distance fits in
/// an `i32`.
pub fn addr_offset(pos: usize, addr: u64) -> [u8; 4] {
    // Truncating casts are intentional: only the low 32 bits take part.
    let next = pos.wrapping_add(4) as i32;
    let target = addr as i32;
    target.wrapping_sub(next).to_le_bytes()
}
|
||||
|
||||
/// Display adapter that prints an integer as sign-and-magnitude hexadecimal,
/// e.g. `-0x10` rather than a two's-complement bit pattern.
///
/// * `val` – the number to print.
/// * `op` – when `true` the sign is always printed and followed by a space
///   (`"+ "` / `"- "`), so the value reads like the right-hand side of a
///   binary operator; when `false` only a bare `-` is printed for negative
///   values.
pub struct SignedHex {
    pub val: i128,
    pub op: bool,
}

/// Shorthand constructor for [`SignedHex`].
pub fn signed_hex(val: i128, op: bool) -> SignedHex {
    SignedHex { val, op }
}

impl std::fmt::Display for SignedHex {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        // `unsigned_abs` rather than `abs`: the magnitude of `i128::MIN` is
        // not representable as an `i128`, so `abs` would overflow.
        let magnitude = self.val.unsigned_abs();
        let sign = match (self.op, self.val < 0) {
            (true, true) => "- ",
            (true, false) => "+ ",
            (false, true) => "-",
            (false, false) => "",
        };
        write!(f, "{sign}0x{magnitude:x}")
    }
}
|
||||
@@ -0,0 +1,135 @@
|
||||
use crate::backend::{LinkedProgram, container::encode::ByteEncoder};
|
||||
|
||||
#[repr(C)]
|
||||
#[derive(Default)]
|
||||
pub struct ELF64Header {
|
||||
magic: u32,
|
||||
class: u8,
|
||||
endianness: u8,
|
||||
ei_version: u8,
|
||||
os_abi: u8,
|
||||
os_abi_ver: u8,
|
||||
pad: [u8; 7],
|
||||
ty: u16,
|
||||
machine: u16,
|
||||
e_version: u32,
|
||||
entry: u64,
|
||||
program_header_offset: u64,
|
||||
section_header_offset: u64,
|
||||
flags: u32,
|
||||
header_size: u16,
|
||||
program_header_entry_size: u16,
|
||||
program_header_num: u16,
|
||||
section_header_entry_size: u16,
|
||||
section_header_num: u16,
|
||||
section_header_str_idx: u16,
|
||||
}
|
||||
|
||||
#[repr(C)]
|
||||
#[derive(Default)]
|
||||
pub struct ProgramHeader {
|
||||
ty: u32,
|
||||
flags: u32,
|
||||
offset: u64,
|
||||
vaddr: u64,
|
||||
paddr: u64,
|
||||
filesz: u64,
|
||||
memsz: u64,
|
||||
align: u64,
|
||||
}
|
||||
|
||||
#[repr(C)]
|
||||
pub struct SectionHeader {
|
||||
name_idx: u32,
|
||||
ty: u32,
|
||||
flags: u64,
|
||||
addr: u64,
|
||||
offset: u64,
|
||||
size: u64,
|
||||
link: u32,
|
||||
info: u32,
|
||||
addr_align: u64,
|
||||
entry_size: u64,
|
||||
}
|
||||
|
||||
pub enum Arch {
|
||||
X86_64,
|
||||
Riscv,
|
||||
}
|
||||
|
||||
impl Arch {
|
||||
pub fn machine(&self) -> u16 {
|
||||
match self {
|
||||
Arch::X86_64 => 0x3e,
|
||||
Arch::Riscv => 0xf3,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[repr(u8)]
|
||||
pub enum EType {
|
||||
None = 0,
|
||||
Rel = 1,
|
||||
Exec = 2,
|
||||
Dyn = 3,
|
||||
Core = 4,
|
||||
}
|
||||
|
||||
// this is currently specialized for x86_64; obviously add params later
|
||||
pub fn create(program: &[u8], start_offset: u64) -> Vec<u8> {
|
||||
let pie = true;
|
||||
let addr_start = if pie { 0 } else { 0x400000 };
|
||||
let page_size = 0x1000;
|
||||
// I don't know if I have to add addr_start here, idk how it maps the memory
|
||||
let program_size = std::mem::size_of_val(program) as u64;
|
||||
|
||||
let mut data = ByteEncoder::default();
|
||||
let header = data.reserve::<ELF64Header>();
|
||||
|
||||
let program_header_offset = data.pos() as u64;
|
||||
let program_header = data.reserve::<ProgramHeader>();
|
||||
|
||||
let program_pos = data.pos() as u64;
|
||||
data.extend(program);
|
||||
|
||||
data[header] = ELF64Header {
|
||||
magic: 0x7f_45_4c_46u32.swap_bytes(),
|
||||
class: 0x2, // 64 bit
|
||||
endianness: 0x1, // little endian
|
||||
ei_version: 0x1,
|
||||
os_abi: 0x0, // system-v
|
||||
os_abi_ver: 0x0,
|
||||
pad: [0x0; 7],
|
||||
ty: if pie { EType::Dyn } else { EType::Exec } as u16,
|
||||
machine: Arch::X86_64.machine(),
|
||||
e_version: 0x1,
|
||||
entry: addr_start + program_pos + start_offset,
|
||||
program_header_offset,
|
||||
section_header_offset: 0x0,
|
||||
// C ABI (16 bit instruction align) + double precision floats
|
||||
flags: 0x1 | 0x4,
|
||||
header_size: size_of::<ELF64Header>() as u16,
|
||||
program_header_entry_size: size_of::<ProgramHeader>() as u16,
|
||||
program_header_num: 0x1,
|
||||
section_header_entry_size: size_of::<SectionHeader>() as u16,
|
||||
section_header_num: 0x0,
|
||||
section_header_str_idx: 0x0,
|
||||
};
|
||||
data[program_header] = ProgramHeader {
|
||||
ty: 0x1, // LOAD
|
||||
flags: 0b101, // executable, readable
|
||||
offset: 0x0,
|
||||
vaddr: addr_start,
|
||||
paddr: 0x0,
|
||||
filesz: program_size,
|
||||
memsz: program_size,
|
||||
align: page_size,
|
||||
};
|
||||
data.data
|
||||
}
|
||||
|
||||
impl LinkedProgram<u64> {
|
||||
pub fn to_elf(&self) -> Vec<u8> {
|
||||
create(&self.code, self.entry.expect("no start"))
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,154 @@
|
||||
use std::ops::{Index, IndexMut};
|
||||
|
||||
/// Growable byte buffer used to assemble binary file images.
///
/// Besides appending bytes it can reserve zero-filled, typed slots
/// ([`Reserved`], [`ReservedArr`]) that are filled in later through indexing,
/// e.g. `encoder[slot] = value`.
#[derive(Default)]
pub struct ByteEncoder {
    pub data: Vec<u8>,
}

impl ByteEncoder {
    /// Appends a single byte.
    pub fn push(&mut self, byte: u8) {
        self.data.push(byte);
    }

    /// Appends the in-memory bytes of `val` (native layout and byte order)
    /// and returns a handle to where they were written.
    ///
    /// NOTE(review): `T` should be a padding-free plain-data type; padding
    /// bytes are uninitialized and must not be read as `u8`.
    pub fn val<T>(&mut self, val: &T) -> Reserved<T> {
        let pos = self.pos();
        // SAFETY: `val` is a valid reference, so the `size_of::<T>()` bytes
        // starting at its address belong to one live allocation and are not
        // mutated while `bytes` is alive.
        let bytes =
            unsafe { core::slice::from_raw_parts((val as *const T).cast::<u8>(), size_of::<T>()) };
        self.data.extend(bytes);
        Reserved::new(pos)
    }

    /// Current write position, i.e. the number of bytes emitted so far.
    pub fn pos(&self) -> usize {
        self.data.len()
    }

    /// Zero-pads the buffer until its length is a multiple of `align`.
    ///
    /// Panics if `align` is zero.
    pub fn align(&mut self, align: usize) {
        let padded = self.data.len().next_multiple_of(align);
        self.data.resize(padded, 0);
    }

    /// Bounds-checked raw pointer to the `size_of::<T>()` bytes at `index`.
    fn ptr_at<T>(&mut self, index: usize) -> *mut T {
        self.data[index..index + size_of::<T>()].as_mut_ptr().cast()
    }

    /// Overwrites the bytes at `index` with `val`; `index` may be unaligned.
    ///
    /// Panics if the range `index..index + size_of::<T>()` is out of bounds.
    pub fn set_at<T>(&mut self, index: usize, val: T) {
        let ptr = self.ptr_at::<T>(index);
        // SAFETY: `ptr_at` bounds-checked the range, and `write_unaligned`
        // has no alignment requirement.
        unsafe { ptr.write_unaligned(val) }
    }

    /// Reads a `T` from `index`, passes it through `edit` and writes the
    /// result back; `index` may be unaligned.
    ///
    /// The bytes at `index` must form a valid `T`.
    pub fn edit_at<T>(&mut self, index: usize, edit: impl FnOnce(T) -> T) {
        let ptr = self.ptr_at::<T>(index);
        // SAFETY: `ptr_at` bounds-checked the range and both accesses are
        // unaligned-tolerant; validity of the bytes as `T` is the caller's
        // responsibility (see doc comment).
        unsafe {
            let current = ptr.read_unaligned();
            ptr.write_unaligned(edit(current));
        }
    }

    /// Appends `size_of::<T>()` zero bytes and returns a handle to them.
    #[must_use]
    pub fn reserve<T>(&mut self) -> Reserved<T> {
        let pos = self.pos();
        self.pad(size_of::<T>());
        Reserved::new(pos)
    }

    /// Appends `amt` zero bytes.
    pub fn pad(&mut self, amt: usize) {
        self.data.resize(self.data.len() + amt, 0);
    }

    /// Appends zeroed space for `len` values of `T` and returns a handle to it.
    #[must_use]
    pub fn reserve_arr<T>(&mut self, len: usize) -> ReservedArr<T> {
        let pos = self.pos();
        self.pad(size_of::<T>() * len);
        ReservedArr::new(pos, len)
    }
}

/// Handle to a `T`-sized slot inside a [`ByteEncoder`].
pub struct Reserved<T> {
    pos: usize,
    _pd: std::marker::PhantomData<T>,
}

// Implemented by hand so the handle is `Copy` even when `T` is not.
impl<T> Clone for Reserved<T> {
    fn clone(&self) -> Self {
        *self
    }
}
impl<T> Copy for Reserved<T> {}

/// Handle to `len` consecutive `T`-sized slots inside a [`ByteEncoder`].
pub struct ReservedArr<T> {
    pos: usize,
    len: usize,
    _pd: std::marker::PhantomData<T>,
}

// Implemented by hand so the handle is `Copy` even when `T` is not.
impl<T> Clone for ReservedArr<T> {
    fn clone(&self) -> Self {
        *self
    }
}
impl<T> Copy for ReservedArr<T> {}

impl<T> Reserved<T> {
    fn new(pos: usize) -> Self {
        Self {
            pos,
            _pd: std::marker::PhantomData,
        }
    }
}

impl<T> ReservedArr<T> {
    fn new(pos: usize, len: usize) -> Self {
        Self {
            pos,
            len,
            _pd: std::marker::PhantomData,
        }
    }
}

/// Panics unless `ptr` satisfies the alignment requirement of `T`.
///
/// References to misaligned data are undefined behaviour, so the typed
/// `Index`/`IndexMut` views check this before handing out a reference.
fn assert_aligned<T>(ptr: *const T) {
    assert!(
        (ptr as usize) % core::mem::align_of::<T>() == 0,
        "reserved slot is not aligned for its type"
    );
}

impl<T> Index<Reserved<T>> for ByteEncoder {
    type Output = T;

    /// Views the reserved bytes as a `T`.
    ///
    /// Panics if the slot is out of bounds or misaligned for `T`. The bytes
    /// must form a valid `T` (all-zero after `reserve`).
    fn index(&self, index: Reserved<T>) -> &Self::Output {
        let bytes = &self.data[index.pos..index.pos + size_of::<T>()];
        let ptr = bytes.as_ptr().cast::<T>();
        assert_aligned(ptr);
        // SAFETY: the range was bounds-checked by the slice above, alignment
        // was just asserted, and the borrow of `self` keeps the buffer alive.
        unsafe { &*ptr }
    }
}

impl<T> IndexMut<Reserved<T>> for ByteEncoder {
    fn index_mut(&mut self, index: Reserved<T>) -> &mut Self::Output {
        let bytes = &mut self.data[index.pos..index.pos + size_of::<T>()];
        let ptr = bytes.as_mut_ptr().cast::<T>();
        assert_aligned(ptr as *const T);
        // SAFETY: bounds-checked and alignment-checked as in `index`; the
        // exclusive borrow of `self` guarantees uniqueness.
        unsafe { &mut *ptr }
    }
}

impl<T> Index<ReservedArr<T>> for ByteEncoder {
    type Output = [T];

    /// Views the reserved bytes as a slice of `T`.
    ///
    /// Panics if the range is out of bounds or misaligned for `T`.
    fn index(&self, index: ReservedArr<T>) -> &Self::Output {
        let bytes = &self.data[index.pos..index.pos + size_of::<T>() * index.len];
        let ptr = bytes.as_ptr().cast::<T>();
        assert_aligned(ptr);
        // SAFETY: `bytes` covers exactly `len` elements of `T`, alignment was
        // just asserted, and the borrow of `self` keeps the buffer alive.
        unsafe { core::slice::from_raw_parts(ptr, index.len) }
    }
}

impl<T> IndexMut<ReservedArr<T>> for ByteEncoder {
    fn index_mut(&mut self, index: ReservedArr<T>) -> &mut Self::Output {
        let bytes = &mut self.data[index.pos..index.pos + size_of::<T>() * index.len];
        let ptr = bytes.as_mut_ptr().cast::<T>();
        assert_aligned(ptr as *const T);
        // SAFETY: as in `index`, plus the exclusive borrow of `self`
        // guarantees uniqueness.
        unsafe { core::slice::from_raw_parts_mut(ptr, index.len) }
    }
}

impl Extend<u8> for ByteEncoder {
    fn extend<T: IntoIterator<Item = u8>>(&mut self, iter: T) {
        self.data.extend(iter);
    }
}

impl<'a> Extend<&'a u8> for ByteEncoder {
    fn extend<T: IntoIterator<Item = &'a u8>>(&mut self, iter: T) {
        self.data.extend(iter);
    }
}
|
||||
@@ -0,0 +1,5 @@
|
||||
pub mod elf;
|
||||
mod encode;
|
||||
pub mod pe;
|
||||
|
||||
use encode::*;
|
||||
@@ -0,0 +1,27 @@
|
||||
#[derive(Default)]
|
||||
#[repr(C)]
|
||||
pub struct DataDirs {
|
||||
pub export: DataDir,
|
||||
pub import: DataDir,
|
||||
pub rsc: DataDir,
|
||||
pub exception: DataDir,
|
||||
pub cert: DataDir,
|
||||
pub base_reloc: DataDir,
|
||||
pub debug: DataDir,
|
||||
pub arch: DataDir,
|
||||
pub global_ptr: DataDir,
|
||||
pub tls: DataDir,
|
||||
pub load_config: DataDir,
|
||||
pub bound_import: DataDir,
|
||||
pub import_addr: DataDir,
|
||||
pub delay_import_desc: DataDir,
|
||||
pub clr_runtime_header: DataDir,
|
||||
pub reserved: DataDir,
|
||||
}
|
||||
|
||||
#[derive(Default)]
|
||||
#[repr(C)]
|
||||
pub struct DataDir {
|
||||
pub virt_addr_rva: u32,
|
||||
pub size: u32,
|
||||
}
|
||||
@@ -0,0 +1,65 @@
|
||||
#[repr(C)]
|
||||
pub struct MZHeader {
|
||||
pub magic: u16,
|
||||
pub stuff: [u16; 15 + 4 + 10],
|
||||
pub lfanew: u32,
|
||||
}
|
||||
|
||||
#[repr(C)]
|
||||
pub struct PeHeader {
|
||||
pub magic: u32,
|
||||
pub machine: u16,
|
||||
pub num_sections: u16,
|
||||
pub time_date_stamp: u32,
|
||||
pub sym_tab_ptr: u32,
|
||||
pub num_symbols: u32,
|
||||
pub opt_header_size: u16,
|
||||
pub characteristics: u16,
|
||||
}
|
||||
|
||||
#[repr(C)]
|
||||
pub struct OptHeader64 {
|
||||
pub magic: u16,
|
||||
pub major_linker_ver: u8,
|
||||
pub minor_linker_ver: u8,
|
||||
pub code_size: u32,
|
||||
pub init_data_size: u32,
|
||||
pub uninit_data_size: u32,
|
||||
pub entry_addr: u32,
|
||||
pub code_base: u32,
|
||||
pub image_base: u64,
|
||||
pub section_align: u32,
|
||||
pub file_align: u32,
|
||||
pub major_os_ver: u16,
|
||||
pub minor_os_ver: u16,
|
||||
pub major_image_ver: u16,
|
||||
pub minor_image_ver: u16,
|
||||
pub major_subsystem_ver: u16,
|
||||
pub minor_subsystem_ver: u16,
|
||||
pub win32_ver: u32,
|
||||
pub image_size: u32,
|
||||
pub headers_size: u32,
|
||||
pub checksum: u32,
|
||||
pub subsystem: u16,
|
||||
pub dll_characteristics: u16,
|
||||
pub stack_reserve_size: u64,
|
||||
pub stack_commit_size: u64,
|
||||
pub heap_reserve_size: u64,
|
||||
pub heap_commit_size: u64,
|
||||
pub loader_flags: u32,
|
||||
pub num_of_rva_and_sizes: u32,
|
||||
}
|
||||
|
||||
#[repr(C)]
|
||||
pub struct Section {
|
||||
pub name: [u8; 8],
|
||||
pub virtual_size: u32,
|
||||
pub virtual_addr: u32,
|
||||
pub raw_data_size: u32,
|
||||
pub raw_data_ptr: u32,
|
||||
pub reloc_ptr: u32,
|
||||
pub line_num_ptr: u32,
|
||||
pub num_relocs: u16,
|
||||
pub num_line_nums: u16,
|
||||
pub characteristics: u32,
|
||||
}
|
||||
@@ -0,0 +1,94 @@
|
||||
use crate::backend::{LibImport, pe::data_dir::DataDir};
|
||||
|
||||
use super::ByteEncoder;
|
||||
|
||||
pub fn encode(data: &mut ByteEncoder, imports: &[LibImport], code_start: usize) -> DataDir {
|
||||
data.align(4);
|
||||
let start = data.pos() as u32;
|
||||
let idt = data.reserve_arr::<ImportDirTable>(imports.len());
|
||||
// null entry to mark end
|
||||
data.pad(size_of::<ImportDirTable>());
|
||||
let end = data.pos() as u32;
|
||||
|
||||
for (i, import) in imports.iter().enumerate() {
|
||||
// name
|
||||
let name_rva = data.pos() as u32;
|
||||
data.extend(import.name.as_bytes());
|
||||
data.push(0);
|
||||
|
||||
// lookup table
|
||||
data.align(size_of::<ImportLookupEntry>());
|
||||
let lookup_start = data.pos();
|
||||
let lookup = data.reserve_arr::<ImportLookupEntry>(import.syms.len());
|
||||
data.pad(size_of::<ImportLookupEntry>());
|
||||
|
||||
for (i, sym) in import.syms.iter().enumerate() {
|
||||
let rva = hint_name_entry(data, 0, &sym.name);
|
||||
data[lookup][i] = ImportLookupEntry::name(rva);
|
||||
}
|
||||
|
||||
// address table
|
||||
data.align(size_of::<ImportLookupEntry>());
|
||||
let addr_start = data.pos();
|
||||
for (i, sym) in import.syms.iter().enumerate() {
|
||||
let here = data.pos() as i32;
|
||||
for &usage in &sym.usages {
|
||||
// NOTE: sets a relative offset right now
|
||||
let code_pos = code_start + usage;
|
||||
data.set_at::<i32>(code_pos, here - code_pos as i32 - 4);
|
||||
}
|
||||
let entry = data[lookup][i];
|
||||
data.val(&entry);
|
||||
}
|
||||
data.pad(size_of::<ImportLookupEntry>());
|
||||
|
||||
// entry
|
||||
data[idt][i] = ImportDirTable {
|
||||
lookup_table_rva: lookup_start as u32,
|
||||
time_date_stamp: 0,
|
||||
forwarder_chain: 0,
|
||||
name_rva,
|
||||
address_table_rva: addr_start as u32,
|
||||
};
|
||||
}
|
||||
DataDir {
|
||||
virt_addr_rva: start,
|
||||
size: end - start,
|
||||
}
|
||||
}
|
||||
|
||||
#[repr(C)]
|
||||
pub struct ImportDirTable {
|
||||
pub lookup_table_rva: u32,
|
||||
pub time_date_stamp: u32,
|
||||
pub forwarder_chain: u32,
|
||||
pub name_rva: u32,
|
||||
pub address_table_rva: u32,
|
||||
}
|
||||
|
||||
/// One 64-bit entry of a PE import lookup / import address table.
///
/// Bit 63 selects import-by-ordinal; otherwise the low bits hold the RVA of a
/// hint/name table entry.
#[repr(C)]
#[derive(Clone, Copy)]
pub struct ImportLookupEntry(u64);

impl ImportLookupEntry {
    /// All-zero entry.
    pub const NULL: Self = Self(0);

    /// Creates an import-by-name entry pointing at a hint/name table entry.
    ///
    /// # Panics
    ///
    /// Panics if `hint_name_table_rva` does not fit in the 31-bit RVA field
    /// (bits 30–0).
    pub fn name(hint_name_table_rva: u32) -> Self {
        assert!(
            hint_name_table_rva >> 31 == 0,
            "hint/name table RVA must fit in 31 bits"
        );
        Self(u64::from(hint_name_table_rva))
    }

    /// Creates an import-by-ordinal entry (bit 63 set, ordinal in bits 15–0).
    pub fn ordinal(ordinal: u16) -> Self {
        Self(u64::from(ordinal) | (1 << 63))
    }

    /// Little-endian on-disk representation of the entry.
    pub fn bytes(&self) -> [u8; 8] {
        self.0.to_le_bytes()
    }
}
|
||||
|
||||
pub fn hint_name_entry(data: &mut ByteEncoder, hint: u16, name: &str) -> u32 {
|
||||
let pos = data.pos() as u32;
|
||||
data.extend(hint.to_le_bytes());
|
||||
data.extend(name.as_bytes());
|
||||
data.push(0);
|
||||
data.align(2);
|
||||
pos
|
||||
}
|
||||
@@ -0,0 +1,114 @@
|
||||
mod data_dir;
|
||||
mod header;
|
||||
mod import;
|
||||
|
||||
use super::*;
|
||||
use crate::backend::LinkedProgram;
|
||||
|
||||
use data_dir::*;
|
||||
use header::*;
|
||||
|
||||
pub fn create(program: &LinkedProgram<u64>) -> Vec<u8> {
|
||||
let mut data = ByteEncoder::default();
|
||||
let file_align = 1;
|
||||
let section_align = 1;
|
||||
let num_of_rva_and_sizes: u32 = (size_of::<DataDirs>() / size_of::<DataDir>()) as u32;
|
||||
|
||||
let mz_header = data.reserve::<MZHeader>();
|
||||
|
||||
let pe_header_pos = data.pos();
|
||||
data.val(&PeHeader {
|
||||
magic: u32::from_ne_bytes(*b"PE\0\0"),
|
||||
machine: 0x8664,
|
||||
num_sections: 1,
|
||||
time_date_stamp: 0,
|
||||
sym_tab_ptr: 0,
|
||||
num_symbols: 0,
|
||||
opt_header_size: (size_of::<OptHeader64>() + size_of::<DataDirs>()) as u16,
|
||||
// https://learn.microsoft.com/en-us/windows/win32/debug/pe-format
|
||||
// executable | can handle >2GB addrs | debug info removed
|
||||
characteristics: 0x2 | 0x20 | 0x0200,
|
||||
});
|
||||
|
||||
data[mz_header] = MZHeader {
|
||||
magic: u16::from_ne_bytes(*b"MZ"),
|
||||
stuff: [0; _],
|
||||
lfanew: pe_header_pos as u32,
|
||||
};
|
||||
|
||||
let opt_header = data.reserve::<OptHeader64>();
|
||||
|
||||
let data_dirs = data.val(&DataDirs::default());
|
||||
|
||||
let code_sect = data.reserve::<Section>();
|
||||
let hdr_size = data.pos() as u32;
|
||||
|
||||
// .text start
|
||||
let text_start = data.pos() as u32;
|
||||
|
||||
let code_start = data.pos();
|
||||
data.extend(&program.code);
|
||||
|
||||
if !program.imports.is_empty() {
|
||||
let import_rva = import::encode(&mut data, &program.imports, code_start);
|
||||
data[data_dirs].import = import_rva;
|
||||
}
|
||||
|
||||
let text_size = data.pos() as u32 - text_start;
|
||||
// .text end
|
||||
|
||||
data[code_sect] = Section {
|
||||
name: *b".text\0\0\0",
|
||||
virtual_size: text_size,
|
||||
virtual_addr: hdr_size.next_multiple_of(section_align),
|
||||
raw_data_size: text_size.next_multiple_of(file_align),
|
||||
raw_data_ptr: text_start,
|
||||
reloc_ptr: 0,
|
||||
line_num_ptr: 0,
|
||||
num_relocs: 0,
|
||||
num_line_nums: 0,
|
||||
characteristics: 0x60000020,
|
||||
};
|
||||
|
||||
let file_size = data.pos() as u32;
|
||||
|
||||
data[opt_header] = OptHeader64 {
|
||||
magic: 0x20b,
|
||||
major_linker_ver: 8,
|
||||
minor_linker_ver: 0,
|
||||
code_size: text_size.next_multiple_of(file_align),
|
||||
init_data_size: 0,
|
||||
uninit_data_size: 0,
|
||||
entry_addr: (code_start as u64 + program.entry.unwrap()) as u32,
|
||||
code_base: text_start,
|
||||
image_base: 0x400000,
|
||||
section_align,
|
||||
file_align,
|
||||
major_os_ver: 4,
|
||||
minor_os_ver: 0,
|
||||
major_image_ver: 0,
|
||||
minor_image_ver: 0,
|
||||
major_subsystem_ver: 4,
|
||||
minor_subsystem_ver: 0,
|
||||
win32_ver: 0,
|
||||
image_size: file_size.next_multiple_of(section_align),
|
||||
headers_size: hdr_size.next_multiple_of(file_align),
|
||||
checksum: 0,
|
||||
subsystem: 3, // windows CLI app
|
||||
dll_characteristics: 0x400,
|
||||
stack_reserve_size: 0x100000,
|
||||
stack_commit_size: 0x1000,
|
||||
heap_reserve_size: 0x100000,
|
||||
heap_commit_size: 0x1000,
|
||||
loader_flags: 0,
|
||||
num_of_rva_and_sizes,
|
||||
};
|
||||
|
||||
data.data
|
||||
}
|
||||
|
||||
impl LinkedProgram<u64> {
|
||||
pub fn to_pe(&self) -> Vec<u8> {
|
||||
create(&self)
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,15 @@
|
||||
/// Address type that can be built from a byte length or offset.
pub trait Addr: Clone + Copy {
    /// Converts a buffer length/offset into an address value.
    fn from_len(len: usize) -> Self;
}

impl Addr for u64 {
    fn from_len(len: usize) -> Self {
        // Lossless: `usize` is never wider than 64 bits on supported targets.
        len as Self
    }
}

impl Addr for u32 {
    /// # Panics
    ///
    /// Panics if `len` does not fit in 32 bits instead of silently truncating
    /// it to a wrong address.
    fn from_len(len: usize) -> Self {
        Self::try_from(len).expect("length does not fit in a 32-bit address")
    }
}
|
||||
@@ -0,0 +1,126 @@
|
||||
mod addr;
|
||||
mod symbol;
|
||||
pub use addr::*;
|
||||
pub use symbol::*;
|
||||
|
||||
use crate::{arch::Arch, backend::LinkedProgram, io::CompilerMsg};
|
||||
|
||||
pub struct Program<A: Arch> {
|
||||
pub ro_data: Vec<Data>,
|
||||
pub funcs: Vec<Func<A>>,
|
||||
pub entry: Option<Symbol>,
|
||||
pub external: Vec<External>,
|
||||
|
||||
sym_info: Vec<SymInfo>,
|
||||
sym_count: usize,
|
||||
}
|
||||
|
||||
pub struct Data {
|
||||
pub bytes: Vec<u8>,
|
||||
pub sym: Symbol,
|
||||
}
|
||||
|
||||
pub struct Func<A: Arch> {
|
||||
pub instrs: Vec<Instr<A>>,
|
||||
pub sym: Symbol,
|
||||
}
|
||||
|
||||
pub struct External {
|
||||
pub file: String,
|
||||
pub syms: Vec<Symbol>,
|
||||
}
|
||||
|
||||
pub struct SymInfo {
|
||||
pub name: String,
|
||||
pub external: bool,
|
||||
}
|
||||
|
||||
pub enum Instr<A: Arch> {
|
||||
Set { dst: VarId, src: Vec<u8> },
|
||||
Call { dst: FnId, args: Vec<VarId> },
|
||||
Copy { dst: VarId, src: VarId },
|
||||
Asm(A::Asm),
|
||||
}
|
||||
|
||||
pub type VarId = usize;
|
||||
pub type FnId = usize;
|
||||
|
||||
impl<A: Arch> Program<A> {
|
||||
pub fn encode_data(&self, data: &mut Vec<u8>, sym_tab: &mut SymTable<A::Addr>) {
|
||||
for d in &self.ro_data {
|
||||
let addr = A::Addr::from_len(data.len());
|
||||
data.extend(&d.bytes);
|
||||
sym_tab.insert(d.sym, addr);
|
||||
}
|
||||
}
|
||||
|
||||
pub fn ro_data(&mut self, name: impl Into<String>, bytes: impl Into<Vec<u8>>) -> Symbol {
|
||||
let bytes = bytes.into();
|
||||
let sym = self.reserve(SymInfo {
|
||||
name: name.into(),
|
||||
external: false,
|
||||
});
|
||||
self.ro_data.push(Data { bytes, sym });
|
||||
sym
|
||||
}
|
||||
|
||||
pub fn func(&mut self, name: impl Into<String>, instrs: impl Into<Vec<Instr<A>>>) -> Symbol {
|
||||
let instrs = instrs.into();
|
||||
let sym = self.reserve(SymInfo {
|
||||
name: name.into(),
|
||||
external: false,
|
||||
});
|
||||
self.funcs.push(Func { instrs, sym });
|
||||
sym
|
||||
}
|
||||
|
||||
pub fn external<const LEN: usize>(
|
||||
&mut self,
|
||||
file: impl Into<String>,
|
||||
names: [impl Into<String>; LEN],
|
||||
) -> [Symbol; LEN] {
|
||||
let syms = names.map(|s| {
|
||||
self.reserve(SymInfo {
|
||||
name: s.into(),
|
||||
external: true,
|
||||
})
|
||||
});
|
||||
self.external.push(External {
|
||||
file: file.into(),
|
||||
syms: syms.to_vec(),
|
||||
});
|
||||
syms
|
||||
}
|
||||
|
||||
fn reserve(&mut self, info: SymInfo) -> Symbol {
|
||||
let res = Symbol(self.sym_count);
|
||||
self.sym_info.push(info);
|
||||
self.sym_count += 1;
|
||||
res
|
||||
}
|
||||
|
||||
pub fn compile(&self) -> Result<LinkedProgram<A::Addr>, CompilerMsg> {
|
||||
A::compile(self)
|
||||
}
|
||||
|
||||
pub fn sym_count(&self) -> usize {
|
||||
self.sym_count
|
||||
}
|
||||
|
||||
pub fn sym_info(&self, sym: Symbol) -> &SymInfo {
|
||||
&self.sym_info[sym.0]
|
||||
}
|
||||
}
|
||||
|
||||
// Implemented by hand: deriving `Default` would add an `A: Default` bound.
impl<A: Arch> Default for Program<A> {
    /// Creates an empty program with no data, functions, entry point or symbols.
    fn default() -> Self {
        Self {
            ro_data: Vec::new(),
            funcs: Vec::new(),
            entry: None,
            external: Vec::new(),
            sym_info: Vec::new(),
            sym_count: 0,
        }
    }
}
|
||||
@@ -0,0 +1,15 @@
|
||||
#[derive(Debug, Clone, Copy, Hash, PartialEq, Eq)]
|
||||
pub struct Symbol(pub(super) usize);
|
||||
|
||||
pub struct SymTable<Addr>(Vec<Option<Addr>>);
|
||||
impl<Addr: Clone + Copy> SymTable<Addr> {
|
||||
pub fn new(len: usize) -> Self {
|
||||
Self(vec![None; len])
|
||||
}
|
||||
pub fn insert(&mut self, sym: Symbol, addr: Addr) {
|
||||
self.0[sym.0] = Some(addr);
|
||||
}
|
||||
pub fn get(&self, sym: Symbol) -> Option<Addr> {
|
||||
self.0[sym.0]
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,15 @@
|
||||
/// Result of compiling a program: machine code plus what is needed to load it.
pub struct LinkedProgram<Addr> {
    /// The emitted bytes.
    pub code: Vec<u8>,
    /// Address of the entry point, if the program has one.
    pub entry: Option<Addr>,
    /// Libraries the program imports symbols from.
    pub imports: Vec<LibImport>,
}

/// All symbols imported from one library.
pub struct LibImport {
    /// Name of the library.
    pub name: String,
    /// Symbols imported from it.
    pub syms: Vec<SymImport>,
}

/// One imported symbol and the places that use it.
pub struct SymImport {
    /// Name of the symbol.
    pub name: String,
    /// Positions where the symbol is used.
    // NOTE(review): presumably byte offsets into `LinkedProgram::code` — confirm with the linker.
    pub usages: Vec<usize>,
}
|
||||
@@ -0,0 +1,7 @@
|
||||
mod container;
|
||||
mod ir;
|
||||
mod link;
|
||||
|
||||
pub use container::*;
|
||||
pub use ir::*;
|
||||
pub use link::*;
|
||||
+142
@@ -0,0 +1,142 @@
|
||||
use std::path::{Path, PathBuf};
|
||||
|
||||
/// A region of source text inside one of the compiler's input files.
#[derive(Debug, Clone, Copy)]
pub struct Span {
    /// Index of the file in `CompilerOutput::files`.
    pub file: usize,
    /// Byte offset of the first character of the region.
    pub start: usize,
    /// Byte offset of the last character of the region (inclusive).
    pub end: usize,
}
|
||||
|
||||
pub struct Spanned<T> {
|
||||
pub inner: T,
|
||||
pub span: Span,
|
||||
}
|
||||
|
||||
impl<T> std::ops::Deref for Spanned<T> {
|
||||
type Target = T;
|
||||
|
||||
fn deref(&self) -> &Self::Target {
|
||||
&self.inner
|
||||
}
|
||||
}
|
||||
|
||||
impl<T> std::ops::DerefMut for Spanned<T> {
|
||||
fn deref_mut(&mut self) -> &mut Self::Target {
|
||||
&mut self.inner
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
pub struct CompilerMsg {
|
||||
pub spans: Vec<Span>,
|
||||
pub msg: String,
|
||||
}
|
||||
|
||||
#[derive(Default)]
|
||||
pub struct CompilerOutput {
|
||||
pub errors: Vec<CompilerMsg>,
|
||||
pub files: Vec<PathBuf>,
|
||||
}
|
||||
|
||||
impl CompilerOutput {
|
||||
pub fn new() -> Self {
|
||||
Self::default()
|
||||
}
|
||||
pub fn error(&mut self, msg: impl Into<CompilerMsg>) {
|
||||
self.errors.push(msg.into());
|
||||
}
|
||||
pub fn write(&self, w: &mut impl std::io::Write) {
|
||||
let files: Vec<_> = self
|
||||
.files
|
||||
.iter()
|
||||
.map(|path| std::fs::read_to_string(path).unwrap())
|
||||
.collect();
|
||||
for error in &self.errors {
|
||||
writeln!(w, "Error: {}", error.msg).unwrap();
|
||||
for span in &error.spans {
|
||||
span.write(w, &files[span.file]).unwrap();
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl Span {
|
||||
pub fn write(&self, w: &mut impl std::io::Write, text: &str) -> std::io::Result<()> {
|
||||
let mut line_start = 0;
|
||||
let mut found = false;
|
||||
let mut line = 1;
|
||||
let mut spans = Vec::new();
|
||||
for (i, c) in text.char_indices() {
|
||||
if i == self.start {
|
||||
found = true;
|
||||
}
|
||||
if i == self.end {
|
||||
found = true;
|
||||
}
|
||||
if c == '\n' {
|
||||
if found {
|
||||
spans.push((line, line_start..i));
|
||||
}
|
||||
line_start = i + 1;
|
||||
line += 1;
|
||||
found = false;
|
||||
}
|
||||
}
|
||||
let underline = "\x1b[4:3m";
|
||||
let underline_color = "\x1b[58;5;1m";
|
||||
let end = "\x1b[0m";
|
||||
if let [(line, range)] = &spans[..] {
|
||||
writeln!(
|
||||
w,
|
||||
" {line:3} | {}{underline}{underline_color}{}{end}{}",
|
||||
&text[range.start..self.start],
|
||||
&text[self.start..=self.end],
|
||||
&text[(self.end + 1)..range.end]
|
||||
)?;
|
||||
} else if let [(sline, srange), (eline, erange)] = &spans[..] {
|
||||
writeln!(
|
||||
w,
|
||||
" {sline:3} | {}{underline}{underline_color}{}{end}",
|
||||
&text[srange.start..self.start],
|
||||
&text[self.start..=srange.end - 1],
|
||||
)?;
|
||||
if *eline != *sline + 1 {
|
||||
writeln!(w, " ...")?;
|
||||
}
|
||||
writeln!(
|
||||
w,
|
||||
" {eline:3} | {underline}{underline_color}{}{end}{}",
|
||||
&text[erange.start..=self.end],
|
||||
&text[(self.end + 1)..=erange.end - 1],
|
||||
)?;
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
impl From<String> for CompilerMsg {
|
||||
fn from(msg: String) -> Self {
|
||||
Self {
|
||||
spans: Vec::new(),
|
||||
msg,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl From<&str> for CompilerMsg {
|
||||
fn from(msg: &str) -> Self {
|
||||
Self {
|
||||
spans: Vec::new(),
|
||||
msg: msg.to_string(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<S: Into<String>> From<(S, Span)> for CompilerMsg {
|
||||
fn from((msg, span): (S, Span)) -> Self {
|
||||
Self {
|
||||
spans: vec![span],
|
||||
msg: msg.into(),
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,51 @@
|
||||
use std::ops::{Index, IndexMut};
|
||||
|
||||
/// Typed index into an [`IdVec<T>`].
///
/// The type parameter only tags the id; no `T` is stored.
pub struct Id<T> {
    idx: usize,
    _pd: std::marker::PhantomData<T>,
}

/// Append-only vector whose elements are addressed by [`Id`]s.
pub struct IdVec<T> {
    vec: Vec<T>,
}

impl<T> IdVec<T> {
    /// Appends `val` and returns the id under which it can be looked up.
    pub fn add(&mut self, val: T) -> Id<T> {
        let idx = self.vec.len();
        self.vec.push(val);
        Id {
            idx,
            _pd: std::marker::PhantomData,
        }
    }
}

impl<T> Index<Id<T>> for IdVec<T> {
    type Output = T;

    fn index(&self, index: Id<T>) -> &T {
        let Id { idx, .. } = index;
        &self.vec[idx]
    }
}

impl<T> IndexMut<Id<T>> for IdVec<T> {
    fn index_mut(&mut self, index: Id<T>) -> &mut T {
        let Id { idx, .. } = index;
        &mut self.vec[idx]
    }
}

// Written by hand so that no `T: Default` bound is required.
impl<T> Default for IdVec<T> {
    fn default() -> Self {
        Self { vec: Vec::new() }
    }
}

// `Clone`/`Copy` are written by hand so that ids are copyable for every `T`,
// not only for `T: Copy`.
impl<T> Clone for Id<T> {
    fn clone(&self) -> Self {
        *self
    }
}

impl<T> Copy for Id<T> {}
|
||||
@@ -0,0 +1,23 @@
|
||||
mod id;
|
||||
mod structs;
|
||||
pub use id::*;
|
||||
pub use structs::*;
|
||||
|
||||
pub struct Ir {
|
||||
pub root: Id<Namespace>,
|
||||
pub namespaces: IdVec<Namespace>,
|
||||
}
|
||||
|
||||
impl Ir {
|
||||
pub fn root(&mut self) -> &mut Namespace {
|
||||
&mut self.namespaces[self.root]
|
||||
}
|
||||
}
|
||||
|
||||
impl Default for Ir {
|
||||
fn default() -> Self {
|
||||
let mut namespaces = IdVec::default();
|
||||
let root = namespaces.add(Namespace::default());
|
||||
Self { root, namespaces }
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,38 @@
|
||||
mod namespace;
|
||||
pub use namespace::*;
|
||||
|
||||
use super::Id;
|
||||
|
||||
/// A function of the IR.
pub struct Fn {
    /// The function's body.
    pub body: Body,
}

/// A sequence of statements.
pub struct Body {
    /// Statements in source order.
    pub statements: Vec<Statement>,
}

/// A single statement.
pub struct Statement {
    ty: StatementTy,
}

/// The kinds of statement the IR knows about.
pub enum StatementTy {
    /// Introduces `target` with the value of `val`.
    Define { target: VarId, val: VarId },
    /// Overwrites `target` with the value of `val`.
    Assign { target: VarId, val: VarId },
    /// Calls `target` with the given argument variables.
    Call { target: VarId, args: Vec<VarId> },
}

/// A variable.
pub struct Var {
    const_: bool,
    ty: TypeId,
}

/// A type of the IR.
pub enum Type {
    /// Unsigned integer; the payload is presumably its width — TODO confirm the unit.
    Unsigned(u8),
    /// Signed integer; the payload is presumably its width — TODO confirm the unit.
    Signed(u8),
    /// Array of the referenced element type.
    Array(TypeId),
    /// Pointer to the referenced type.
    Ptr(TypeId),
    /// Type that still has to be inferred.
    Infer,
}

/// Index identifying a variable.
pub type VarId = u32;
/// Index identifying a type.
pub type TypeId = u32;
|
||||
@@ -0,0 +1,11 @@
|
||||
use super::*;
|
||||
use std::collections::HashMap;
|
||||
|
||||
#[derive(Default)]
|
||||
pub struct Namespace {
|
||||
pub items: HashMap<String, Item>,
|
||||
}
|
||||
|
||||
pub enum Item {
|
||||
Import(Id<Namespace>),
|
||||
}
|
||||
+17
-4
@@ -1,8 +1,21 @@
|
||||
#![feature(try_trait_v2)]
|
||||
#![feature(associated_type_defaults)]
|
||||
#![feature(trait_alias)]
|
||||
#![cfg_attr(test, feature(gen_blocks))]
|
||||
|
||||
use crate::{io::CompilerOutput, parser_ir::parse_program};
|
||||
|
||||
mod arch;
|
||||
mod backend;
|
||||
mod io;
|
||||
mod ir;
|
||||
mod parser;
|
||||
mod parser_ir;
|
||||
|
||||
fn main() {
|
||||
parser::parse(include_str!("test.lang"));
|
||||
let mut args = std::env::args();
|
||||
let Some(path) = args.nth(1) else {
|
||||
println!("file expected");
|
||||
return;
|
||||
};
|
||||
let mut output = CompilerOutput::new();
|
||||
let ir = parse_program(&path, &mut output);
|
||||
output.write(&mut std::io::stdout());
|
||||
}
|
||||
|
||||
@@ -1,108 +0,0 @@
|
||||
use super::*;
|
||||
|
||||
pub struct ParserCtx<'a> {
|
||||
pub cursor: TokenCursor<'a>,
|
||||
pub msgs: &'a mut Vec<CompilerMsg>,
|
||||
}
|
||||
|
||||
impl<'a> ParserCtx<'a> {
|
||||
pub fn new(cursor: impl Into<TokenCursor<'a>>, msgs: &'a mut Vec<CompilerMsg>) -> Self {
|
||||
Self {
|
||||
cursor: cursor.into(),
|
||||
msgs,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn parse<T: Parsable<Data = ()>>(&mut self) -> Option<Node<T>> {
|
||||
self.parse_with(())
|
||||
}
|
||||
|
||||
pub fn parse_with<T: Parsable>(&mut self, data: T::Data) -> Option<Node<T>> {
|
||||
let data = match T::parse(self, data) {
|
||||
ParseResult::Ok(t) => Some(t),
|
||||
ParseResult::Node(n) => return Some(n),
|
||||
ParseResult::Break(msg) => {
|
||||
self.msgs.push(msg);
|
||||
return None;
|
||||
}
|
||||
ParseResult::Continue(msg) => {
|
||||
self.msgs.push(msg);
|
||||
None
|
||||
}
|
||||
ParseResult::SubErr => {
|
||||
return None;
|
||||
}
|
||||
};
|
||||
Some(Node { data })
|
||||
}
|
||||
|
||||
pub fn seek(&mut self, token: impl Into<Token>) -> bool {
|
||||
let token = token.into();
|
||||
while let Some(next) = self.next() {
|
||||
if next == token {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
false
|
||||
}
|
||||
|
||||
pub fn expect_next(&mut self) -> Option<Token> {
|
||||
let res = self.cursor.next();
|
||||
if res.is_none() {
|
||||
self.msgs.push(CompilerMsg::new(
|
||||
"Unexpected end of input",
|
||||
self.cursor.prev_end(),
|
||||
));
|
||||
}
|
||||
res
|
||||
}
|
||||
|
||||
pub fn expect_peek(&self) -> Result<&Token, CompilerMsg> {
|
||||
match self.cursor.peek() {
|
||||
Some(t) => Ok(t),
|
||||
None => Err(self.unexpected_end()),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn expect(&mut self, token: impl Into<Token>) -> Result<(), CompilerMsg> {
|
||||
let token = token.into();
|
||||
if self.next_is_ref(&token) {
|
||||
Ok(())
|
||||
} else {
|
||||
Err(self.unexpected(format!("token {:?}", token)))
|
||||
}
|
||||
}
|
||||
|
||||
fn unexpected_end(&self) -> CompilerMsg {
|
||||
CompilerMsg::new("Unexpected end of input", self.next_start())
|
||||
}
|
||||
|
||||
pub fn peek(&self) -> Option<&Token> {
|
||||
self.cursor.peek()
|
||||
}
|
||||
|
||||
pub fn unexpected<'b>(&self, expected: impl std::fmt::Display) -> CompilerMsg {
|
||||
if let Some((next, span)) = self.peek_span() {
|
||||
CompilerMsg::new(
|
||||
format!("Unexpected token {:?}, expected {}", next, expected),
|
||||
span,
|
||||
)
|
||||
} else {
|
||||
self.unexpected_end()
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a> std::ops::Deref for ParserCtx<'a> {
|
||||
type Target = TokenCursor<'a>;
|
||||
|
||||
fn deref(&self) -> &Self::Target {
|
||||
&self.cursor
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a> std::ops::DerefMut for ParserCtx<'a> {
|
||||
fn deref_mut(&mut self) -> &mut Self::Target {
|
||||
&mut self.cursor
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,38 @@
|
||||
use super::Token;
|
||||
use crate::io::Span;
|
||||
|
||||
pub struct Lit {
|
||||
pub ty: LitTy,
|
||||
pub span: Span,
|
||||
}
|
||||
|
||||
#[derive(PartialEq)]
|
||||
pub enum LitTy {
|
||||
Number(String),
|
||||
Bool(bool),
|
||||
String(String),
|
||||
Unit,
|
||||
}
|
||||
|
||||
impl From<LitTy> for Token {
|
||||
fn from(value: LitTy) -> Self {
|
||||
Self::Lit(value)
|
||||
}
|
||||
}
|
||||
|
||||
impl std::fmt::Display for LitTy {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
match self {
|
||||
Self::Number(n) => write!(f, "{n}"),
|
||||
Self::Bool(b) => write!(f, "{b}"),
|
||||
Self::String(s) => write!(f, "\"{s}\""),
|
||||
Self::Unit => write!(f, "()"),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl std::fmt::Display for Lit {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
self.ty.fmt(f)
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,102 @@
|
||||
use std::borrow::Borrow;
|
||||
|
||||
use crate::io::{CompilerMsg, Span, Spanned};
|
||||
|
||||
mod lit;
|
||||
mod token;
|
||||
pub use lit::*;
|
||||
pub use token::*;
|
||||
|
||||
pub struct Cursor<'a> {
|
||||
pub span: Span,
|
||||
next: Option<TokenInst>,
|
||||
tokens: Tokens<'a>,
|
||||
}
|
||||
|
||||
impl<'a> Cursor<'a> {
|
||||
pub fn new(text: &'a str, file: usize) -> Self {
|
||||
let mut s = Self {
|
||||
span: Span {
|
||||
start: 0,
|
||||
end: 0,
|
||||
file,
|
||||
},
|
||||
next: None,
|
||||
tokens: Tokens::new(text, file),
|
||||
};
|
||||
s.next();
|
||||
s
|
||||
}
|
||||
|
||||
pub fn next(&mut self) -> Option<Token> {
|
||||
let mut next = self.tokens.next();
|
||||
std::mem::swap(&mut self.next, &mut next);
|
||||
next.map(|inst| {
|
||||
self.span = inst.span;
|
||||
inst.inner
|
||||
})
|
||||
}
|
||||
|
||||
pub fn next_if(&mut self, token: impl Borrow<Token>) -> bool {
|
||||
if self.peek().is_some_and(|t| t == token.borrow()) {
|
||||
self.next();
|
||||
true
|
||||
} else {
|
||||
false
|
||||
}
|
||||
}
|
||||
|
||||
pub fn peek(&self) -> Option<&Token> {
|
||||
self.next.as_ref().map(|i| &i.inner)
|
||||
}
|
||||
|
||||
pub fn expect_next(&mut self) -> Result<Token, CompilerMsg> {
|
||||
self.next().ok_or_else(CompilerMsg::unexpected_eof)
|
||||
}
|
||||
|
||||
pub fn expect_peek(&self) -> Result<&Token, CompilerMsg> {
|
||||
self.peek().ok_or_else(CompilerMsg::unexpected_eof)
|
||||
}
|
||||
|
||||
pub fn expect(&mut self, token: impl Borrow<Token>) -> Result<Token, CompilerMsg> {
|
||||
let token = token.borrow();
|
||||
let next = self.expect_next()?;
|
||||
if next == *token {
|
||||
Ok(next)
|
||||
} else {
|
||||
self.unexpected(next, &format!("'{token}'"))
|
||||
}
|
||||
}
|
||||
|
||||
pub fn unexpected<T>(&self, token: Token, expected: &str) -> Result<T, CompilerMsg> {
|
||||
Err(CompilerMsg::unexpected_token(&token, self.span, expected))
|
||||
}
|
||||
|
||||
pub fn peek_start(&mut self) -> usize {
|
||||
self.next.as_ref().map(|i| i.span.start).unwrap_or(0)
|
||||
}
|
||||
|
||||
pub fn cur_end(&mut self) -> usize {
|
||||
self.span.end
|
||||
}
|
||||
|
||||
pub fn file(&mut self) -> usize {
|
||||
self.span.file
|
||||
}
|
||||
}
|
||||
|
||||
impl CompilerMsg {
|
||||
pub fn unexpected_token(token: &Token, span: Span, expected: &str) -> Self {
|
||||
Self {
|
||||
spans: vec![span],
|
||||
msg: format!("Unexpected token '{}', expected {expected}", token),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn unexpected_eof() -> Self {
|
||||
Self {
|
||||
spans: Vec::new(),
|
||||
msg: "unexpected end of file".to_string(),
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,198 @@
|
||||
use crate::parser::cursor::LitTy;
|
||||
|
||||
use super::{Span, Spanned};
|
||||
use std::{iter::Peekable, str::CharIndices};
|
||||
|
||||
// Declares the `Token` enum, its `Display` impl and the keyword lookup `from_str`.
def_tokens! {
    // Punctuation: variant name and the text it is displayed as.
    symbol {
        Dot: ".",
        Comma: ",",
        Equal: "=",
        Colon: ":",
        Semicolon: ";",
        Plus: "+",
        Dash: "-",
        Asterisk: "*",
        Slash: "/",
        OpenParen: "(",
        CloseParen: ")",
        OpenSquare: "[",
        CloseSquare: "]",
        OpenCurly: "{",
        CloseCurly: "}",
        Arrow: "->",
        DoubleArrow: "=>",
        PlusEqual: "+=",
        DashEqual: "-=",
        AsteriskEqual: "*=",
        SlashEqual: "/=",
        Hash: "#",
    }
    // Reserved words: identifiers with exactly this text become the variant.
    keyword {
        Let: "let",
        Import: "import",
        Fn: "fn",
        If: "if",
        Loop: "loop",
        While: "while",
        For: "for",
        Match: "match",
        Break: "break",
        Asm: "asm",
    }
    // Tokens that carry a payload.
    other {
        Ident(String),
        Lit(LitTy),
    }
}
|
||||
|
||||
pub type TokenInst = Spanned<Token>;
|
||||
|
||||
pub struct Tokens<'a> {
|
||||
file: usize,
|
||||
chars: Peekable<CharIndices<'a>>,
|
||||
}
|
||||
|
||||
impl<'a> Tokens<'a> {
|
||||
pub fn new(code: &'a str, file: usize) -> Self {
|
||||
Self {
|
||||
file,
|
||||
chars: code.char_indices().peekable(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl Iterator for Tokens<'_> {
|
||||
type Item = Spanned<Token>;
|
||||
|
||||
fn next(&mut self) -> Option<Self::Item> {
|
||||
let (i, c) = self.chars.next()?;
|
||||
let mut span = Span {
|
||||
start: i,
|
||||
end: i,
|
||||
file: self.file,
|
||||
};
|
||||
if c.is_whitespace() {
|
||||
return self.next();
|
||||
}
|
||||
macro_rules! then {
|
||||
(_ => $def:expr, $($char:expr => $to:expr,)*) => {
|
||||
match self.chars.peek() {
|
||||
$(Some((_, $char)) => {
|
||||
self.chars.next();
|
||||
$to
|
||||
},)*
|
||||
_ => $def,
|
||||
}
|
||||
};
|
||||
}
|
||||
let inner = match c {
|
||||
'.' => Token::Dot,
|
||||
',' => Token::Comma,
|
||||
'(' => Token::OpenParen,
|
||||
')' => Token::CloseParen,
|
||||
'[' => Token::OpenSquare,
|
||||
']' => Token::CloseSquare,
|
||||
'{' => Token::OpenCurly,
|
||||
'}' => Token::CloseCurly,
|
||||
'#' => Token::Hash,
|
||||
'+' => then! {
|
||||
_ => Token::Plus,
|
||||
'=' => Token::PlusEqual,
|
||||
},
|
||||
'-' => then! {
|
||||
_ => Token::Dash,
|
||||
'=' => Token::DashEqual,
|
||||
'>' => Token::Arrow,
|
||||
},
|
||||
'*' => then! {
|
||||
_ => Token::Asterisk,
|
||||
'=' => Token::AsteriskEqual,
|
||||
},
|
||||
'/' => then! {
|
||||
_ => Token::Slash,
|
||||
'=' => Token::SlashEqual,
|
||||
},
|
||||
':' => Token::Colon,
|
||||
';' => Token::Semicolon,
|
||||
'=' => then! {
|
||||
_ => Token::Equal,
|
||||
'>' => Token::DoubleArrow,
|
||||
},
|
||||
'0'..='9' => {
|
||||
let mut s = c.to_string();
|
||||
while let Some((i, c)) = self.chars.peek()
|
||||
&& c.is_alphanumeric()
|
||||
{
|
||||
s.push(*c);
|
||||
span.end = *i;
|
||||
self.chars.next();
|
||||
}
|
||||
LitTy::Number(s).into()
|
||||
}
|
||||
'"' => {
|
||||
let mut s = String::new();
|
||||
while let Some((i, c)) = self.chars.next()
|
||||
&& !matches!(c, '"')
|
||||
{
|
||||
s.push(c);
|
||||
span.end = i;
|
||||
}
|
||||
LitTy::String(s).into()
|
||||
}
|
||||
_ => {
|
||||
let mut s = c.to_string();
|
||||
while let Some((i, c)) = self.chars.peek()
|
||||
&& c.is_alphanumeric()
|
||||
{
|
||||
s.push(*c);
|
||||
span.end = *i;
|
||||
self.chars.next();
|
||||
}
|
||||
match s.as_str() {
|
||||
"true" => LitTy::Bool(true).into(),
|
||||
"false" => LitTy::Bool(false).into(),
|
||||
_ => from_str(s),
|
||||
}
|
||||
}
|
||||
};
|
||||
Some(Spanned { inner, span })
|
||||
}
|
||||
}
|
||||
|
||||
/// Generates the `Token` enum from three lists:
///
/// * `symbol`: unit variants with the text they are displayed as,
/// * `keyword`: unit variants for reserved words,
/// * `other`: variants carrying a payload (which must implement `Display`).
///
/// Besides the enum it emits `from_str`, which maps a word to its keyword
/// variant or else to `Token::Ident`, and a `Display` impl that prints every
/// token as its source text.
macro_rules! def_tokens {
    {
        symbol {
            $($sym_name:ident: $sym_str:expr,)*
        }
        keyword {
            $($kw_name:ident: $kw_str:expr,)*
        }
        other {
            $($other_name:ident($data:ty),)*
        }
    } => {
        #[derive(PartialEq)]
        pub enum Token {
            $($sym_name,)*
            $($kw_name,)*
            $($other_name($data),)*
        }

        fn from_str(s: String) -> Token {
            match s.as_str() {
                $($kw_str => Token::$kw_name,)*
                _ => Token::Ident(s),
            }
        }

        impl std::fmt::Display for Token {
            fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
                match self {
                    // The texts are written verbatim; they are never used as a
                    // format string, so braces in them are harmless.
                    $(Token::$sym_name => f.write_str($sym_str),)*
                    $(Token::$kw_name => f.write_str($kw_str),)*
                    $(Token::$other_name(v) => write!(f, "{v}"),)*
                }
            }
        }
    };
}
|
||||
use def_tokens;
|
||||
@@ -1,37 +0,0 @@
|
||||
use super::*;
|
||||
|
||||
#[derive(Debug)]
|
||||
pub struct CompilerMsg {
|
||||
msg: String,
|
||||
span: CharSpan,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Copy)]
|
||||
pub struct CharSpan {
|
||||
start: CharPos,
|
||||
end: CharPos,
|
||||
}
|
||||
|
||||
impl CharPos {
|
||||
pub fn to(self, end: CharPos) -> CharSpan {
|
||||
CharSpan { start: self, end }
|
||||
}
|
||||
}
|
||||
|
||||
impl CompilerMsg {
|
||||
pub fn new(msg: impl Into<String>, span: impl Into<CharSpan>) -> Self {
|
||||
Self {
|
||||
msg: msg.into(),
|
||||
span: span.into(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl From<CharPos> for CharSpan {
|
||||
fn from(value: CharPos) -> Self {
|
||||
Self {
|
||||
start: value,
|
||||
end: value,
|
||||
}
|
||||
}
|
||||
}
|
||||
+28
-15
@@ -1,19 +1,32 @@
|
||||
mod ctx;
|
||||
mod token;
|
||||
mod tree;
|
||||
mod io;
|
||||
mod cursor;
|
||||
mod node;
|
||||
mod nodes;
|
||||
|
||||
pub use ctx::*;
|
||||
use token::*;
|
||||
pub use tree::*;
|
||||
pub use io::*;
|
||||
use std::path::Path;
|
||||
|
||||
pub fn parse(file: &str) {
|
||||
let mut msgs = Vec::new();
|
||||
let mut parser = ParserCtx::new(file, &mut msgs);
|
||||
if let Some(block) = parser.parse_with::<PBlock>(false) {
|
||||
println!("{block:#?}");
|
||||
} else {
|
||||
println!("{msgs:?}");
|
||||
use cursor::*;
|
||||
pub use node::*;
|
||||
pub use nodes::*;
|
||||
|
||||
use crate::io::CompilerOutput;
|
||||
|
||||
pub fn parse_file(path: impl AsRef<Path>, output: &mut CompilerOutput) -> Option<Body> {
|
||||
let code = match std::fs::read_to_string(&path) {
|
||||
Ok(code) => code,
|
||||
Err(err) => {
|
||||
output.error(format!("Failed to read input file: {err}"));
|
||||
return None;
|
||||
}
|
||||
};
|
||||
let file = output.files.len();
|
||||
output.files.push(path.as_ref().to_path_buf());
|
||||
let mut ctx = ParseCtx::new(Cursor::new(&code, file));
|
||||
let root = match ctx.parse() {
|
||||
Ok(v) => v,
|
||||
Err(msg) => {
|
||||
output.error(msg);
|
||||
return None;
|
||||
}
|
||||
};
|
||||
Some(root)
|
||||
}
|
||||
|
||||
@@ -0,0 +1,83 @@
|
||||
use crate::{
|
||||
io::{CompilerMsg, Span},
|
||||
parser::{
|
||||
Ident, Node,
|
||||
cursor::{Cursor, Lit, LitTy, Token},
|
||||
},
|
||||
};
|
||||
|
||||
pub struct ParseCtx<'a> {
|
||||
start: usize,
|
||||
cursor: Cursor<'a>,
|
||||
}
|
||||
|
||||
impl<'a> ParseCtx<'a> {
|
||||
pub fn new(cursor: Cursor<'a>) -> Self {
|
||||
Self { start: 0, cursor }
|
||||
}
|
||||
|
||||
pub fn parse_box<N: Node>(&mut self) -> Result<Box<N>, CompilerMsg> {
|
||||
self.parse_with(N::parse).map(Box::new)
|
||||
}
|
||||
|
||||
pub fn parse<N: Node>(&mut self) -> Result<N, CompilerMsg> {
|
||||
self.parse_with(N::parse)
|
||||
}
|
||||
|
||||
pub fn parse_with<N: Node>(
|
||||
&mut self,
|
||||
f: impl FnOnce(&mut Self) -> Result<N, CompilerMsg>,
|
||||
) -> Result<N, CompilerMsg> {
|
||||
let old_start = self.start;
|
||||
self.start = self.cursor.peek_start();
|
||||
let res = f(self);
|
||||
self.start = old_start;
|
||||
res
|
||||
}
|
||||
|
||||
pub fn ident(&mut self, s: String) -> Ident {
|
||||
let span = self.cursor.span;
|
||||
Ident { name: s, span }
|
||||
}
|
||||
|
||||
pub fn lit(&mut self, ty: LitTy) -> Lit {
|
||||
let span = self.cursor.span;
|
||||
Lit { ty, span }
|
||||
}
|
||||
|
||||
pub fn span(&mut self) -> Span {
|
||||
let end = self.cursor.cur_end();
|
||||
Span {
|
||||
file: self.cursor.file(),
|
||||
start: self.start,
|
||||
end,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn list<N: Node>(&mut self, sep: Token, end: Token) -> Result<Vec<N>, CompilerMsg> {
|
||||
let mut list = Vec::new();
|
||||
if self.next_if(&end) {
|
||||
return Ok(list);
|
||||
}
|
||||
list.push(self.parse()?);
|
||||
while self.next_if(&sep) {
|
||||
list.push(self.parse()?);
|
||||
}
|
||||
self.expect(end)?;
|
||||
Ok(list)
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a> std::ops::Deref for ParseCtx<'a> {
|
||||
type Target = Cursor<'a>;
|
||||
|
||||
fn deref(&self) -> &Self::Target {
|
||||
&self.cursor
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a> std::ops::DerefMut for ParseCtx<'a> {
|
||||
fn deref_mut(&mut self) -> &mut Self::Target {
|
||||
&mut self.cursor
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,50 @@
|
||||
use crate::parser::Node;
|
||||
|
||||
#[derive(Clone, Copy)]
|
||||
pub struct DisplayCtx {
|
||||
pub indent: usize,
|
||||
}
|
||||
|
||||
pub struct NodeDsp<'a, N: Node> {
|
||||
pub node: &'a N,
|
||||
pub ctx: DisplayCtx,
|
||||
}
|
||||
|
||||
impl<N: Node> std::fmt::Display for NodeDsp<'_, N> {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
self.node.fmt(f, self.ctx)
|
||||
}
|
||||
}
|
||||
|
||||
pub struct VecDsp<'a, N> {
|
||||
list: &'a Vec<N>,
|
||||
ctx: DisplayCtx,
|
||||
}
|
||||
|
||||
impl<N: Node> std::fmt::Display for VecDsp<'_, N> {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
|
||||
if let Some((last, rest)) = self.list.split_last() {
|
||||
for arg in rest {
|
||||
write!(f, "{}, ", arg.dsp(self.ctx))?;
|
||||
}
|
||||
write!(f, "{}", last.dsp(self.ctx))?;
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
pub trait VecDspT<N> {
|
||||
fn dsp<'a, 'b>(&'a self, ctx: impl Into<DisplayCtx>) -> VecDsp<'b, N>
|
||||
where
|
||||
'a: 'b;
|
||||
}
|
||||
|
||||
impl<N> VecDspT<N> for Vec<N> {
|
||||
fn dsp<'a, 'b>(&'a self, ctx: impl Into<DisplayCtx>) -> VecDsp<'b, N>
|
||||
where
|
||||
'a: 'b,
|
||||
{
|
||||
let ctx = ctx.into();
|
||||
VecDsp { list: self, ctx }
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,17 @@
|
||||
mod ctx;
|
||||
mod dsp;
|
||||
pub use ctx::*;
|
||||
pub use dsp::*;
|
||||
|
||||
use crate::io::CompilerMsg;
|
||||
|
||||
pub trait Node: Sized {
|
||||
fn parse(ctx: &mut ParseCtx) -> Result<Self, CompilerMsg>;
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter, ctx: DisplayCtx) -> std::fmt::Result;
|
||||
fn dsp(&self, ctx: DisplayCtx) -> NodeDsp<'_, Self> {
|
||||
NodeDsp { node: self, ctx }
|
||||
}
|
||||
fn new_dsp(&self) -> NodeDsp<'_, Self> {
|
||||
self.dsp(DisplayCtx { indent: 0 })
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,23 @@
|
||||
use crate::{
|
||||
arch::x86_64::Code,
|
||||
parser::{Node, cursor::Token},
|
||||
};
|
||||
|
||||
pub mod x86_64;
|
||||
|
||||
pub enum AsmBlock {
|
||||
X86_64(Code),
|
||||
}
|
||||
|
||||
impl Node for AsmBlock {
|
||||
fn parse(ctx: &mut crate::parser::ParseCtx) -> Result<Self, crate::io::CompilerMsg> {
|
||||
ctx.expect(Token::OpenCurly)?;
|
||||
let asm = ctx.parse()?;
|
||||
ctx.expect(Token::CloseCurly)?;
|
||||
Ok(Self::X86_64(asm))
|
||||
}
|
||||
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter, ctx: crate::parser::DisplayCtx) -> std::fmt::Result {
|
||||
write!(f, "asm {{ ... }}")
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,84 @@
|
||||
use crate::{
|
||||
arch::x86_64::*,
|
||||
io::{CompilerMsg, Span},
|
||||
parser::{
|
||||
Node,
|
||||
cursor::{LitTy, Token},
|
||||
},
|
||||
};
|
||||
|
||||
impl Node for Code {
|
||||
fn parse(ctx: &mut crate::parser::ParseCtx) -> Result<Self, crate::io::CompilerMsg> {
|
||||
let mut c = Code::default();
|
||||
while let Some(Token::Ident(next)) = ctx.peek() {
|
||||
match next.as_str() {
|
||||
"mov" => {
|
||||
ctx.next();
|
||||
let dst = parse_reg(ctx)?;
|
||||
ctx.expect(Token::Comma)?;
|
||||
let src = parse_rmi(ctx)?;
|
||||
c.mov(dst, src)?;
|
||||
}
|
||||
"int" => {
|
||||
ctx.next();
|
||||
let Token::Lit(LitTy::Number(num)) = ctx.expect_next()? else {
|
||||
return Err("Expected an immediate".into());
|
||||
};
|
||||
let code = parse_imm(&num, ctx.span)?
|
||||
.try_into()
|
||||
.map_err(|_| CompilerMsg::from("Immediate must be a u8"))?;
|
||||
c.int(code);
|
||||
}
|
||||
_ => {
|
||||
let msg = format!("Unknown instruction {next}");
|
||||
ctx.next();
|
||||
return Err(CompilerMsg {
|
||||
msg,
|
||||
spans: vec![ctx.span],
|
||||
});
|
||||
}
|
||||
}
|
||||
}
|
||||
Ok(c)
|
||||
}
|
||||
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter, ctx: crate::parser::DisplayCtx) -> std::fmt::Result {
|
||||
todo!()
|
||||
}
|
||||
}
|
||||
|
||||
pub fn parse_imm(mut s: &str, span: Span) -> Result<Imm, CompilerMsg> {
|
||||
let mut radix = 10;
|
||||
let mut mult = 1;
|
||||
if s.starts_with('-') {
|
||||
mult = -1;
|
||||
s = &s[1..];
|
||||
}
|
||||
if s.starts_with("0x") {
|
||||
radix = 16;
|
||||
s = &s[2..];
|
||||
}
|
||||
let abs = u64::from_str_radix(s, radix)
|
||||
.map_err(|_| CompilerMsg::from(("invalid immediate", span)))?;
|
||||
let val = (abs as i128) * mult;
|
||||
Ok(Imm(val))
|
||||
}
|
||||
|
||||
pub fn parse_rmi(ctx: &mut crate::parser::ParseCtx) -> Result<RegMemImm, CompilerMsg> {
|
||||
let next = ctx.expect_next()?;
|
||||
let err = || CompilerMsg::unexpected_token(&next, ctx.span, "a register or immediate");
|
||||
Ok(match &next {
|
||||
Token::Ident(ident) => RegMemImm::Reg(Reg::parse(ident).ok_or_else(err)?),
|
||||
Token::Lit(LitTy::Number(num)) => RegMemImm::Imm(parse_imm(num, ctx.span)?),
|
||||
_ => return Err(err()),
|
||||
})
|
||||
}
|
||||
|
||||
pub fn parse_reg(ctx: &mut crate::parser::ParseCtx) -> Result<Reg, CompilerMsg> {
|
||||
let next = ctx.expect_next()?;
|
||||
let err = || CompilerMsg::unexpected_token(&next, ctx.span, "a register");
|
||||
let Token::Ident(next) = &next else {
|
||||
return Err(err());
|
||||
};
|
||||
Reg::parse(next).ok_or_else(err)
|
||||
}
|
||||
@@ -0,0 +1,58 @@
|
||||
use super::*;
|
||||
|
||||
pub struct Body {
|
||||
pub items: Vec<Expr>,
|
||||
pub final_semicolon: bool,
|
||||
pub span: Span,
|
||||
}
|
||||
|
||||
impl Node for Body {
|
||||
fn parse(ctx: &mut ParseCtx) -> Result<Self, CompilerMsg> {
|
||||
let mut items = Vec::new();
|
||||
fn at_end(ctx: &mut ParseCtx) -> bool {
|
||||
ctx.peek().is_none_or(|t| *t == Token::CloseCurly)
|
||||
}
|
||||
let final_semicolon = loop {
|
||||
if at_end(ctx) {
|
||||
break true;
|
||||
}
|
||||
let expr: Expr = ctx.parse()?;
|
||||
let needs_semicolon = expr.needs_semicolon();
|
||||
items.push(expr);
|
||||
if at_end(ctx) {
|
||||
break false;
|
||||
}
|
||||
if needs_semicolon {
|
||||
ctx.expect(Token::Semicolon)?;
|
||||
}
|
||||
while ctx.next_if(Token::Semicolon) {}
|
||||
};
|
||||
Ok(Self {
|
||||
items,
|
||||
final_semicolon,
|
||||
span: ctx.span(),
|
||||
})
|
||||
}
|
||||
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter, ctx: DisplayCtx) -> std::fmt::Result {
|
||||
if let Some((last, rest)) = self.items.split_last() {
|
||||
for i in rest {
|
||||
writeln!(
|
||||
f,
|
||||
"{}{}{}",
|
||||
" ".repeat(ctx.indent),
|
||||
i.dsp(ctx),
|
||||
if i.needs_semicolon() { ";" } else { "" }
|
||||
)?;
|
||||
}
|
||||
writeln!(
|
||||
f,
|
||||
"{}{}{}",
|
||||
" ".repeat(ctx.indent),
|
||||
last.dsp(ctx),
|
||||
if self.final_semicolon { ";" } else { "" }
|
||||
)?;
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,273 @@
|
||||
use crate::parser::VecDspT;
|
||||
|
||||
pub use super::*;
|
||||
|
||||
pub struct Expr {
|
||||
pub span: Span,
|
||||
pub ty: ExprTy,
|
||||
}
|
||||
|
||||
pub enum ExprTy {
|
||||
Block(Body),
|
||||
Group(Box<Expr>),
|
||||
Member {
|
||||
of: Box<Expr>,
|
||||
field: Ident,
|
||||
},
|
||||
Ident(Ident),
|
||||
Lit(Lit),
|
||||
Negate(Box<Expr>),
|
||||
Call {
|
||||
target: Box<Expr>,
|
||||
args: Vec<Expr>,
|
||||
},
|
||||
Assign {
|
||||
target: Box<Expr>,
|
||||
val: Box<Expr>,
|
||||
},
|
||||
Define {
|
||||
target: Box<Expr>,
|
||||
ty: Option<Type>,
|
||||
const_: bool,
|
||||
val: Box<Expr>,
|
||||
},
|
||||
If {
|
||||
cond: Box<Expr>,
|
||||
body: Box<Expr>,
|
||||
},
|
||||
Loop {
|
||||
body: Box<Expr>,
|
||||
},
|
||||
While {
|
||||
cond: Box<Expr>,
|
||||
body: Box<Expr>,
|
||||
},
|
||||
Import(Ident),
|
||||
Fn(Box<Func>),
|
||||
Break,
|
||||
Asm(AsmBlock),
|
||||
}
|
||||
|
||||
impl Node for Expr {
|
||||
fn parse(ctx: &mut ParseCtx) -> Result<Self, CompilerMsg> {
|
||||
let mut res = Self::unit(ctx)?;
|
||||
while let Some(next) = ctx.peek() {
|
||||
let ty = match next {
|
||||
Token::Equal => {
|
||||
ctx.next();
|
||||
let target = Box::new(res);
|
||||
let val = Box::new(ctx.parse_with(Self::unit)?);
|
||||
ExprTy::Assign { target, val }
|
||||
}
|
||||
Token::Colon => {
|
||||
ctx.next();
|
||||
let target = Box::new(res);
|
||||
let mut ty = None;
|
||||
let next = ctx.expect_peek()?;
|
||||
if !matches!(next, Token::Equal | Token::Colon) {
|
||||
ty = Some(ctx.parse()?);
|
||||
}
|
||||
let const_ = match ctx.expect_next()? {
|
||||
Token::Equal => false,
|
||||
Token::Colon => true,
|
||||
t => ctx.unexpected(t, "an equals = or colon :")?,
|
||||
};
|
||||
let val = Box::new(ctx.parse_with(Self::unit)?);
|
||||
ExprTy::Define {
|
||||
target,
|
||||
ty,
|
||||
val,
|
||||
const_,
|
||||
}
|
||||
}
|
||||
Token::OpenParen => {
|
||||
ctx.next();
|
||||
let target = Box::new(res);
|
||||
let args = ctx.list(Token::Comma, Token::CloseParen)?;
|
||||
ExprTy::Call { target, args }
|
||||
}
|
||||
Token::Dot => {
|
||||
ctx.next();
|
||||
let of = Box::new(res);
|
||||
let field = ctx.parse()?;
|
||||
ExprTy::Member { of, field }
|
||||
}
|
||||
_ => break,
|
||||
};
|
||||
res = Self {
|
||||
ty,
|
||||
span: ctx.span(),
|
||||
};
|
||||
}
|
||||
Ok(res)
|
||||
}
|
||||
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter, ctx: DisplayCtx) -> std::fmt::Result {
|
||||
self.ty.fmt(f, ctx)
|
||||
}
|
||||
}
|
||||
|
||||
impl ExprTy {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter, mut ctx: DisplayCtx) -> std::fmt::Result {
|
||||
match self {
|
||||
Self::Ident(ident) => ident.fmt(f, ctx),
|
||||
Self::Group(expr) => write!(f, "({})", expr.dsp(ctx)),
|
||||
Self::Fn(func) => func.fmt(f, ctx),
|
||||
Self::Lit(lit) => write!(f, "{}", lit),
|
||||
Self::Negate(expr) => {
|
||||
write!(f, "-{}", expr.dsp(ctx))
|
||||
}
|
||||
Self::Call { target, args } => {
|
||||
write!(f, "{}({})", target.dsp(ctx), args.dsp(ctx))
|
||||
}
|
||||
Self::Assign { target, val } => {
|
||||
write!(f, "{} = {}", target.dsp(ctx), val.dsp(ctx))
|
||||
}
|
||||
Self::Define {
|
||||
target,
|
||||
ty,
|
||||
val,
|
||||
const_,
|
||||
} => {
|
||||
write!(f, "{} :", target.dsp(ctx))?;
|
||||
if let Some(ty) = ty {
|
||||
write!(f, " {} ", ty.dsp(ctx))?;
|
||||
}
|
||||
write!(f, "{} {}", if *const_ { ":" } else { "=" }, val.dsp(ctx))
|
||||
}
|
||||
Self::Member { of, field } => {
|
||||
write!(f, "{}.{field}", of.dsp(ctx))
|
||||
}
|
||||
Self::If { cond, body } => {
|
||||
write!(f, "if {} {}", cond.dsp(ctx), body.dsp(ctx))
|
||||
}
|
||||
Self::While { cond, body } => {
|
||||
write!(f, "while {} {}", cond.dsp(ctx), body.dsp(ctx))
|
||||
}
|
||||
Self::Loop { body } => {
|
||||
write!(f, "loop {}", body.dsp(ctx))
|
||||
}
|
||||
Self::Block(body) => {
|
||||
write!(f, "{{")?;
|
||||
if !body.items.is_empty() {
|
||||
writeln!(f)?;
|
||||
ctx.indent += 3;
|
||||
body.fmt(f, ctx)?;
|
||||
}
|
||||
write!(f, "}}")?;
|
||||
Ok(())
|
||||
}
|
||||
Self::Import(ident) => {
|
||||
write!(f, "import {ident}")
|
||||
}
|
||||
Self::Break => {
|
||||
write!(f, "break")
|
||||
}
|
||||
Self::Asm(asm) => asm.fmt(f, ctx),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl Expr {
|
||||
pub fn fmt_body(&self, f: &mut std::fmt::Formatter, ctx: DisplayCtx) -> std::fmt::Result {
|
||||
match self.ty {
|
||||
ExprTy::Block(_) => self.fmt(f, ctx),
|
||||
_ => write!(f, "=> {}", self.dsp(ctx)),
|
||||
}
|
||||
}
|
||||
|
||||
fn unit(ctx: &mut ParseCtx) -> Result<Self, CompilerMsg> {
|
||||
let ty = match ctx.expect_next()? {
|
||||
Token::Dash => ExprTy::Negate(ctx.parse_box()?),
|
||||
Token::Ident(s) => ExprTy::Ident(ctx.ident(s)),
|
||||
Token::Lit(l) => ExprTy::Lit(ctx.lit(l)),
|
||||
Token::Fn => ExprTy::Fn(ctx.parse_box()?),
|
||||
Token::If => {
|
||||
let cond = ctx.parse_box()?;
|
||||
let body = Box::new(Self::body(ctx)?);
|
||||
ExprTy::If { cond, body }
|
||||
}
|
||||
Token::While => {
|
||||
let cond = ctx.parse_box()?;
|
||||
let body = Box::new(Self::body(ctx)?);
|
||||
ExprTy::While { cond, body }
|
||||
}
|
||||
Token::Loop => {
|
||||
let body = ctx.parse_box()?;
|
||||
ExprTy::Loop { body }
|
||||
}
|
||||
Token::OpenParen => {
|
||||
if ctx.next_if(Token::CloseParen) {
|
||||
ExprTy::Lit(Lit {
|
||||
ty: LitTy::Unit,
|
||||
span: ctx.span(),
|
||||
})
|
||||
} else {
|
||||
let inner = ctx.parse_box()?;
|
||||
ctx.expect(Token::CloseParen)?;
|
||||
ExprTy::Group(inner)
|
||||
}
|
||||
}
|
||||
Token::OpenCurly => {
|
||||
let body = ctx.parse()?;
|
||||
ctx.expect(Token::CloseCurly)?;
|
||||
ExprTy::Block(body)
|
||||
}
|
||||
Token::Break => ExprTy::Break,
|
||||
Token::Import => {
|
||||
let ident = ctx.parse()?;
|
||||
ExprTy::Import(ident)
|
||||
}
|
||||
Token::Asm => ExprTy::Asm(ctx.parse()?),
|
||||
other => return ctx.unexpected(other, "an expression"),
|
||||
};
|
||||
Ok(Self {
|
||||
ty,
|
||||
span: ctx.span(),
|
||||
})
|
||||
}
|
||||
|
||||
pub fn is_group(&self) -> bool {
|
||||
matches!(self.ty, ExprTy::Group(_))
|
||||
}
|
||||
|
||||
pub fn is_block(&self) -> bool {
|
||||
matches!(self.ty, ExprTy::Block(_))
|
||||
}
|
||||
|
||||
pub fn block(ctx: &mut ParseCtx) -> Result<Expr, CompilerMsg> {
|
||||
ctx.expect(Token::OpenCurly)?;
|
||||
let id = ctx.parse()?;
|
||||
ctx.expect(Token::CloseCurly)?;
|
||||
Ok(Expr {
|
||||
ty: ExprTy::Block(id),
|
||||
span: ctx.span(),
|
||||
})
|
||||
}
|
||||
|
||||
pub fn body(ctx: &mut ParseCtx) -> Result<Expr, CompilerMsg> {
|
||||
if ctx.next_if(Token::DoubleArrow) {
|
||||
ctx.parse()
|
||||
} else {
|
||||
ctx.parse_with(Expr::block)
|
||||
}
|
||||
}
|
||||
|
||||
pub fn ends_with_block(&self) -> bool {
|
||||
match &self.ty {
|
||||
ExprTy::Block(..) => true,
|
||||
ExprTy::Loop { body }
|
||||
| ExprTy::While { body, .. }
|
||||
| ExprTy::If { body, .. }
|
||||
| ExprTy::Negate(body)
|
||||
| ExprTy::Assign { val: body, .. } => body.ends_with_block(),
|
||||
ExprTy::Define { val: body, .. } => body.ends_with_block(),
|
||||
ExprTy::Fn(f) => f.ends_with_block(),
|
||||
_ => false,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn needs_semicolon(&self) -> bool {
|
||||
!self.ends_with_block()
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,49 @@
|
||||
use super::*;
|
||||
|
||||
pub struct Func {
|
||||
args: Vec<Param>,
|
||||
ret: Option<Type>,
|
||||
body: Expr,
|
||||
span: Span,
|
||||
}
|
||||
|
||||
impl Node for Func {
|
||||
fn parse(ctx: &mut ParseCtx) -> Result<Self, CompilerMsg> {
|
||||
ctx.expect(Token::OpenParen)?;
|
||||
let args = ctx.list(Token::Comma, Token::CloseParen)?;
|
||||
let mut ret = None;
|
||||
if ctx.next_if(Token::Arrow) {
|
||||
ret = Some(ctx.parse()?);
|
||||
}
|
||||
let body = Expr::body(ctx)?;
|
||||
Ok(Self {
|
||||
args,
|
||||
ret,
|
||||
body,
|
||||
span: ctx.span(),
|
||||
})
|
||||
}
|
||||
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter, ctx: DisplayCtx) -> std::fmt::Result {
|
||||
write!(f, "fn")?;
|
||||
write!(f, "(")?;
|
||||
if let Some((last, rest)) = self.args.split_last() {
|
||||
for arg in rest {
|
||||
write!(f, "{}, ", arg.dsp(ctx))?;
|
||||
}
|
||||
write!(f, "{}", last.dsp(ctx))?;
|
||||
}
|
||||
write!(f, ") ")?;
|
||||
if let Some(ret) = &self.ret {
|
||||
write!(f, "-> {} ", ret.dsp(ctx))?;
|
||||
}
|
||||
self.body.fmt_body(f, ctx)?;
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
impl Func {
|
||||
pub fn ends_with_block(&self) -> bool {
|
||||
self.body.ends_with_block()
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,25 @@
|
||||
use super::*;
|
||||
|
||||
pub struct Ident {
|
||||
pub name: String,
|
||||
pub span: Span,
|
||||
}
|
||||
|
||||
impl Node for Ident {
|
||||
fn parse(ctx: &mut super::ParseCtx) -> Result<Self, crate::io::CompilerMsg> {
|
||||
match ctx.expect_next()? {
|
||||
Token::Ident(ident) => Ok(ctx.ident(ident)),
|
||||
t => ctx.unexpected(t, "an identifier"),
|
||||
}
|
||||
}
|
||||
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter, _: DisplayCtx) -> std::fmt::Result {
|
||||
write!(f, "{}", self.name)
|
||||
}
|
||||
}
|
||||
|
||||
impl std::fmt::Display for Ident {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
self.name.fmt(f)
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,78 @@
|
||||
use super::*;
|
||||
|
||||
pub struct Item {
|
||||
pub ty: ItemTy,
|
||||
pub span: Span,
|
||||
}
|
||||
|
||||
pub enum ItemTy {
|
||||
Let {
|
||||
name: Ident,
|
||||
ty: Option<Type>,
|
||||
val: Expr,
|
||||
},
|
||||
Fn(Func),
|
||||
Expr(Expr),
|
||||
Import(Ident),
|
||||
}
|
||||
|
||||
impl Node for Item {
|
||||
fn parse(ctx: &mut ParseCtx) -> Result<Self, CompilerMsg> {
|
||||
let ty = match ctx.expect_peek()? {
|
||||
Token::Fn => {
|
||||
ctx.next();
|
||||
ItemTy::Fn(ctx.parse()?)
|
||||
}
|
||||
Token::Let => {
|
||||
ctx.next();
|
||||
let name = ctx.parse()?;
|
||||
let mut ty = None;
|
||||
if ctx.next_if(Token::Colon) {
|
||||
ty = Some(ctx.parse()?);
|
||||
}
|
||||
ctx.expect(Token::Equal)?;
|
||||
let val = ctx.parse()?;
|
||||
ItemTy::Let { name, ty, val }
|
||||
}
|
||||
Token::Import => {
|
||||
ctx.next();
|
||||
ItemTy::Import(ctx.parse()?)
|
||||
}
|
||||
_ => ItemTy::Expr(ctx.parse()?),
|
||||
};
|
||||
Ok(Self {
|
||||
ty,
|
||||
span: ctx.span(),
|
||||
})
|
||||
}
|
||||
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter, ctx: DisplayCtx) -> std::fmt::Result {
|
||||
match &self.ty {
|
||||
ItemTy::Fn(func) => func.fmt(f, ctx)?,
|
||||
ItemTy::Let { name, ty, val } => {
|
||||
write!(f, "let {}", name.dsp(ctx))?;
|
||||
if let Some(ty) = ty {
|
||||
write!(f, ": {}", ty.dsp(ctx))?;
|
||||
}
|
||||
write!(f, " = {}", val.dsp(ctx))?;
|
||||
}
|
||||
ItemTy::Expr(expr) => expr.fmt(f, ctx)?,
|
||||
ItemTy::Import(ident) => write!(f, "import {}", ident.dsp(ctx))?,
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
impl Item {
|
||||
pub fn ends_with_block(&self) -> bool {
|
||||
match &self.ty {
|
||||
ItemTy::Let { val, .. } => val.ends_with_block(),
|
||||
ItemTy::Expr(id) => id.ends_with_block(),
|
||||
ItemTy::Fn(f) => f.ends_with_block(),
|
||||
ItemTy::Import(ident) => false,
|
||||
}
|
||||
}
|
||||
pub fn needs_semicolon(&self) -> bool {
|
||||
!self.ends_with_block()
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,18 @@
|
||||
mod asm;
|
||||
mod body;
|
||||
mod expr;
|
||||
mod func;
|
||||
mod ident;
|
||||
mod param;
|
||||
mod struct_;
|
||||
mod ty;
|
||||
pub use asm::*;
|
||||
pub use body::*;
|
||||
pub use expr::*;
|
||||
pub use func::*;
|
||||
pub use ident::*;
|
||||
pub use param::*;
|
||||
pub use ty::*;
|
||||
|
||||
use super::{DisplayCtx, Lit, LitTy, Node, ParseCtx, Token};
|
||||
use crate::io::{CompilerMsg, Span};
|
||||
@@ -0,0 +1,25 @@
|
||||
use super::*;
|
||||
|
||||
pub struct Param {
|
||||
name: Ident,
|
||||
ty: Option<Type>,
|
||||
}
|
||||
|
||||
impl Node for Param {
|
||||
fn parse(ctx: &mut ParseCtx) -> Result<Self, CompilerMsg> {
|
||||
let name = ctx.parse()?;
|
||||
let mut ty = None;
|
||||
if ctx.next_if(Token::Colon) {
|
||||
ty = Some(ctx.parse()?);
|
||||
}
|
||||
Ok(Self { name, ty })
|
||||
}
|
||||
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter, ctx: DisplayCtx) -> std::fmt::Result {
|
||||
self.name.fmt(f, ctx)?;
|
||||
if let Some(ty) = &self.ty {
|
||||
write!(f, ": {}", ty.dsp(ctx))?;
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,8 @@
|
||||
use super::*;
|
||||
|
||||
/// A struct declaration: a name and its fields. No parser is attached yet.
pub struct Struct {
    /// Struct name.
    name: String,
    /// Declared fields.
    fields: Vec<Field>,
}

/// A struct field; currently an empty placeholder.
pub struct Field {}
|
||||
@@ -0,0 +1,20 @@
|
||||
use super::*;
|
||||
|
||||
pub enum Type {
|
||||
Ident(Ident),
|
||||
}
|
||||
|
||||
impl Node for Type {
|
||||
fn parse(ctx: &mut ParseCtx) -> Result<Self, CompilerMsg> {
|
||||
Ok(match ctx.expect_next()? {
|
||||
Token::Ident(s) => Self::Ident(ctx.ident(s)),
|
||||
t => ctx.unexpected(t, "a type")?,
|
||||
})
|
||||
}
|
||||
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter, ctx: DisplayCtx) -> std::fmt::Result {
|
||||
match self {
|
||||
Type::Ident(id) => id.fmt(f, ctx),
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -1,71 +0,0 @@
|
||||
use std::{iter::Peekable, str::Chars};
|
||||
|
||||
/// A peekable character stream that tracks line/column positions.
#[derive(Clone)]
pub struct CharIter<'a> {
    /// Underlying characters with one character of lookahead.
    iter: Peekable<Chars<'a>>,
    /// Position reported by [`CharIter::pos`].
    pos: CharPos,
    /// Position of the character that will be consumed next.
    next_pos: CharPos,
}

/// A zero-based line/column position.
#[derive(Clone, Copy, Debug, Default)]
pub struct CharPos {
    line: usize,
    col: usize,
}

impl<'a> CharIter<'a> {
    /// Creates an iterator over `text`, positioned at line 0, column 0.
    pub fn new(text: &'a str) -> Self {
        let origin = CharPos::default();
        Self {
            iter: text.chars().peekable(),
            pos: origin,
            next_pos: origin,
        }
    }

    /// Consumes and returns the next character, updating positions.
    pub fn next(&mut self) -> Option<char> {
        let consumed = self.iter.next();
        self.advance(consumed)
    }

    /// Moves `pos` to `next_pos` and then steps `next_pos` past `c`.
    ///
    /// `pos` is updated even when `c` is `None`, so after a failed
    /// `next_if` (or at end of input) `pos` names the upcoming position.
    fn advance(&mut self, c: Option<char>) -> Option<char> {
        self.pos = self.next_pos;
        match c {
            Some('\n') => {
                self.next_pos.line += 1;
                self.next_pos.col = 0;
            }
            Some(_) => self.next_pos.col += 1,
            None => {}
        }
        c
    }

    /// Returns the next character without consuming it.
    pub fn peek(&mut self) -> Option<char> {
        self.iter.peek().copied()
    }

    /// Consumes the next character only if `f` accepts it.
    pub fn next_if(&mut self, f: impl FnOnce(&char) -> bool) -> Option<char> {
        let consumed = self.iter.next_if(f);
        self.advance(consumed)
    }

    /// Current position, as maintained by `advance`.
    pub fn pos(&self) -> CharPos {
        self.pos
    }

    /// Collects characters up to (not including) the next `until`, which is
    /// consumed. Returns `None` if the input ends first.
    pub fn until(&mut self, until: char) -> Option<String> {
        let mut collected = String::new();
        loop {
            match self.next()? {
                c if c == until => return Some(collected),
                c => collected.push(c),
            }
        }
    }
}

impl<'a> From<&'a str> for CharIter<'a> {
    fn from(value: &'a str) -> Self {
        CharIter::new(value)
    }
}
|
||||
@@ -1,24 +0,0 @@
|
||||
use super::*;
|
||||
|
||||
/// Reserved words recognised by the tokenizer.
#[derive(Debug, Clone, PartialEq)]
pub enum Keyword {
    /// `let`
    Let,
    /// `fn`
    Fn,
}

impl Keyword {
    /// Maps an identifier to its keyword, or `None` if it is not reserved.
    /// Matching is exact and case-sensitive.
    pub fn parse(ident: &str) -> Option<Self> {
        match ident {
            "let" => Some(Self::Let),
            "fn" => Some(Self::Fn),
            _ => None,
        }
    }
}
|
||||
|
||||
impl From<Keyword> for Token {
    /// Wraps the keyword in `Token::Keyword`.
    fn from(keyword: Keyword) -> Self {
        Self::Keyword(keyword)
    }
}
|
||||
|
||||
@@ -1,4 +0,0 @@
|
||||
/// Literal values produced by the tokenizer.
#[derive(Debug, Clone, PartialEq)]
pub enum Literal {
    /// Text of a string literal.
    String(String),
}
|
||||
@@ -1,134 +0,0 @@
|
||||
use super::io::*;
|
||||
|
||||
mod chr;
|
||||
mod kw;
|
||||
mod lit;
|
||||
mod symbol;
|
||||
|
||||
pub use chr::*;
|
||||
pub use kw::*;
|
||||
pub use lit::*;
|
||||
pub use symbol::*;
|
||||
|
||||
#[derive(Debug, Clone, PartialEq)]
|
||||
pub enum Token {
|
||||
Lit(Literal),
|
||||
Keyword(Keyword),
|
||||
Ident(String),
|
||||
Symbol(Symbol),
|
||||
}
|
||||
|
||||
#[derive(Clone)]
|
||||
pub struct TokenCursor<'a> {
|
||||
iter: CharIter<'a>,
|
||||
prev_sym: Option<Symbol>,
|
||||
next: Option<(Token, CharSpan)>,
|
||||
next_start: CharPos,
|
||||
prev_end: CharPos,
|
||||
}
|
||||
|
||||
impl<'a> TokenCursor<'a> {
|
||||
pub fn new(iter: impl Into<CharIter<'a>>) -> Self {
|
||||
let mut s = Self {
|
||||
next: None,
|
||||
prev_sym: None,
|
||||
iter: iter.into(),
|
||||
next_start: CharPos::default(),
|
||||
prev_end: CharPos::default(),
|
||||
};
|
||||
s.next();
|
||||
s
|
||||
}
|
||||
|
||||
pub fn next(&mut self) -> Option<Token> {
|
||||
self.next_span().map(|n| n.0)
|
||||
}
|
||||
|
||||
pub fn next_span(&mut self) -> Option<(Token, CharSpan)> {
|
||||
self.prev_end = self.iter.pos();
|
||||
self.prev_sym = self.next.as_ref().and_then(|n| match n.0 {
|
||||
Token::Symbol(s) => Some(s),
|
||||
_ => None,
|
||||
});
|
||||
while self.iter.next_if(|c| c.is_whitespace()).is_some() {}
|
||||
self.next_start = self.iter.pos();
|
||||
std::mem::replace(&mut self.next, Self::get_next(&mut self.iter))
|
||||
}
|
||||
|
||||
fn get_next(iter: &mut CharIter) -> Option<(Token, CharSpan)> {
|
||||
while iter.next_if(|c| c.is_whitespace()).is_some() {}
|
||||
if let Some(c) = iter.next() {
|
||||
let start = iter.pos();
|
||||
let val = Self::get_next_inner(iter, c);
|
||||
let span = start.to(iter.pos());
|
||||
val.map(|v| (v, span))
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}
|
||||
|
||||
fn get_next_inner(iter: &mut CharIter, c: char) -> Option<Token> {
|
||||
if c == '"' {
|
||||
return iter.until('"').map(|s| Token::Lit(Literal::String(s)));
|
||||
}
|
||||
if let Some(sym) = Symbol::parse(c, iter) {
|
||||
return Some(Token::Symbol(sym));
|
||||
}
|
||||
let mut ident = c.to_string();
|
||||
while let Some(c) = iter.next_if(|c| !c.is_whitespace() && Symbol::parse_char(*c).is_none())
|
||||
{
|
||||
ident.push(c);
|
||||
}
|
||||
Some(if let Some(kw) = Keyword::parse(&ident) {
|
||||
Token::Keyword(kw)
|
||||
} else {
|
||||
Token::Ident(ident)
|
||||
})
|
||||
}
|
||||
|
||||
pub fn peek(&self) -> Option<&Token> {
|
||||
self.peek_span().map(|v| v.0)
|
||||
}
|
||||
|
||||
pub fn peek_span(&self) -> Option<(&Token, CharSpan)> {
|
||||
self.next.as_ref().map(|(t, s)| (t, *s))
|
||||
}
|
||||
|
||||
pub fn next_if(&mut self, f: impl FnOnce(&Token) -> bool) -> Option<Token> {
|
||||
if self.peek().is_some_and(f) {
|
||||
self.next()
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}
|
||||
|
||||
pub fn next_is(&mut self, token: impl Into<Token>) -> bool {
|
||||
self.next_is_ref(&token.into())
|
||||
}
|
||||
|
||||
pub fn peek_is(&mut self, token: impl Into<Token>) -> bool {
|
||||
self.peek().is_some_and(|t| *t == token.into())
|
||||
}
|
||||
|
||||
pub fn next_is_ref(&mut self, token: &Token) -> bool {
|
||||
self.next_if(|t| t == token).is_some()
|
||||
}
|
||||
|
||||
pub fn next_start(&self) -> CharPos {
|
||||
self.next_start
|
||||
}
|
||||
|
||||
pub fn prev_end(&self) -> CharPos {
|
||||
self.prev_end
|
||||
}
|
||||
|
||||
pub fn prev_sym(&self) -> Option<Symbol> {
|
||||
self.prev_sym
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a, T: Into<CharIter<'a>>> From<T> for TokenCursor<'a> {
|
||||
fn from(value: T) -> Self {
|
||||
Self::new(value.into())
|
||||
}
|
||||
}
|
||||
@@ -1,83 +0,0 @@
|
||||
use super::*;
|
||||
|
||||
#[derive(Debug, Clone, Copy, PartialEq)]
|
||||
pub enum Symbol {
|
||||
// 1 char
|
||||
OpenParen,
|
||||
CloseParen,
|
||||
OpenCurly,
|
||||
CloseCurly,
|
||||
Plus,
|
||||
Minus,
|
||||
Slash,
|
||||
Asterisk,
|
||||
Equal,
|
||||
Colon,
|
||||
Semicolon,
|
||||
SingleQuote,
|
||||
Comma,
|
||||
// 2 chars
|
||||
Arrow,
|
||||
DoubleArrow,
|
||||
}
|
||||
|
||||
impl Symbol {
|
||||
pub fn parse(c: char, iter: &mut CharIter) -> Option<Self> {
|
||||
Self::parse_char(c).map(|s| s.parse_rest(iter))
|
||||
}
|
||||
pub fn parse_char(c: char) -> Option<Self> {
|
||||
Some(match c {
|
||||
'(' => Symbol::OpenParen,
|
||||
')' => Symbol::CloseParen,
|
||||
'{' => Symbol::OpenCurly,
|
||||
'}' => Symbol::CloseCurly,
|
||||
'+' => Symbol::Plus,
|
||||
'-' => Symbol::Minus,
|
||||
'/' => Symbol::Slash,
|
||||
'*' => Symbol::Asterisk,
|
||||
'=' => Symbol::Equal,
|
||||
':' => Symbol::Colon,
|
||||
';' => Symbol::Semicolon,
|
||||
'\'' => Symbol::SingleQuote,
|
||||
',' => Symbol::Comma,
|
||||
_ => return None,
|
||||
})
|
||||
}
|
||||
pub fn parse_rest(mut self, iter: &mut CharIter) -> Self {
|
||||
let Some(next) = iter.peek() else {
|
||||
return self;
|
||||
};
|
||||
match (self, next) {
|
||||
(Symbol::Minus, '>') => self = Symbol::Arrow,
|
||||
(Symbol::Equal, '>') => self = Symbol::DoubleArrow,
|
||||
_ => return self,
|
||||
}
|
||||
iter.next();
|
||||
self
|
||||
}
|
||||
pub fn str(&self) -> &'static str {
|
||||
match self {
|
||||
Symbol::OpenParen => "(",
|
||||
Symbol::CloseParen => ")",
|
||||
Symbol::OpenCurly => "{",
|
||||
Symbol::CloseCurly => "}",
|
||||
Symbol::Plus => "+",
|
||||
Symbol::Minus => "-",
|
||||
Symbol::Slash => "/",
|
||||
Symbol::Asterisk => "*",
|
||||
Symbol::Equal => "=",
|
||||
Symbol::Colon => ":",
|
||||
Symbol::Semicolon => ";",
|
||||
Symbol::SingleQuote => "'",
|
||||
Symbol::Arrow => "->",
|
||||
Symbol::DoubleArrow => "=>",
|
||||
Symbol::Comma => ",",
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl From<Symbol> for Token {
|
||||
fn from(value: Symbol) -> Self {
|
||||
Token::Symbol(value)
|
||||
}
|
||||
}
|
||||
@@ -1,30 +0,0 @@
|
||||
use super::*;
|
||||
|
||||
#[derive(Debug)]
|
||||
pub struct PBlock {
|
||||
statements: Vec<Node<PStatement>>,
|
||||
return_last: bool,
|
||||
}
|
||||
|
||||
impl Parsable for PBlock {
|
||||
type Data = bool;
|
||||
|
||||
fn parse(ctx: &mut ParserCtx, curlies: bool) -> ParseResult<Self> {
|
||||
let end = if curlies {
|
||||
ctx.expect(Symbol::OpenCurly)?;
|
||||
Some(Symbol::CloseCurly)
|
||||
} else {
|
||||
None
|
||||
};
|
||||
let res = ctx.parse_list(
|
||||
end,
|
||||
SepCheck::new(Symbol::Semicolon)
|
||||
.dup(true)
|
||||
.skip_if(|ctx, _| ctx.prev_sym().is_some_and(|s| s == Symbol::CloseCurly)),
|
||||
)?;
|
||||
ParseResult::Ok(Self {
|
||||
statements: res.nodes,
|
||||
return_last: res.last_sep,
|
||||
})
|
||||
}
|
||||
}
|
||||
@@ -1,52 +0,0 @@
|
||||
use super::*;
|
||||
|
||||
#[derive(Debug)]
|
||||
pub enum PExpr {
|
||||
Ident(String),
|
||||
Lit(Literal),
|
||||
Block(Node<PBlock>),
|
||||
Group(BNode<PExpr>),
|
||||
Unit,
|
||||
}
|
||||
|
||||
impl Parsable for PExpr {
|
||||
fn parse(ctx: &mut ParserCtx, _: ()) -> ParseResult<Self> {
|
||||
Self::parse_unit(ctx)
|
||||
}
|
||||
}
|
||||
|
||||
impl PExpr {
|
||||
fn parse_unit(ctx: &mut ParserCtx) -> ParseResult<Self> {
|
||||
ParseResult::Ok(match ctx.expect_peek()? {
|
||||
Token::Lit(lit) => {
|
||||
let res = PExpr::Lit(lit.clone());
|
||||
ctx.next();
|
||||
res
|
||||
}
|
||||
Token::Ident(ident) => {
|
||||
let res = PExpr::Ident(ident.to_string());
|
||||
ctx.next();
|
||||
res
|
||||
}
|
||||
Token::Symbol(symbol) => match symbol {
|
||||
Symbol::OpenParen => {
|
||||
ctx.expect_next()?;
|
||||
if ctx.next_is(Symbol::CloseParen) {
|
||||
PExpr::Unit
|
||||
} else {
|
||||
let inner = ctx.parse();
|
||||
let Some(inner) = inner else {
|
||||
ctx.seek(Symbol::CloseParen);
|
||||
return ParseResult::SubErr;
|
||||
};
|
||||
ctx.expect(Symbol::CloseParen)?;
|
||||
PExpr::Group(inner.bx())
|
||||
}
|
||||
}
|
||||
Symbol::OpenCurly => PExpr::Block(ctx.parse_with(true)?),
|
||||
_ => return ctx.unexpected("expression").res(),
|
||||
},
|
||||
_ => return ctx.unexpected("expression").res(),
|
||||
})
|
||||
}
|
||||
}
|
||||
@@ -1,25 +0,0 @@
|
||||
use super::*;
|
||||
|
||||
#[derive(Debug)]
|
||||
pub struct PFunc {
|
||||
name: Node<PIdent>,
|
||||
args: Vec<Node<PVarDef>>,
|
||||
body: Node<PExpr>,
|
||||
}
|
||||
|
||||
impl Parsable for PFunc {
|
||||
type Data = ();
|
||||
|
||||
fn parse(ctx: &mut ParserCtx, _: Self::Data) -> ParseResult<Self> {
|
||||
ctx.expect(Keyword::Fn)?;
|
||||
let name = ctx.parse()?;
|
||||
ctx.expect(Symbol::OpenParen)?;
|
||||
let args = ctx.parse_list(Some(Symbol::CloseParen), SepCheck::new(Symbol::Comma))?;
|
||||
let body = ctx.parse()?;
|
||||
ParseResult::Ok(Self {
|
||||
name,
|
||||
args: args.nodes,
|
||||
body,
|
||||
})
|
||||
}
|
||||
}
|
||||
@@ -1,25 +0,0 @@
|
||||
use super::*;
|
||||
use std::ops::Deref;
|
||||
|
||||
#[derive(Debug)]
|
||||
pub struct PIdent(String);
|
||||
|
||||
impl Deref for PIdent {
|
||||
type Target = String;
|
||||
|
||||
fn deref(&self) -> &String {
|
||||
&self.0
|
||||
}
|
||||
}
|
||||
|
||||
impl Parsable for PIdent {
|
||||
fn parse(ctx: &mut ParserCtx, _: ()) -> ParseResult<Self> {
|
||||
if let Token::Ident(ident) = ctx.expect_peek()? {
|
||||
let ident = ident.clone();
|
||||
ctx.next();
|
||||
ParseResult::Ok(Self(ident))
|
||||
} else {
|
||||
ctx.unexpected("identifier").res()
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -1,128 +0,0 @@
|
||||
use super::*;
|
||||
|
||||
pub struct ListRes<T> {
|
||||
pub nodes: Vec<Node<T>>,
|
||||
pub last_sep: bool,
|
||||
}
|
||||
|
||||
pub trait BetweenFn<T> {
|
||||
fn run(&mut self, ctx: &mut ParserCtx, prev: &Node<T>) -> bool;
|
||||
}
|
||||
|
||||
impl ParserCtx<'_> {
|
||||
pub fn parse_list<T: Parsable<Data = ()>>(
|
||||
&mut self,
|
||||
end: Option<impl Into<Token>>,
|
||||
mut between: impl BetweenFn<T>,
|
||||
) -> Option<ListRes<T>> {
|
||||
let end = end.map(|t| t.into());
|
||||
let mut nodes = Vec::new();
|
||||
let mut last_sep = false;
|
||||
macro_rules! abort {
|
||||
() => {
|
||||
if end.is_some_and(|t| self.seek(t)) {
|
||||
break;
|
||||
} else {
|
||||
return None;
|
||||
}
|
||||
};
|
||||
}
|
||||
macro_rules! check_end {
|
||||
() => {
|
||||
if end.as_ref().is_some_and(|t| self.next_is_ref(t))
|
||||
|| (end.is_none() && self.peek().is_none())
|
||||
{
|
||||
break;
|
||||
}
|
||||
};
|
||||
}
|
||||
loop {
|
||||
check_end!();
|
||||
last_sep = false;
|
||||
nodes.push(match self.parse() {
|
||||
Some(node) => node,
|
||||
None => abort!(),
|
||||
});
|
||||
check_end!();
|
||||
if between.run(self, nodes.last().unwrap()) {
|
||||
abort!();
|
||||
}
|
||||
last_sep = true;
|
||||
}
|
||||
Some(ListRes { nodes, last_sep })
|
||||
}
|
||||
}
|
||||
|
||||
pub struct SepCheck {
|
||||
pub sep: Token,
|
||||
pub dup: bool,
|
||||
}
|
||||
|
||||
impl<T> BetweenFn<T> for SepCheck {
|
||||
fn run(&mut self, ctx: &mut ParserCtx, prev: &Node<T>) -> bool {
|
||||
let Some(next) = ctx.expect_next() else {
|
||||
return true;
|
||||
};
|
||||
if next != self.sep {
|
||||
ctx.msgs
|
||||
.push(ctx.unexpected(format!("Expected {:?}", self.sep)));
|
||||
return true;
|
||||
}
|
||||
if self.dup {
|
||||
while ctx.next_is_ref(&self.sep) {}
|
||||
}
|
||||
false
|
||||
}
|
||||
}
|
||||
|
||||
impl SepCheck {
|
||||
pub fn new(sep: impl Into<Token>) -> Self {
|
||||
Self {
|
||||
sep: sep.into(),
|
||||
dup: false,
|
||||
}
|
||||
}
|
||||
pub fn dup(mut self, dup: bool) -> Self {
|
||||
self.dup = dup;
|
||||
self
|
||||
}
|
||||
}
|
||||
|
||||
pub trait SkipFn<T> = Fn(&mut ParserCtx, &Node<T>) -> bool;
|
||||
|
||||
// I hate everything. sepcheck is fine, this is not
|
||||
pub struct SkipIf<T, F: SkipFn<T>, B: BetweenFn<T>> {
|
||||
f: F,
|
||||
inner: B,
|
||||
_pd: std::marker::PhantomData<T>,
|
||||
}
|
||||
|
||||
impl<T, F: SkipFn<T>, B: BetweenFn<T>> SkipIf<T, F, B> {
|
||||
pub fn new(f: F, run: B) -> Self {
|
||||
Self {
|
||||
f,
|
||||
inner: run,
|
||||
_pd: std::marker::PhantomData,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<T, F: SkipFn<T>, B: BetweenFn<T>> BetweenFn<T> for SkipIf<T, F, B> {
|
||||
fn run(&mut self, ctx: &mut ParserCtx, prev: &Node<T>) -> bool {
|
||||
if (self.f)(ctx, prev) {
|
||||
false
|
||||
} else {
|
||||
self.inner.run(ctx, prev)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub trait BetweenFnUtil<T>: BetweenFn<T> + Sized {
|
||||
fn skip_if<F: SkipFn<T>>(self, f: F) -> SkipIf<T, F, Self>;
|
||||
}
|
||||
|
||||
impl<B: BetweenFn<T>, T> BetweenFnUtil<T> for B {
|
||||
fn skip_if<F: SkipFn<T>>(self, f: F) -> SkipIf<T, F, Self> {
|
||||
SkipIf::new(f, self)
|
||||
}
|
||||
}
|
||||
@@ -1,21 +0,0 @@
|
||||
mod block;
|
||||
mod expr;
|
||||
mod func;
|
||||
mod ident;
|
||||
mod list;
|
||||
mod node;
|
||||
mod statement;
|
||||
mod ty;
|
||||
mod vardef;
|
||||
|
||||
pub use block::*;
|
||||
pub use expr::*;
|
||||
pub use func::*;
|
||||
pub use ident::*;
|
||||
pub use list::*;
|
||||
pub use node::*;
|
||||
pub use statement::*;
|
||||
pub use ty::*;
|
||||
pub use vardef::*;
|
||||
|
||||
use super::*;
|
||||
@@ -1,63 +0,0 @@
|
||||
use super::*;
|
||||
|
||||
pub struct Node<T> {
|
||||
pub data: Option<T>,
|
||||
}
|
||||
|
||||
pub type BNode<T> = Box<Node<T>>;
|
||||
|
||||
pub enum ParseResult<T> {
|
||||
Ok(T),
|
||||
Node(Node<T>),
|
||||
Continue(CompilerMsg),
|
||||
Break(CompilerMsg),
|
||||
SubErr,
|
||||
}
|
||||
|
||||
pub trait Parsable: Sized {
|
||||
type Data = ();
|
||||
fn parse(ctx: &mut ParserCtx, data: Self::Data) -> ParseResult<Self>;
|
||||
}
|
||||
|
||||
impl<T> Node<T> {
|
||||
pub fn bx(self) -> Box<Self> {
|
||||
Box::new(self)
|
||||
}
|
||||
pub fn map<U>(self, f: impl FnOnce(T) -> U) -> Node<U> {
|
||||
Node {
|
||||
data: self.data.map(f),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
use std::convert::Infallible;
|
||||
impl<T> std::ops::FromResidual<Option<Infallible>> for ParseResult<T> {
|
||||
fn from_residual(residual: Option<Infallible>) -> Self {
|
||||
match residual {
|
||||
None => ParseResult::SubErr,
|
||||
}
|
||||
}
|
||||
}
|
||||
impl<T> std::ops::FromResidual<Result<Infallible, CompilerMsg>> for ParseResult<T> {
|
||||
fn from_residual(residual: Result<Infallible, CompilerMsg>) -> Self {
|
||||
match residual {
|
||||
Err(msg) => ParseResult::Break(msg),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl CompilerMsg {
|
||||
pub fn res<T>(self) -> ParseResult<T> {
|
||||
ParseResult::Break(self)
|
||||
}
|
||||
}
|
||||
|
||||
impl<T: std::fmt::Debug> std::fmt::Debug for Node<T> {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
if let Some(d) = &self.data {
|
||||
d.fmt(f)
|
||||
} else {
|
||||
f.write_str("{error}")
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -1,27 +0,0 @@
|
||||
use super::*;
|
||||
|
||||
#[derive(Debug)]
|
||||
pub enum PStatement {
|
||||
Expr(PExpr),
|
||||
Let(Node<PVarDef>, Node<PExpr>),
|
||||
Fn(PFunc),
|
||||
}
|
||||
|
||||
impl Parsable for PStatement {
|
||||
fn parse(ctx: &mut ParserCtx, _: ()) -> ParseResult<Self> {
|
||||
let res = match ctx.expect_peek()? {
|
||||
Token::Keyword(kw) => match kw {
|
||||
Keyword::Let => {
|
||||
ctx.next();
|
||||
let name = ctx.parse()?;
|
||||
ctx.expect(Symbol::Equal)?;
|
||||
let body = ctx.parse()?;
|
||||
Self::Let(name, body)
|
||||
}
|
||||
Keyword::Fn => return ParseResult::Node(ctx.parse()?.map(PStatement::Fn)),
|
||||
},
|
||||
_ => return ParseResult::Node(ctx.parse()?.map(PStatement::Expr)),
|
||||
};
|
||||
ParseResult::Ok(res)
|
||||
}
|
||||
}
|
||||
@@ -1,12 +0,0 @@
|
||||
use super::*;
|
||||
|
||||
#[derive(Debug)]
|
||||
pub struct PType {
|
||||
name: Node<PIdent>,
|
||||
}
|
||||
|
||||
impl Parsable for PType {
|
||||
fn parse(ctx: &mut ParserCtx, _: Self::Data) -> ParseResult<Self> {
|
||||
ParseResult::Ok(Self { name: ctx.parse()? })
|
||||
}
|
||||
}
|
||||
@@ -1,18 +0,0 @@
|
||||
use super::*;
|
||||
|
||||
#[derive(Debug)]
|
||||
pub struct PVarDef {
|
||||
name: Node<PIdent>,
|
||||
ty: Option<Node<PType>>,
|
||||
}
|
||||
|
||||
impl Parsable for PVarDef {
|
||||
fn parse(ctx: &mut ParserCtx, _: Self::Data) -> ParseResult<Self> {
|
||||
let name = ctx.parse()?;
|
||||
let mut ty = None;
|
||||
if ctx.next_is(Symbol::Colon) {
|
||||
ty = Some(ctx.parse()?);
|
||||
}
|
||||
ParseResult::Ok(Self { name, ty })
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,97 @@
|
||||
use std::{
|
||||
collections::{HashMap, HashSet},
|
||||
path::Path,
|
||||
};
|
||||
|
||||
use crate::{
|
||||
io::{CompilerMsg, CompilerOutput, Span},
|
||||
ir::Ir,
|
||||
parser::{self, ExprTy, Ident, Node, parse_file},
|
||||
};
|
||||
|
||||
const EXTENSION: &str = ".lang";
|
||||
|
||||
pub fn parse_program(path: impl AsRef<Path>, output: &mut CompilerOutput) -> Option<Ir> {
|
||||
let path = path.as_ref();
|
||||
let mut imports = Imports::default();
|
||||
let dir = path.parent().unwrap();
|
||||
imports.add(path.file_stem().unwrap().to_str().unwrap());
|
||||
while let Some(next) = imports.new.pop() {
|
||||
imports.done.insert(next.clone());
|
||||
let path = dir.join(next + EXTENSION);
|
||||
println!("=== {path:?}");
|
||||
let body = parse_file(path, output)?;
|
||||
print!("{}", body.new_dsp());
|
||||
let defs = scan(&mut imports, &body, output);
|
||||
for (name, spans) in &defs.duplicates {
|
||||
output.error(CompilerMsg {
|
||||
msg: format!("Multiple definitions found for {name}"),
|
||||
spans: spans.clone(),
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
if !output.errors.is_empty() {
|
||||
return None;
|
||||
}
|
||||
|
||||
let ir = Ir::default();
|
||||
Some(ir)
|
||||
}
|
||||
|
||||
pub fn scan(imports: &mut Imports, body: &parser::Body, output: &mut CompilerOutput) -> Defs {
|
||||
let mut defs = Defs::default();
|
||||
for item in &body.items {
|
||||
match &item.ty {
|
||||
ExprTy::Define { target, const_, .. } if *const_ => match &target.ty {
|
||||
ExprTy::Ident(name) => defs.add(name),
|
||||
_ => output.error(("Invalid left hand side of definition", target.span)),
|
||||
},
|
||||
ExprTy::Import(import) => {
|
||||
defs.add(import);
|
||||
imports.add(&import.name);
|
||||
}
|
||||
_ => (),
|
||||
}
|
||||
}
|
||||
defs
|
||||
}
|
||||
|
||||
#[derive(Default)]
|
||||
pub struct Defs {
|
||||
map: HashMap<String, (usize, Span)>,
|
||||
duplicates: HashMap<String, Vec<Span>>,
|
||||
next_id: usize,
|
||||
}
|
||||
|
||||
impl Defs {
|
||||
pub fn add(&mut self, ident: &Ident) {
|
||||
if let Some(def) = self.map.get(&ident.name) {
|
||||
if let Some(spans) = self.duplicates.get_mut(&ident.name) {
|
||||
spans.push(ident.span);
|
||||
} else {
|
||||
self.duplicates
|
||||
.insert(ident.name.clone(), vec![def.1, ident.span]);
|
||||
}
|
||||
return;
|
||||
}
|
||||
self.map
|
||||
.insert(ident.name.clone(), (self.next_id, ident.span));
|
||||
self.next_id += 1;
|
||||
}
|
||||
}
|
||||
|
||||
/// Work list of module names: those already handled and those still pending.
#[derive(Default)]
pub struct Imports {
    /// Names that have already been taken off the pending list.
    done: HashSet<String>,
    /// Names waiting to be processed; used as a stack by the caller.
    new: Vec<String>,
}

impl Imports {
    /// Queues `name` unless it is already done or already pending.
    pub fn add(&mut self, name: &str) {
        let already_known =
            self.done.contains(name) || self.new.iter().any(|pending| pending == name);
        if !already_known {
            self.new.push(name.to_string());
        }
    }
}
|
||||
@@ -1,8 +0,0 @@
|
||||
let x = "test";
|
||||
let y = "test";
|
||||
|
||||
fn test(x: u32) "hello";
|
||||
fn test3() {
|
||||
arst
|
||||
}
|
||||
fn test2() "hello";
|
||||
Executable
BIN
Binary file not shown.
Executable
BIN
Binary file not shown.
@@ -0,0 +1,5 @@
|
||||
asm {
|
||||
mov eax, 1
|
||||
mov ebx, 39
|
||||
int 0x80
|
||||
}
|
||||
@@ -0,0 +1,17 @@
|
||||
x : u32 = 3;
|
||||
while true {
|
||||
print("hello");
|
||||
print(x);
|
||||
other.thing();
|
||||
thing();
|
||||
break;
|
||||
}
|
||||
|
||||
y :: true;
|
||||
|
||||
if y => print("hello");
|
||||
|
||||
thing :: fn() {
|
||||
}
|
||||
|
||||
import other;
|
||||
Binary file not shown.
Binary file not shown.
@@ -0,0 +1,5 @@
|
||||
thing :: fn() {
|
||||
print("hello from other");
|
||||
}
|
||||
|
||||
import main;
|
||||
Reference in New Issue
Block a user