From 0614d48fcc7263c72d0ef1190357eeed77c515d1 Mon Sep 17 00:00:00 2001 From: shadow cat Date: Sun, 23 Mar 2025 18:40:07 -0400 Subject: [PATCH] questionable refactoring --- src/compiler/arch/riscv/asm.rs | 30 +++++-- src/compiler/arch/riscv/compile.rs | 15 +++- src/compiler/debug.rs | 23 ++++++ src/compiler/elf.rs | 8 +- src/compiler/mod.rs | 7 +- src/compiler/program.rs | 126 ++++++++++++++++++----------- src/ir/lower/func.rs | 1 - src/ir/lower/program.rs | 14 ++-- src/ir/lower/symbol.rs | 32 ++++++-- src/ir/upper/def.rs | 1 + src/main.rs | 17 ++-- src/parser/v3/lower/expr.rs | 3 + src/util/label.rs | 40 +++++++++ src/util/mod.rs | 2 + 14 files changed, 240 insertions(+), 79 deletions(-) create mode 100644 src/compiler/debug.rs create mode 100644 src/util/label.rs diff --git a/src/compiler/arch/riscv/asm.rs b/src/compiler/arch/riscv/asm.rs index 350325b..62369a3 100644 --- a/src/compiler/arch/riscv/asm.rs +++ b/src/compiler/arch/riscv/asm.rs @@ -1,6 +1,5 @@ use crate::{ - compiler::program::{Addr, Instr, SymTable}, - ir::Symbol, + compiler::program::{Addr, Instr, SymTable}, ir::Symbol, util::LabeledFmt }; use super::*; @@ -169,14 +168,29 @@ impl LinkerInstruction { } } +// this is not even remotely worth it but technically it doesn't use the heap I think xdddddddddd impl std::fmt::Debug for LinkerInstruction { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + self.fmt_label(f, &|f, s| write!(f, "{s:?}")) + } +} + +pub struct DebugInstr<'a, R, S, L: Fn(&mut std::fmt::Formatter<'_>, &S) -> std::fmt::Result> { + instr: &'a LinkerInstruction, + label: &'a L, +} + +impl LabeledFmt for LinkerInstruction { + fn fmt_label(&self, f: &mut std::fmt::Formatter<'_>, label: &dyn crate::util::Labeler) -> std::fmt::Result { match self { Self::ECall => write!(f, "ecall"), Self::EBreak => write!(f, "ebreak"), Self::Li { dest, imm } => write!(f, "li {dest:?}, {imm:?}"), Self::Mv { dest, src } => write!(f, "mv {dest:?}, {src:?}"), - Self::La { dest, src } => write!(f, "la {dest:?}, {src:?}"), + Self::La { dest, src } => { + write!(f, "la {dest:?}, @")?; + label(f, src) + }, Self::Load { width, dest, @@ -207,8 +221,14 @@ impl std::fmt::Debug for LinkerInstructi imm, } => write!(f, "{}i {dest:?}, {src:?}, {imm}", opstr(*op, *funct)), Self::Jal { dest, offset } => write!(f, "jal {dest:?}, {offset:?}"), - Self::Call(s) => write!(f, "call {s:?}"), - Self::J(s) => write!(f, "j {s:?}"), + Self::Call(s) => { + write!(f, "call ")?; + label(f, s) + } + Self::J(s) => { + write!(f, "j ")?; + label(f, s) + } Self::Ret => write!(f, "ret"), } } diff --git a/src/compiler/arch/riscv/compile.rs b/src/compiler/arch/riscv/compile.rs index 892e6a8..fec696a 100644 --- a/src/compiler/arch/riscv/compile.rs +++ b/src/compiler/arch/riscv/compile.rs @@ -1,7 +1,7 @@ use std::collections::HashMap; use crate::{ - compiler::{arch::riscv::Reg, create_program, Addr}, + compiler::{arch::riscv::Reg, debug::DebugInfo, UnlinkedProgram}, ir::{ arch::riscv64::{RV64Instruction as AI, RegRef}, IRLInstruction as IRI, IRLProgram, Len, Size, @@ -47,9 +47,10 @@ fn mov_mem( } } -pub fn compile(program: IRLProgram) -> (Vec, Option) { +pub fn compile(program: &IRLProgram) -> UnlinkedProgram
  • { let mut fns = Vec::new(); let mut data = Vec::new(); + let mut dbg = DebugInfo::new(program.labels().to_vec()); for (sym, d) in program.ro_data() { data.push((d.clone(), *sym)); } @@ -83,7 +84,9 @@ pub fn compile(program: IRLProgram) -> (Vec, Option) { v.push(LI::sd(ra, stack_ra, sp)); } } + let mut irli = Vec::new(); for i in &f.instructions { + irli.push((v.len(), format!("{i:?}"))); match i { IRI::Mv { dest, src } => todo!(), IRI::Ref { dest, src } => todo!(), @@ -212,6 +215,7 @@ pub fn compile(program: IRLProgram) -> (Vec, Option) { } } } + dbg.push_fn(irli); if has_stack { if let Some(stack_ra) = stack_ra { v.push(LI::ld(ra, stack_ra, sp)); @@ -221,5 +225,10 @@ pub fn compile(program: IRLProgram) -> (Vec, Option) { v.push(LI::Ret); fns.push((v, *sym)); } - create_program(fns, data, Some(program.entry()), &program) + UnlinkedProgram { + fns: fns.into_iter().map(|(v, s, ..)| (v, s)).collect(), + ro_data: data, + start: Some(program.entry()), + dbg, + } } diff --git a/src/compiler/debug.rs b/src/compiler/debug.rs new file mode 100644 index 0000000..81d1d33 --- /dev/null +++ b/src/compiler/debug.rs @@ -0,0 +1,23 @@ +use crate::ir::Symbol; + +pub struct DebugInfo { + pub sym_labels: Vec>, + pub ir_lower: Vec>, +} + +impl DebugInfo { + pub fn new(sym_labels: Vec>) -> Self { + Self { + ir_lower: Vec::new(), + sym_labels, + } + } + + pub fn push_fn(&mut self, instrs: Vec<(usize, String)>) { + self.ir_lower.push(instrs); + } + + pub fn sym_label(&self, s: Symbol) -> Option<&String> { + self.sym_labels[*s].as_ref() + } +} diff --git a/src/compiler/elf.rs b/src/compiler/elf.rs index 258162c..6f0ef1c 100644 --- a/src/compiler/elf.rs +++ b/src/compiler/elf.rs @@ -1,4 +1,4 @@ -use super::program::Addr; +use super::{program::Addr, LinkedProgram}; #[repr(C)] pub struct ELF64Header { @@ -102,3 +102,9 @@ pub fn create(program: Vec, start_offset: Addr) -> Vec { unsafe fn as_u8_slice(p: &T) -> &[u8] { core::slice::from_raw_parts((p as *const T) as *const u8, size_of::()) } + +impl LinkedProgram { + pub fn to_elf(self) -> Vec { + create(self.code, self.start.expect("no start found")) + } +} diff --git a/src/compiler/mod.rs b/src/compiler/mod.rs index d7aab08..1db9c44 100644 --- a/src/compiler/mod.rs +++ b/src/compiler/mod.rs @@ -1,13 +1,14 @@ pub mod arch; +mod debug; mod elf; mod program; mod target; +use arch::riscv; pub use program::*; use crate::ir::IRLProgram; -pub fn compile(program: IRLProgram) -> Vec { - let (compiled, start) = arch::riscv::compile(program); - elf::create(compiled, start.expect("no start method found")) +pub fn compile(program: &IRLProgram) -> UnlinkedProgram { + arch::riscv::compile(program) } diff --git a/src/compiler/program.rs b/src/compiler/program.rs index 546d363..800710c 100644 --- a/src/compiler/program.rs +++ b/src/compiler/program.rs @@ -1,57 +1,63 @@ use std::collections::HashMap; -use crate::ir::{IRLProgram, Symbol}; +use crate::{ + ir::Symbol, + util::{Labelable, LabeledFmt, Labeler}, +}; -pub fn create_program( - fns: Vec<(Vec, Symbol)>, - ro_data: Vec<(Vec, Symbol)>, - start: Option, - program: &IRLProgram, -) -> (Vec, Option) { - let mut data = Vec::new(); - let mut sym_table = SymTable::new(fns.len() + ro_data.len()); - let mut missing = HashMap::>::new(); - for (val, id) in ro_data { - sym_table.insert(id, Addr(data.len() as u64)); - data.extend(val); - } - data.resize(data.len() + (4 - data.len() % 4), 0); - for (fun, id) in fns { - sym_table.insert(id, Addr(data.len() as u64)); - for i in fun { - let i_pos = Addr(data.len() as u64); - if let Some(sym) = i.push(&mut data, &sym_table, i_pos, false) { - if let Some(vec) = missing.get_mut(&sym) { - vec.push((i_pos, i)); - } else { - missing.insert(sym, vec![(i_pos, i)]); +use super::debug::DebugInfo; + +pub struct LinkedProgram { + pub code: Vec, + pub start: Option, +} + +pub struct UnlinkedProgram { + pub fns: Vec<(Vec, Symbol)>, + pub ro_data: Vec<(Vec, Symbol)>, + pub start: Option, + pub dbg: DebugInfo, +} + +impl UnlinkedProgram { + pub fn link(self) -> LinkedProgram { + let mut data = Vec::new(); + let mut sym_table = SymTable::new(self.fns.len() + self.ro_data.len()); + let mut missing = HashMap::>::new(); + for (val, id) in self.ro_data { + sym_table.insert(id, Addr(data.len() as u64)); + data.extend(val); + } + data.resize(data.len() + (4 - data.len() % 4), 0); + for (fun, id) in self.fns { + sym_table.insert(id, Addr(data.len() as u64)); + for i in fun { + let i_pos = Addr(data.len() as u64); + if let Some(sym) = i.push(&mut data, &sym_table, i_pos, false) { + if let Some(vec) = missing.get_mut(&sym) { + vec.push((i_pos, i)); + } else { + missing.insert(sym, vec![(i_pos, i)]); + } + } + } + if let Some(vec) = missing.remove(&id) { + for (addr, i) in vec { + let mut replace = Vec::new(); + i.push(&mut replace, &sym_table, addr, true); + let pos = addr.val() as usize; + data[pos..pos + replace.len()].copy_from_slice(&replace); } } } - if let Some(vec) = missing.remove(&id) { - for (addr, i) in vec { - let mut replace = Vec::new(); - i.push(&mut replace, &sym_table, addr, true); - let pos = addr.val() as usize; - data[pos..pos + replace.len()].copy_from_slice(&replace); - } + assert!(missing.is_empty()); + LinkedProgram { + code: data, + start: self + .start + .map(|s| sym_table.get(s).expect("start symbol doesn't exist")), } } - for (s, f) in program.fns() { - println!( - "{}: {:?}", - f.name, - sym_table.get(*s).map(|a| { - let pos = a.0 + 0x1000 + 0x40 + 0x38; - format!("0x{:x}", pos) - }) - ); - } - assert!(missing.is_empty()); - ( - data, - start.map(|s| sym_table.get(s).expect("start symbol doesn't exist")), - ) } pub trait Instr { @@ -83,3 +89,31 @@ impl SymTable { } } } + +impl + LabeledFmt> std::fmt::Debug for UnlinkedProgram { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + for ((v, s), irli) in self.fns.iter().zip(&self.dbg.ir_lower) { + writeln!(f, "{}:", self.dbg.sym_label(*s).unwrap())?; + let mut liter = irli.iter(); + let mut cur = liter.next(); + for (i, instr) in v.iter().enumerate() { + if let Some(c) = cur { + if i == c.0 { + writeln!(f, " {}:", c.1)?; + cur = liter.next(); + } + } + writeln!( + f, + " {:?}", + instr.labeled(&|f: &mut std::fmt::Formatter, s: &Symbol| write!( + f, + "{}", + self.dbg.sym_label(*s).unwrap_or(&format!("{:?}", *s)) + )) + )?; + } + } + Ok(()) + } +} diff --git a/src/ir/lower/func.rs b/src/ir/lower/func.rs index 3223c19..808c0a6 100644 --- a/src/ir/lower/func.rs +++ b/src/ir/lower/func.rs @@ -5,7 +5,6 @@ use std::collections::HashMap; #[derive(Debug)] pub struct IRLFunction { - pub name: String, pub instructions: Vec, pub stack: HashMap, pub args: Vec<(VarID, Size)>, diff --git a/src/ir/lower/program.rs b/src/ir/lower/program.rs index beefaa3..ae525f0 100644 --- a/src/ir/lower/program.rs +++ b/src/ir/lower/program.rs @@ -58,7 +58,8 @@ impl IRLProgram { continue; } let data = &p.data[src.0]; - let sym = builder.ro_data(src, data); + let ddef = p.get_data(*src); + let sym = builder.ro_data(src, data, Some(ddef.label.clone())); instrs.push(IRLInstruction::LoadData { dest: dest.id, offset: 0, @@ -75,14 +76,14 @@ impl IRLProgram { let Type::Array(ty, len) = &def.ty else { return Err(format!("tried to load {} as slice", p.type_name(&def.ty))); }; - let sym = builder.ro_data(src, data); + let sym = builder.ro_data(src, data, Some(def.label.clone())); instrs.push(IRLInstruction::LoadAddr { dest: dest.id, offset: 0, src: sym, }); - let sym = builder.anon_ro_data(&(*len as u64).to_le_bytes()); + let sym = builder.anon_ro_data(&(*len as u64).to_le_bytes(), Some(format!("len: {}", len))); instrs.push(IRLInstruction::LoadData { dest: dest.id, offset: 8, @@ -133,7 +134,6 @@ impl IRLProgram { builder.write_fn( sym, IRLFunction { - name: f.name.clone(), instructions: instrs, makes_call, args: f @@ -144,14 +144,10 @@ impl IRLProgram { ret_size: p.size_of_type(&f.ret).expect("unsized type"), stack, }, + Some(f.name.clone()), ); } let sym_space = builder.finish().expect("we failed the mission"); - // println!("fns:"); - // for (a, f) in sym_space.fns() { - // println!(" {:?}: {}", a, f.name); - // } - // println!("datas: {}", sym_space.ro_data().len()); Ok(Self { sym_space, entry }) } diff --git a/src/ir/lower/symbol.rs b/src/ir/lower/symbol.rs index f7be0ed..6702d8f 100644 --- a/src/ir/lower/symbol.rs +++ b/src/ir/lower/symbol.rs @@ -18,6 +18,7 @@ impl std::ops::Deref for WritableSymbol { pub struct SymbolSpace { ro_data: Vec<(Symbol, Vec)>, fns: Vec<(Symbol, IRLFunction)>, + labels: Vec>, } pub struct SymbolSpaceBuilder { @@ -27,6 +28,7 @@ pub struct SymbolSpaceBuilder { data_map: HashMap, ro_data: Vec<(Symbol, Vec)>, fns: Vec<(Symbol, IRLFunction)>, + labels: Vec>, } impl SymbolSpace { @@ -38,6 +40,7 @@ impl SymbolSpace { data_map: HashMap::new(), ro_data: Vec::new(), fns: Vec::new(), + labels: Vec::new(), }; for e in entries { s.func(e); @@ -50,23 +53,26 @@ impl SymbolSpace { pub fn fns(&self) -> &[(Symbol, IRLFunction)] { &self.fns } + pub fn labels(&self) -> &[Option] { + &self.labels + } } impl SymbolSpaceBuilder { pub fn pop_fn(&mut self) -> Option<(WritableSymbol, FnID)> { self.unwritten_fns.pop() } - pub fn anon_ro_data(&mut self, data: &[u8]) -> Symbol { + pub fn anon_ro_data(&mut self, data: &[u8], label: Option) -> Symbol { let sym = self.reserve(); - self.write_ro_data(sym, data.to_vec()) + self.write_ro_data(sym, data.to_vec(), label) } - pub fn ro_data(&mut self, id: &DataID, data: &[u8]) -> Symbol { + pub fn ro_data(&mut self, id: &DataID, data: &[u8], label: Option) -> Symbol { match self.data_map.get(id) { Some(s) => *s, None => { let sym = self.reserve(); self.data_map.insert(*id, *sym); - self.write_ro_data(sym, data.to_vec()) + self.write_ro_data(sym, data.to_vec(), label) } } } @@ -82,18 +88,31 @@ impl SymbolSpaceBuilder { } } } - pub fn write_ro_data(&mut self, sym: WritableSymbol, data: Vec) -> Symbol { + pub fn write_ro_data( + &mut self, + sym: WritableSymbol, + data: Vec, + name: Option, + ) -> Symbol { let data = data.into(); self.ro_data.push((*sym, data)); + self.labels[sym.0 .0] = name; *sym } - pub fn write_fn(&mut self, sym: WritableSymbol, func: IRLFunction) -> Symbol { + pub fn write_fn( + &mut self, + sym: WritableSymbol, + func: IRLFunction, + name: Option, + ) -> Symbol { self.fns.push((*sym, func)); + self.labels[sym.0 .0] = name; *sym } pub fn reserve(&mut self) -> WritableSymbol { let val = self.symbols; self.symbols += 1; + self.labels.push(None); WritableSymbol(Symbol(val)) } pub fn len(&self) -> usize { @@ -104,6 +123,7 @@ impl SymbolSpaceBuilder { Some(SymbolSpace { fns: self.fns, ro_data: self.ro_data, + labels: self.labels, }) } else { None diff --git a/src/ir/upper/def.rs b/src/ir/upper/def.rs index 97c4314..e64bc7c 100644 --- a/src/ir/upper/def.rs +++ b/src/ir/upper/def.rs @@ -29,6 +29,7 @@ pub struct VarDef { pub struct DataDef { pub ty: Type, pub origin: Origin, + pub label: String, } #[derive(Debug, Clone, Copy)] diff --git a/src/main.rs b/src/main.rs index 4065e5c..91970c7 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,5 +1,6 @@ #![feature(box_patterns)] #![feature(try_trait_v2)] +#![feature(trait_alias)] use ir::{IRLProgram, IRUProgram}; use parser::{NodeParsable, PModule, PStatement, ParserCtx}; @@ -21,15 +22,16 @@ use common::*; fn main() { let file = std::env::args_os().nth(1); let gdb = std::env::args().nth(2).is_some_and(|a| a == "--debug"); + let asm = std::env::args().nth(2).is_some_and(|a| a == "--asm"); if let Some(path) = file { let file = std::fs::read_to_string(path).expect("failed to read file"); - run_file(&file, gdb); + run_file(&file, gdb, asm); } else { run_stdin(); } } -fn run_file(file: &str, gdb: bool) { +fn run_file(file: &str, gdb: bool, asm: bool) { let mut ctx = ParserCtx::from(file); let res = PModule::parse_node(&mut ctx); if ctx.output.errs.is_empty() { @@ -50,9 +52,14 @@ fn run_file(file: &str, gdb: bool) { output.write_for(&mut stdout(), file); if output.errs.is_empty() { let program = IRLProgram::create(&namespace).expect("morir"); - let bin = compiler::compile(program); - println!("compiled"); - save_run(&bin, gdb); + let unlinked = compiler::compile(&program); + if asm { + println!("{:?}", unlinked); + } else { + let bin = unlinked.link().to_elf(); + println!("compiled"); + save_run(&bin, gdb); + } } } } diff --git a/src/parser/v3/lower/expr.rs b/src/parser/v3/lower/expr.rs index f2adb23..129a9b1 100644 --- a/src/parser/v3/lower/expr.rs +++ b/src/parser/v3/lower/expr.rs @@ -13,6 +13,7 @@ impl FnLowerable for PExpr { DataDef { ty: Type::Bits(8).arr(data.len() as u32), origin: Origin::File(l.span), + label: format!("string \"{}\"", s.replace("\n", "\\n")) }, data, ); @@ -26,6 +27,7 @@ impl FnLowerable for PExpr { DataDef { ty, origin: Origin::File(l.span), + label: format!("char '{c}'"), }, c.to_string().as_bytes().to_vec(), ); @@ -40,6 +42,7 @@ impl FnLowerable for PExpr { DataDef { ty, origin: Origin::File(l.span), + label: format!("num {n:?}") }, n.whole.parse::().unwrap().to_le_bytes().to_vec(), ); diff --git a/src/util/label.rs b/src/util/label.rs new file mode 100644 index 0000000..e504d08 --- /dev/null +++ b/src/util/label.rs @@ -0,0 +1,40 @@ +// this is not even remotely worth it but technically it doesn't use the heap I think xdddddddddd + +use std::marker::PhantomData; +pub trait Labeler = Fn(&mut std::fmt::Formatter<'_>, &S) -> std::fmt::Result; + +pub trait Labelable { + fn labeled>(&self, l: L) -> Labeled + where + Self: Sized; +} + +pub struct Labeled<'a, T, L: Labeler, S> { + data: &'a T, + labeler: L, + pd: PhantomData, +} + +pub trait LabeledFmt { + fn fmt_label( + &self, + f: &mut std::fmt::Formatter<'_>, + label: &dyn Labeler, + ) -> std::fmt::Result; +} + +impl, S> Labelable for T { + fn labeled>(&self, l: L) -> Labeled { + Labeled { + data: self, + labeler: l, + pd: PhantomData, + } + } +} + +impl, L: Labeler, S> std::fmt::Debug for Labeled<'_, T, L, S> { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + self.data.fmt_label(f, &self.labeler) + } +} diff --git a/src/util/mod.rs b/src/util/mod.rs index 9d34b52..52ba79e 100644 --- a/src/util/mod.rs +++ b/src/util/mod.rs @@ -1,5 +1,7 @@ mod padder; mod bits; +mod label; pub use padder::*; pub use bits::*; +pub use label::*;