From 0e0dbd647db7214a8bb310014b4d4c0ea02b60ef Mon Sep 17 00:00:00 2001 From: shadow cat Date: Sat, 7 Dec 2024 20:03:19 -0500 Subject: [PATCH] travel fn path for compilation --- data/test.lang | 9 +- src/compiler/arch/mod.rs | 2 - src/compiler/arch/riscv64/asm.rs | 10 +-- src/compiler/arch/riscv64/compile.rs | 15 ++-- src/compiler/arch/riscv64/mod.rs | 54 +----------- src/compiler/mod.rs | 62 +------------- src/compiler/program.rs | 81 +++--------------- src/ir/id.rs | 8 -- src/ir/lower/data.rs | 6 -- src/ir/lower/func.rs | 5 +- src/ir/lower/mod.rs | 4 +- src/ir/lower/program.rs | 89 ++++++++++++------- src/ir/lower/symbol.rs | 122 +++++++++++++++++++++++++++ src/main.rs | 12 ++- src/parser/v3/lower/def.rs | 10 ++- 15 files changed, 230 insertions(+), 259 deletions(-) delete mode 100644 src/ir/lower/data.rs create mode 100644 src/ir/lower/symbol.rs diff --git a/data/test.lang b/data/test.lang index 26bb092..d2b9a86 100644 --- a/data/test.lang +++ b/data/test.lang @@ -1,6 +1,11 @@ fn start() { print("Hello World!\n", 13); - exit(39); + print("Hello World!\n", 13); + exit(0); +} + +fn unused() { + print("Hello World!\n", 13); } fn print(msg, len) { @@ -12,7 +17,7 @@ fn print(msg, len) { } } -fn exit(status) { +fn exit(status: 32) { asm (a0 = status) { ld a0, 0, a0 li a7, 93 diff --git a/src/compiler/arch/mod.rs b/src/compiler/arch/mod.rs index 763b813..ceb8572 100644 --- a/src/compiler/arch/mod.rs +++ b/src/compiler/arch/mod.rs @@ -1,3 +1 @@ pub mod riscv64; -use super::*; - diff --git a/src/compiler/arch/riscv64/asm.rs b/src/compiler/arch/riscv64/asm.rs index 238e018..d7d8422 100644 --- a/src/compiler/arch/riscv64/asm.rs +++ b/src/compiler/arch/riscv64/asm.rs @@ -1,4 +1,4 @@ -use crate::{compiler::program::{Addr, Instr, SymTable}, ir::AddrID}; +use crate::{compiler::program::{Addr, Instr, SymTable}, ir::Symbol}; use super::*; @@ -12,10 +12,10 @@ pub enum LinkerInstruction { Sd { src: Reg, offset: i32, base: Reg }, Ld { dest: Reg, offset: i32, base: Reg }, Mv { dest: Reg, src: Reg }, - La { dest: Reg, src: AddrID }, + La { dest: Reg, src: Symbol }, Jal { dest: Reg, offset: i32 }, - Call(AddrID), - J(AddrID), + Call(Symbol), + J(Symbol), Ret, Ecall, Li { dest: Reg, imm: i64 }, @@ -28,7 +28,7 @@ impl Instr for LinkerInstruction { sym_map: &SymTable, pos: Addr, missing: bool, - ) -> Option { + ) -> Option { let last = match self { Self::Add { dest, src1, src2 } => add(*dest, *src1, *src2), Self::Addi { dest, src, imm } => addi(*dest, *src, BitsI32::new(*imm)), diff --git a/src/compiler/arch/riscv64/compile.rs b/src/compiler/arch/riscv64/compile.rs index 97be242..68bb6bb 100644 --- a/src/compiler/arch/riscv64/compile.rs +++ b/src/compiler/arch/riscv64/compile.rs @@ -13,11 +13,10 @@ use super::{LinkerInstruction as LI, *}; pub fn compile(program: IRLProgram) -> (Vec, Option) { let mut fns = Vec::new(); let mut data = Vec::new(); - for d in program.data { - data.push((d.data, d.addr)); + for (sym, d) in program.ro_data() { + data.push((d.clone(), *sym)); } - let mut start = None; - for f in program.fns { + for (sym, f) in program.fns() { let mut v = Vec::new(); let mut stack = HashMap::new(); let mut stack_len = 0; @@ -106,12 +105,10 @@ pub fn compile(program: IRLProgram) -> (Vec, Option) { IRI::Ret { src } => todo!(), } } - if f.name == "start" { - start = Some(f.addr); - } else { + if *sym != program.entry() { v.push(LI::Ret); } - fns.push((v, f.addr)); + fns.push((v, *sym)); } - create_program(fns, data, start) + create_program(fns, data, Some(program.entry())) } diff --git a/src/compiler/arch/riscv64/mod.rs b/src/compiler/arch/riscv64/mod.rs index 76cb12d..59a3a12 100644 --- a/src/compiler/arch/riscv64/mod.rs +++ b/src/compiler/arch/riscv64/mod.rs @@ -1,63 +1,17 @@ mod asm; mod base; +mod compile; mod funct; mod opcode; mod reg; mod single; -mod compile; use crate::util::BitsI32; -pub use asm::*; use base::*; use funct::{op::*, width}; use opcode::*; -pub use reg::*; -pub use compile::*; - use single::*; -pub fn gen() -> Vec { - // use asm::LinkerInstruction as I; - // let mut table = SymMap::new(); - // let (msg, len) = table.push_ro_data_size(b"Hello world!\n".to_vec()); - // let (msg2, len2) = table.push_ro_data_size(b"IT WORKS!!!!\n".to_vec()); - // let print_stuff = table.reserve(); - // let start = table.push_fn(vec![ - // I::Call(*print_stuff), - // I::Li { dest: a0, imm: 0 }, - // I::Li { dest: a7, imm: 93 }, - // I::Ecall, - // I::Jal { - // dest: zero, - // offset: 0, - // }, - // ]); - // table.write_fn( - // print_stuff, - // vec![ - // I::Li { dest: a0, imm: 1 }, - // I::La { dest: a1, src: msg }, - // I::Li { - // dest: a2, - // imm: len as i64, - // }, - // I::Li { dest: a7, imm: 64 }, - // I::Ecall, - // I::Li { dest: a0, imm: 1 }, - // I::La { - // dest: a1, - // src: msg2, - // }, - // I::Li { - // dest: a2, - // imm: len2 as i64, - // }, - // I::Li { dest: a7, imm: 64 }, - // I::Ecall, - // I::Ret, - // ], - // ); - // let (program, start) = create_program(table, Some(start)); - // elf::create(program, start.expect("no start!")) - todo!("remove this"); -} +pub use asm::*; +pub use compile::*; +pub use reg::*; diff --git a/src/compiler/mod.rs b/src/compiler/mod.rs index 980feeb..8d82a69 100644 --- a/src/compiler/mod.rs +++ b/src/compiler/mod.rs @@ -1,10 +1,3 @@ -use std::{ - fs::{create_dir_all, OpenOptions}, - os::unix::fs::OpenOptionsExt, - path::Path, - process::Command, -}; - pub mod arch; mod elf; mod program; @@ -16,58 +9,5 @@ use crate::ir::IRLProgram; pub fn compile(program: IRLProgram) -> Vec { let (compiled, start) = arch::riscv64::compile(program); - let binary = elf::create(compiled, start.expect("no start method found")); - binary -} - -pub fn main() { - use std::io::prelude::*; - let dir = Path::new("./build"); - create_dir_all(dir).expect("Failed to create or confirm build directory"); - let name = Path::new("test"); - let path = dir.join(name); - let path = path.as_os_str(); - let mut file = OpenOptions::new() - .create(true) - .write(true) - .truncate(true) - .mode(0o750) - .open(path) - .expect("Failed to create file"); - file.write_all(&arch::riscv64::gen()) - .expect("Failed to write to file"); - file.sync_all().expect("Failed to sync file"); - let mut p = Command::new("qemu-riscv64"); - let run_gdb = std::env::args().nth(1).is_some_and(|a| a == "d"); - let proc = if run_gdb { - p.arg("-g").arg("1234").arg(path).spawn() - } else { - p.arg(path).spawn() - }; - if let Ok(mut process) = proc { - let mut print_exit = true; - if run_gdb { - match Command::new("gdb") - .arg("-q") - .arg("-ex") - .arg("target remote :1234") - .arg(path) - .spawn() - { - Ok(mut gdb) => { - gdb.wait().expect("xd"); - } - Err(e) => { - print_exit = false; - println!("gdb error: {e:?}"); - process.kill().expect("uh oh"); - } - } - } - if let Ok(status) = process.wait() { - if print_exit && status.code().is_none_or(|c| c != 0) { - println!("{}", status); - } - } - } + elf::create(compiled, start.expect("no start method found")) } diff --git a/src/compiler/program.rs b/src/compiler/program.rs index 68be362..6c0b0ba 100644 --- a/src/compiler/program.rs +++ b/src/compiler/program.rs @@ -1,15 +1,15 @@ -use std::{collections::HashMap, ops::Deref}; +use std::collections::HashMap; -use crate::ir::AddrID; +use crate::ir::Symbol; pub fn create_program( - fns: Vec<(Vec, AddrID)>, - ro_data: Vec<(Vec, AddrID)>, - start: Option, + fns: Vec<(Vec, Symbol)>, + ro_data: Vec<(Vec, Symbol)>, + start: Option, ) -> (Vec, Option) { let mut data = Vec::new(); let mut sym_table = SymTable::new(fns.len() + ro_data.len()); - let mut missing = HashMap::>::new(); + let mut missing = HashMap::>::new(); for (val, id) in ro_data { sym_table.insert(id, Addr(data.len() as u64)); data.extend(val); @@ -45,7 +45,7 @@ pub fn create_program( pub trait Instr { fn push(&self, data: &mut Vec, syms: &SymTable, pos: Addr, missing: bool) - -> Option; + -> Option; } #[derive(Debug, Clone, Copy, PartialEq)] @@ -57,75 +57,16 @@ impl Addr { } } -#[derive(Debug, Clone, Copy, Hash, PartialEq, Eq)] -pub struct Symbol(usize); -/// intentionally does not have copy or clone; -/// this should only be consumed once -pub struct WritableSymbol(Symbol); - -impl Deref for WritableSymbol { - type Target = Symbol; - fn deref(&self) -> &Self::Target { - &self.0 - } -} - -pub struct SymMap { - i: usize, - ro_data: Vec<(Vec, Symbol)>, - functions: Vec<(Vec, Symbol)>, -} - -impl SymMap { - pub fn new() -> Self { - Self { - i: 0, - ro_data: Vec::new(), - functions: Vec::new(), - } - } - pub fn push_ro_data(&mut self, data: Vec) -> Symbol { - let sym = self.reserve(); - self.write_ro_data(sym, data.into()) - } - pub fn push_ro_data_size(&mut self, data: Vec) -> (Symbol, usize) { - let sym = self.reserve(); - let len = data.len(); - (self.write_ro_data(sym, data), len) - } - pub fn push_fn(&mut self, instructions: Vec) -> Symbol { - let sym = self.reserve(); - self.write_fn(sym, instructions) - } - pub fn write_ro_data(&mut self, sym: WritableSymbol, data: Vec) -> Symbol { - let data = data.into(); - self.ro_data.push((data, *sym)); - *sym - } - pub fn write_fn(&mut self, sym: WritableSymbol, instructions: Vec) -> Symbol { - self.functions.push((instructions, *sym)); - *sym - } - pub fn reserve(&mut self) -> WritableSymbol { - let val = self.i; - self.i += 1; - WritableSymbol(Symbol(val)) - } - pub fn len(&self) -> usize { - self.functions.len() + self.ro_data.len() - } -} - pub struct SymTable(Vec); impl SymTable { pub fn new(len: usize) -> Self { Self(vec![Addr::NONE; len]) } - pub fn insert(&mut self, sym: AddrID, addr: Addr) { - self.0[sym.0] = addr; + pub fn insert(&mut self, sym: Symbol, addr: Addr) { + self.0[*sym] = addr; } - pub fn get(&self, sym: AddrID) -> Option { - match self.0[sym.0] { + pub fn get(&self, sym: Symbol) -> Option { + match self.0[*sym] { Addr::NONE => None, addr => Some(addr), } diff --git a/src/ir/id.rs b/src/ir/id.rs index 63ed37e..2106a58 100644 --- a/src/ir/id.rs +++ b/src/ir/id.rs @@ -8,8 +8,6 @@ pub struct VarID(pub usize); pub struct FnID(pub usize); #[derive(Clone, Copy, Eq, Hash, PartialEq)] pub struct DataID(pub usize); -#[derive(Clone, Copy, Eq, Hash, PartialEq)] -pub struct AddrID(pub usize); impl Debug for VarID { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { @@ -34,9 +32,3 @@ impl Debug for DataID { write!(f, "data{}", self.0) } } - -impl Debug for AddrID { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - write!(f, "@{}", self.0) - } -} diff --git a/src/ir/lower/data.rs b/src/ir/lower/data.rs deleted file mode 100644 index 84b92cd..0000000 --- a/src/ir/lower/data.rs +++ /dev/null @@ -1,6 +0,0 @@ -use super::AddrID; - -pub struct IRLData { - pub addr: AddrID, - pub data: Vec, -} diff --git a/src/ir/lower/func.rs b/src/ir/lower/func.rs index 75ba270..ae883b0 100644 --- a/src/ir/lower/func.rs +++ b/src/ir/lower/func.rs @@ -6,7 +6,6 @@ use std::collections::HashMap; #[derive(Debug)] pub struct IRLFunction { pub name: String, - pub addr: AddrID, pub instructions: Vec, pub stack: HashMap, pub args: Vec<(VarID, usize)>, @@ -24,11 +23,11 @@ pub enum IRLInstruction { }, LoadAddr { dest: VarID, - src: AddrID, + src: Symbol, }, Call { dest: VarID, - f: AddrID, + f: Symbol, args: Vec<(VarID, usize)>, }, AsmBlock { diff --git a/src/ir/lower/mod.rs b/src/ir/lower/mod.rs index 61b26da..5559d58 100644 --- a/src/ir/lower/mod.rs +++ b/src/ir/lower/mod.rs @@ -1,9 +1,9 @@ mod func; -mod data; mod program; +mod symbol; pub use func::*; -pub use data::*; pub use program::*; +pub use symbol::*; use super::*; diff --git a/src/ir/lower/program.rs b/src/ir/lower/program.rs index df7b6af..4935877 100644 --- a/src/ir/lower/program.rs +++ b/src/ir/lower/program.rs @@ -1,30 +1,33 @@ use std::collections::HashMap; -use super::{AddrID, IRLData, IRLFunction, IRLInstruction, IRUInstruction, Namespace, VarID}; +use crate::ir::{FnID, SymbolSpace}; + +use super::{IRLFunction, IRLInstruction, IRUInstruction, Namespace, Symbol, VarID}; pub struct IRLProgram { - pub fns: Vec, - pub data: Vec, + sym_space: SymbolSpace, + entry: Symbol, } // NOTE: there are THREE places here where I specify size (8) impl IRLProgram { - pub fn create(ns: &Namespace) -> Self { - let mut fns = Vec::new(); - let mut data = Vec::new(); - let data_len = ns.data.len(); - for (i, d) in ns.data.iter().enumerate() { - data.push(IRLData { - addr: AddrID(i), - data: d.clone(), - }) - } + pub fn create(ns: &Namespace) -> Option { + let mut start = None; for (i, f) in ns.fns.iter().enumerate() { - let f = f.as_ref().unwrap(); + let f = f.as_ref()?; + if f.name == "start" { + start = Some(FnID(i)); + } + } + let start = start?; + let mut builder = SymbolSpace::with_entries(&[start]); + let entry = builder.func(&start); + while let Some((sym, i)) = builder.pop_fn() { + let f = ns.fns[i.0].as_ref().unwrap(); let mut instructions = Vec::new(); let mut stack = HashMap::new(); - let mut alloc = |i: &VarID| { + let mut alloc_stack = |i: &VarID| { if !stack.contains_key(i) { stack.insert(*i, 8); } @@ -32,38 +35,42 @@ impl IRLProgram { for i in &f.instructions { instructions.push(match i { IRUInstruction::Mv { dest, src } => { - alloc(dest); + alloc_stack(dest); IRLInstruction::Mv { dest: *dest, src: *src, } } IRUInstruction::Ref { dest, src } => { - alloc(dest); + alloc_stack(dest); IRLInstruction::Ref { dest: *dest, src: *src, } } IRUInstruction::LoadData { dest, src } => { - alloc(dest); + alloc_stack(dest); + let addr = builder.ro_data(src, &ns.data[src.0]); IRLInstruction::LoadAddr { dest: *dest, - src: AddrID(src.0), + src: addr, } } IRUInstruction::LoadFn { dest, src } => { - alloc(dest); + alloc_stack(dest); + let sym = builder.func(src); IRLInstruction::LoadAddr { dest: *dest, - src: AddrID(src.0 + data_len), + src: sym, } } IRUInstruction::Call { dest, f, args } => { - alloc(dest); + alloc_stack(dest); + let fid = &ns.fn_map[f]; + let sym = builder.func(fid); IRLInstruction::Call { dest: *dest, - f: AddrID(ns.fn_map[f].0 + data_len), + f: sym, args: args.iter().map(|a| (*a, 8)).collect(), } } @@ -74,14 +81,34 @@ impl IRLProgram { IRUInstruction::Ret { src } => IRLInstruction::Ret { src: *src }, }); } - fns.push(IRLFunction { - name: f.name.clone(), - addr: AddrID(i + data_len), - instructions, - args: f.args.iter().map(|a| (*a, 8)).collect(), - stack, - }) + builder.write_fn( + sym, + IRLFunction { + name: f.name.clone(), + instructions, + args: f.args.iter().map(|a| (*a, 8)).collect(), + stack, + }, + ); } - Self { fns, data } + let sym_space = builder.finish().expect("we failed the mission"); + println!("fns:"); + for (a, f) in sym_space.fns() { + println!(" {:?}: {}", a, f.name); + } + println!("datas: {}", sym_space.ro_data().len()); + Some(Self { sym_space, entry }) + } + + pub fn entry(&self) -> Symbol { + self.entry + } +} + +impl std::ops::Deref for IRLProgram { + type Target = SymbolSpace; + + fn deref(&self) -> &Self::Target { + &self.sym_space } } diff --git a/src/ir/lower/symbol.rs b/src/ir/lower/symbol.rs new file mode 100644 index 0000000..4f65e92 --- /dev/null +++ b/src/ir/lower/symbol.rs @@ -0,0 +1,122 @@ +use std::collections::HashMap; + +use super::{DataID, FnID, IRLFunction}; + +#[derive(Clone, Copy, Hash, PartialEq, Eq)] +pub struct Symbol(usize); +/// intentionally does not have copy or clone; +/// this should only be consumed once +pub struct WritableSymbol(Symbol); + +impl std::ops::Deref for WritableSymbol { + type Target = Symbol; + fn deref(&self) -> &Self::Target { + &self.0 + } +} + +pub struct SymbolSpace { + ro_data: Vec<(Symbol, Vec)>, + fns: Vec<(Symbol, IRLFunction)>, +} + +pub struct SymbolSpaceBuilder { + symbols: usize, + unwritten_fns: Vec<(WritableSymbol, FnID)>, + fn_map: HashMap, + data_map: HashMap, + ro_data: Vec<(Symbol, Vec)>, + fns: Vec<(Symbol, IRLFunction)>, +} + +impl SymbolSpace { + pub fn with_entries(entries: &[FnID]) -> SymbolSpaceBuilder { + let mut s = SymbolSpaceBuilder { + symbols: 0, + unwritten_fns: Vec::new(), + fn_map: HashMap::new(), + data_map: HashMap::new(), + ro_data: Vec::new(), + fns: Vec::new(), + }; + for e in entries { + s.func(e); + } + s + } + pub fn ro_data(&self) -> &[(Symbol, Vec)] { + &self.ro_data + } + pub fn fns(&self) -> &[(Symbol, IRLFunction)] { + &self.fns + } +} + +impl SymbolSpaceBuilder { + pub fn pop_fn(&mut self) -> Option<(WritableSymbol, FnID)> { + self.unwritten_fns.pop() + } + pub fn ro_data(&mut self, id: &DataID, data: &Vec) -> Symbol { + match self.data_map.get(id) { + Some(s) => *s, + None => { + let sym = self.reserve(); + self.data_map.insert(*id, *sym); + self.write_ro_data(sym, data.clone()) + } + } + } + pub fn func(&mut self, id: &FnID) -> Symbol { + match self.fn_map.get(id) { + Some(s) => *s, + None => { + let wsym = self.reserve(); + let sym = *wsym; + self.unwritten_fns.push((wsym, *id)); + self.fn_map.insert(*id, sym); + sym + } + } + } + pub fn write_ro_data(&mut self, sym: WritableSymbol, data: Vec) -> Symbol { + let data = data.into(); + self.ro_data.push((*sym, data)); + *sym + } + pub fn write_fn(&mut self, sym: WritableSymbol, func: IRLFunction) -> Symbol { + self.fns.push((*sym, func)); + *sym + } + pub fn reserve(&mut self) -> WritableSymbol { + let val = self.symbols; + self.symbols += 1; + WritableSymbol(Symbol(val)) + } + pub fn len(&self) -> usize { + self.fns.len() + self.ro_data.len() + } + pub fn finish(self) -> Option { + if self.unwritten_fns.is_empty() { + Some(SymbolSpace { + fns: self.fns, + ro_data: self.ro_data, + }) + } else { + None + } + } +} + +impl std::fmt::Debug for Symbol { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "@{}", self.0) + } +} + +impl std::ops::Deref for Symbol { + type Target = usize; + + fn deref(&self) -> &Self::Target { + &self.0 + } +} diff --git a/src/main.rs b/src/main.rs index 86d72b3..72f6372 100644 --- a/src/main.rs +++ b/src/main.rs @@ -25,7 +25,6 @@ fn main() { } else { run_stdin(); } - // compiler::main(); } fn run_file(file: &str, gdb: bool) { @@ -38,13 +37,12 @@ fn run_file(file: &str, gdb: bool) { let mut namespace = Namespace::new(); module.lower(&mut namespace.push(), &mut ctx.output); if ctx.output.errs.is_empty() { - // println!("{:#?}", namespace.fns); - // println!("vars:"); - // for def in &namespace.var_defs { - // println!("{}: {}", def.name, namespace.type_name(&def.ty)); - // } + println!("vars:"); + for def in &namespace.var_defs { + println!(" {}: {}", def.name, namespace.type_name(&def.ty)); + } let program = IRLProgram::create(&namespace); - let bin = compiler::compile(program); + let bin = compiler::compile(program.expect("morir")); println!("compiled"); save_run(&bin, gdb); } diff --git a/src/parser/v3/lower/def.rs b/src/parser/v3/lower/def.rs index 6e0cbb7..a491f67 100644 --- a/src/parser/v3/lower/def.rs +++ b/src/parser/v3/lower/def.rs @@ -37,7 +37,7 @@ impl PType { output: &mut ParserOutput, span: FileSpan, ) -> Type { - match namespace.get(&self.name).map(|ids| ids.ty).flatten() { + match namespace.get(&self.name).and_then(|ids| ids.ty) { Some(id) => { if self.args.is_empty() { Type::Concrete(id) @@ -51,8 +51,12 @@ impl PType { } } None => { - output.err(ParserMsg::from_span(span, "Type not found".to_string())); - Type::Error + if let Ok(num) = self.name.parse::() { + Type::Bits(num) + } else { + output.err(ParserMsg::from_span(span, "Type not found".to_string())); + Type::Error + } } } }