From 77735953a537bc800bb2fc5e0b7147959ebb09ee Mon Sep 17 00:00:00 2001 From: shadow cat Date: Sat, 12 Oct 2024 18:24:15 -0400 Subject: [PATCH] missing symbol filling --- src/compiler/elf.rs | 6 +- src/compiler/mod.rs | 6 +- src/compiler/program.rs | 129 +++++++++++++++++++++++++++++------- src/compiler/riscv64/asm.rs | 63 +++++++++--------- src/compiler/riscv64/mod.rs | 89 ++++++++----------------- src/main.rs | 2 - 6 files changed, 171 insertions(+), 124 deletions(-) diff --git a/src/compiler/elf.rs b/src/compiler/elf.rs index 3343d48..258162c 100644 --- a/src/compiler/elf.rs +++ b/src/compiler/elf.rs @@ -1,3 +1,5 @@ +use super::program::Addr; + #[repr(C)] pub struct ELF64Header { magic: u32, @@ -49,7 +51,7 @@ pub struct SectionHeader { } // this is currently specialized for riscv64; obviously add params later -pub fn create(program: Vec, start_offset: u64) -> Vec { +pub fn create(program: Vec, start_offset: Addr) -> Vec { let addr_start = 0x1000; let page_size = 0x1000; let progam_size = std::mem::size_of_val(&program[..]) as u64; @@ -76,7 +78,7 @@ pub fn create(program: Vec, start_offset: u64) -> Vec { ty: 0x2, // executable machine: 0xf3, // risc-v e_version: 0x1, - entry: addr_start + program_pos + start_offset, + entry: addr_start + program_pos + start_offset.val(), program_header_offset: size_of::() as u64, section_header_offset: 0x0, // C ABI (16 bit instruction align) + double precision floats diff --git a/src/compiler/mod.rs b/src/compiler/mod.rs index a38eef0..ad1371d 100644 --- a/src/compiler/mod.rs +++ b/src/compiler/mod.rs @@ -10,6 +10,8 @@ mod program; mod riscv64; mod target; +use program::*; + pub fn main() { use std::io::prelude::*; let dir = Path::new("./build"); @@ -62,7 +64,3 @@ pub fn main() { } } -// qemu-riscv64 -g 1234 test & -// riscv64-linux-gnu-gdb -q \ -// -ex "target remote :1234" \ -// test diff --git a/src/compiler/program.rs b/src/compiler/program.rs index 5cdd5b1..ea80c4b 100644 --- a/src/compiler/program.rs +++ b/src/compiler/program.rs @@ -1,35 +1,118 @@ -use std::collections::HashMap; +use std::{collections::HashMap, ops::Deref}; -pub fn create_program( - ro_data: HashMap>, - functions: Vec>, -) -> (Vec, Option) { +pub fn create_program(map: SymMap, start: Symbol) -> (Vec, Option) { let mut data = Vec::new(); - let mut sym_map = HashMap::new(); - for (key, val) in ro_data { - sym_map.insert(key, data.len() as u64); + let mut sym_table = SymTable::new(map.len()); + let mut missing = HashMap::>::new(); + for (val, id) in map.ro_data { + sym_table.insert(id, Addr(data.len() as u64)); data.extend(val); } - let mut start = None; - for fun in functions { - if fun.label == "_start" { - start = Some(data.len() as u64); + for (fun, id) in map.functions { + sym_table.insert(id, Addr(data.len() as u64)); + for i in fun { + let i_pos = Addr(data.len() as u64); + if let Some(sym) = i.push(&mut data, &sym_table, i_pos, false) { + if let Some(vec) = missing.get_mut(&sym) { + vec.push((i_pos, i)); + } else { + missing.insert(sym, vec![(i_pos, i)]); + } + } } - sym_map.insert(fun.label, data.len() as u64); - for i in fun.instructions { - let pos = data.len() as u64; - i.push(&mut data, &sym_map, pos); + if let Some(vec) = missing.remove(&id) { + for (addr, i) in vec { + let mut replace = Vec::new(); + i.push(&mut replace, &sym_table, addr, true); + let pos = addr.val() as usize; + data[pos..pos + replace.len()].copy_from_slice(&replace); + } } } - (data, start) -} - -pub struct Function { - pub label: String, - pub instructions: Vec, + assert!(missing.is_empty()); + (data, sym_table.get(start)) } pub trait Instr { - fn push(&self, data: &mut Vec, ro_map: &HashMap, pos: u64) -> Option; + fn push(&self, data: &mut Vec, syms: &SymTable, pos: Addr, missing: bool) -> Option; } +#[derive(Debug, Clone, Copy, PartialEq)] +pub struct Addr(u64); +impl Addr { + const NONE: Self = Self(!0); + pub fn val(&self) -> u64 { + self.0 + } +} + +#[derive(Debug, Clone, Copy, Hash, PartialEq, Eq)] +pub struct Symbol(usize); +/// intentionally does not have copy or clone; +/// this should only be consumed once +pub struct WritableSymbol(Symbol); + +impl Deref for WritableSymbol { + type Target = Symbol; + fn deref(&self) -> &Self::Target { + &self.0 + } +} + +pub struct SymMap { + i: usize, + ro_data: Vec<(Vec, Symbol)>, + functions: Vec<(Vec, Symbol)>, +} + +impl SymMap { + pub fn new() -> Self { + Self { + i: 0, + ro_data: Vec::new(), + functions: Vec::new(), + } + } + pub fn push_ro_data(&mut self, data: impl Into>) -> (Symbol, usize) { + let sym = self.reserve(); + self.write_ro_data(sym, data) + } + pub fn push_fn(&mut self, instructions: Vec) -> Symbol { + let sym = self.reserve(); + self.write_fn(sym, instructions) + } + pub fn write_ro_data(&mut self, sym: WritableSymbol, data: impl Into>) -> (Symbol, usize) { + let data = data.into(); + let len = data.len(); + self.ro_data.push((data, *sym)); + (*sym, len) + } + pub fn write_fn(&mut self, sym: WritableSymbol, instructions: Vec) -> Symbol { + self.functions.push((instructions, *sym)); + *sym + } + pub fn reserve(&mut self) -> WritableSymbol { + let val = self.i; + self.i += 1; + WritableSymbol(Symbol(val)) + } + pub fn len(&self) -> usize { + self.functions.len() + self.ro_data.len() + } +} + +pub struct SymTable(Vec); +impl SymTable { + pub fn new(len: usize) -> Self { + Self(vec![Addr::NONE; len]) + } + pub fn insert(&mut self, sym: Symbol, addr: Addr) { + self.0[sym.0] = addr; + } + pub fn get(&self, sym: Symbol) -> Option { + match self.0[sym.0] { + Addr::NONE => None, + addr => Some(addr), + } + } +} diff --git a/src/compiler/riscv64/asm.rs b/src/compiler/riscv64/asm.rs index ea3bf20..58c41ce 100644 --- a/src/compiler/riscv64/asm.rs +++ b/src/compiler/riscv64/asm.rs @@ -1,57 +1,56 @@ -use crate::compiler::program::Instr; +use crate::compiler::program::{Addr, Instr, SymTable, Symbol}; use super::*; pub enum AsmInstruction { Addi(Reg, Reg, i32), - La(Reg, String), + La(Reg, Symbol), Jal(Reg, i32), - Jala(String), - Ja(String), + Call(Symbol), + J(Symbol), Ret, Ecall, + Li(Reg, i32), } impl Instr for AsmInstruction { - fn push( - &self, - data: &mut Vec, - sym_map: &std::collections::HashMap, - pos: u64, - ) -> Option { - match self { - Self::Addi(dest, src, imm) => { - data.extend(addi(*dest, *src, BitsI32::new(*imm)).to_le_bytes()); - } + fn push(&self, data: &mut Vec, sym_map: &SymTable, pos: Addr, missing: bool) -> Option { + let last = match self { + Self::Addi(dest, src, imm) => addi(*dest, *src, BitsI32::new(*imm)), Self::La(dest, sym) => { - if let Some(addr) = sym_map.get(sym) { - let offset = *addr as i32 - pos as i32; + if let Some(addr) = sym_map.get(*sym) { + let offset = addr.val() as i32 - pos.val() as i32; data.extend(auipc(*dest, BitsI32::new(0)).to_le_bytes()); - data.extend(addi(*dest, *dest, BitsI32::new(offset)).to_le_bytes()); + addi(*dest, *dest, BitsI32::new(offset)) } else { - return Some(sym.to_string()); + data.extend_from_slice(&[0; 2 * 4]); + return Some(*sym); } } - Self::Jal(dest, offset) => data.extend(jal(*dest, BitsI32::new(*offset)).to_le_bytes()), - Self::Ja(sym) => { - if let Some(addr) = sym_map.get(sym) { - let offset = *addr as i32 - pos as i32; - data.extend(j(BitsI32::new(offset)).to_le_bytes()); + Self::Jal(dest, offset) => jal(*dest, BitsI32::new(*offset)), + Self::J(sym) => { + if let Some(addr) = sym_map.get(*sym) { + let offset = addr.val() as i32 - pos.val() as i32; + j(BitsI32::new(offset)) } else { - return Some(sym.to_string()); + data.extend_from_slice(&[0; 4]); + return Some(*sym); } } - Self::Jala(sym) => { - if let Some(addr) = sym_map.get(sym) { - let offset = *addr as i32 - pos as i32; - data.extend(jal(ra, BitsI32::new(offset)).to_le_bytes()); + Self::Call(sym) => { + if let Some(addr) = sym_map.get(*sym) { + let offset = addr.val() as i32 - pos.val() as i32; + jal(ra, BitsI32::new(offset)) } else { - return Some(sym.to_string()); + data.extend_from_slice(&[0; 4]); + return Some(*sym); } } - Self::Ret => data.extend(ret().to_le_bytes()), - Self::Ecall => data.extend(ecall().to_le_bytes()), - } + Self::Ret => ret(), + Self::Ecall => ecall(), + Self::Li(reg, val) => addi(*reg, zero, BitsI32::new(*val)), + }; + data.extend(last.to_le_bytes()); None } } diff --git a/src/compiler/riscv64/mod.rs b/src/compiler/riscv64/mod.rs index 30eec6e..7fb6867 100644 --- a/src/compiler/riscv64/mod.rs +++ b/src/compiler/riscv64/mod.rs @@ -5,9 +5,7 @@ mod opcode; mod reg; mod single; -use std::collections::HashMap; - -use super::{elf, program::{create_program, Function}}; +use super::{create_program, elf, SymMap}; use crate::util::BitsI32; use base::*; use funct::{op::*, width}; @@ -18,64 +16,33 @@ use single::*; pub fn gen() -> Vec { use asm::AsmInstruction as I; - // let mut program = Vec::new(); - // let msg = b"Hello world!\n"; - // program.extend(msg); - // program.resize(((program.len() - 1) / 4 + 1) * 4, 0); - // let start = program.len() as u64; - // let instructions = [ - // auipc(t0, BitsI32::new(0)), - // addi(t0, t0, BitsI32::new(-(start as i32))), - // addi(a0, zero, BitsI32::new(1)), - // mv(a1, t0), - // addi(a2, zero, BitsI32::new(msg.len() as i32)), - // addi(a7, zero, BitsI32::new(64)), - // addi(t0, zero, const { BitsI32::new(-10) }), - // ecall(), - // // exit - // addi(a0, zero, BitsI32::new(0)), - // addi(a7, zero, BitsI32::new(93)), - // ecall(), - // j(BitsI32::new(0)), - // ]; - // for i in instructions { - // program.extend(i.to_le_bytes()); - // } - let msg = b"Hello world!\n"; - let msg2 = b"IT WORKS!!!!\n"; - let ro_data = HashMap::from([ - ("msg".to_string(), msg.to_vec()), - ("msg2".to_string(), msg2.to_vec()), + let mut table = SymMap::new(); + let (msg, len) = table.push_ro_data(b"Hello world!\n"); + let (msg2, len2) = table.push_ro_data(b"IT WORKS!!!!\n"); + let print_stuff = table.reserve(); + let start = table.push_fn(vec![ + I::Call(*print_stuff), + I::Li(a0, 0), + I::Li(a7, 93), + I::Ecall, + I::Jal(zero, 0), ]); - let functions = vec![ - Function { - label: "print_stuff".to_string(), - instructions: vec![ - I::Addi(a0, zero, 1), - I::La(a1, "msg".to_string()), - I::Addi(a2, zero, msg.len() as i32), - I::Addi(a7, zero, 64), - I::Ecall, - I::Addi(a0, zero, 1), - I::La(a1, "msg2".to_string()), - I::Addi(a2, zero, msg2.len() as i32), - I::Addi(a7, zero, 64), - I::Ecall, - I::Ret, - ] - }, - Function { - label: "_start".to_string(), - instructions: vec![ - I::Jala("print_stuff".to_string()), - I::Ecall, - I::Addi(a0, zero, 0), - I::Addi(a7, zero, 93), - I::Ecall, - I::Jal(zero, 0), - ] - }, - ]; - let (program, start) = create_program(ro_data, functions); + table.write_fn( + print_stuff, + vec![ + I::Li(a0, 1), + I::La(a1, msg), + I::Li(a2, len as i32), + I::Li(a7, 64), + I::Ecall, + I::Li(a0, 1), + I::La(a1, msg2), + I::Li(a2, len2 as i32), + I::Li(a7, 64), + I::Ecall, + I::Ret, + ], + ); + let (program, start) = create_program(table, start); elf::create(program, start.expect("no start!")) } diff --git a/src/main.rs b/src/main.rs index f5ca2d8..b553643 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,6 +1,4 @@ #![feature(box_patterns)] -#![feature(const_unbounded_shifts)] -#![feature(unbounded_shifts)] mod util; mod compiler;