From de79445ede2650741d146e64a8ea176897f8879b Mon Sep 17 00:00:00 2001 From: Shadow Cat Date: Fri, 11 Oct 2024 17:31:03 -0400 Subject: [PATCH] START OF COMPILER --- .gitignore | 1 + test.lang => data/err.lang | 9 ++ data/test.lang | 4 + src/compiler/mod.rs | 42 ++++++ src/compiler/program.rs | 53 ++++++++ src/compiler/riscv64/asm.rs | 2 + src/compiler/riscv64/elf.rs | 101 +++++++++++++++ src/compiler/riscv64/instruction/base.rs | 58 +++++++++ src/compiler/riscv64/instruction/func.rs | 20 +++ src/compiler/riscv64/instruction/mod.rs | 53 ++++++++ src/compiler/riscv64/instruction/opcode.rs | 9 ++ src/compiler/riscv64/instruction/reg.rs | 93 ++++++++++++++ src/compiler/riscv64/mod.rs | 33 +++++ src/compiler/target.rs | 7 + src/main.rs | 16 +-- src/parser/mod.rs | 14 ++ src/{v1/parser => parser/v1}/body.rs | 0 src/{v1/parser => parser/v1}/cursor.rs | 0 src/{v1/parser => parser/v1}/error.rs | 0 src/{v1/parser => parser/v1}/expr.rs | 14 +- src/{ => parser}/v1/mod.rs | 25 +++- src/{v1/parser/mod.rs => parser/v1/module.rs} | 17 +-- src/{v1/parser => parser/v1}/node.rs | 0 src/{v1/parser => parser/v1}/token/cursor.rs | 0 src/{v1/parser => parser/v1}/token/file.rs | 0 src/{v1/parser => parser/v1}/token/keyword.rs | 0 src/{v1/parser => parser/v1}/token/mod.rs | 0 src/{v1/parser => parser/v1}/token/symbol.rs | 0 src/{v1/parser => parser/v1}/val.rs | 6 +- src/{v2/parser => parser/v2}/body.rs | 0 src/{v2/parser => parser/v2}/cursor.rs | 0 src/{v2/parser => parser/v2}/error.rs | 0 src/{v2/parser => parser/v2}/expr.rs | 0 src/{ => parser}/v2/mod.rs | 15 ++- src/{v2/parser/mod.rs => parser/v2/module.rs} | 13 +- src/{v2/parser => parser/v2}/util.rs | 0 src/util/bits.rs | 120 ++++++++++++++++++ src/util/mod.rs | 41 +----- src/util/padder.rs | 38 ++++++ 39 files changed, 710 insertions(+), 94 deletions(-) rename test.lang => data/err.lang (86%) create mode 100644 data/test.lang create mode 100644 src/compiler/mod.rs create mode 100644 src/compiler/program.rs create mode 100644 
src/compiler/riscv64/asm.rs create mode 100644 src/compiler/riscv64/elf.rs create mode 100644 src/compiler/riscv64/instruction/base.rs create mode 100644 src/compiler/riscv64/instruction/func.rs create mode 100644 src/compiler/riscv64/instruction/mod.rs create mode 100644 src/compiler/riscv64/instruction/opcode.rs create mode 100644 src/compiler/riscv64/instruction/reg.rs create mode 100644 src/compiler/riscv64/mod.rs create mode 100644 src/compiler/target.rs create mode 100644 src/parser/mod.rs rename src/{v1/parser => parser/v1}/body.rs (100%) rename src/{v1/parser => parser/v1}/cursor.rs (100%) rename src/{v1/parser => parser/v1}/error.rs (100%) rename src/{v1/parser => parser/v1}/expr.rs (96%) rename src/{ => parser}/v1/mod.rs (69%) rename src/{v1/parser/mod.rs => parser/v1/module.rs} (87%) rename src/{v1/parser => parser/v1}/node.rs (100%) rename src/{v1/parser => parser/v1}/token/cursor.rs (100%) rename src/{v1/parser => parser/v1}/token/file.rs (100%) rename src/{v1/parser => parser/v1}/token/keyword.rs (100%) rename src/{v1/parser => parser/v1}/token/mod.rs (100%) rename src/{v1/parser => parser/v1}/token/symbol.rs (100%) rename src/{v1/parser => parser/v1}/val.rs (97%) rename src/{v2/parser => parser/v2}/body.rs (100%) rename src/{v2/parser => parser/v2}/cursor.rs (100%) rename src/{v2/parser => parser/v2}/error.rs (100%) rename src/{v2/parser => parser/v2}/expr.rs (100%) rename src/{ => parser}/v2/mod.rs (73%) rename src/{v2/parser/mod.rs => parser/v2/module.rs} (91%) rename src/{v2/parser => parser/v2}/util.rs (100%) create mode 100644 src/util/bits.rs create mode 100644 src/util/padder.rs diff --git a/.gitignore b/.gitignore index ea8c4bf..82448cc 100644 --- a/.gitignore +++ b/.gitignore @@ -1 +1,2 @@ /target +/build diff --git a/test.lang b/data/err.lang similarity index 86% rename from test.lang rename to data/err.lang index 17c58a8..aec9269 100644 --- a/test.lang +++ b/data/err.lang @@ -20,3 +20,12 @@ fn main() { b }; } + +fn test() { + let r = 3; + 
let a = } +} + +fn test2() { + +} diff --git a/data/test.lang b/data/test.lang new file mode 100644 index 0000000..973a6a3 --- /dev/null +++ b/data/test.lang @@ -0,0 +1,4 @@ +fn main() { + let x = 3; + print(x); +} diff --git a/src/compiler/mod.rs b/src/compiler/mod.rs new file mode 100644 index 0000000..80a056c --- /dev/null +++ b/src/compiler/mod.rs @@ -0,0 +1,42 @@ +use std::{ + fs::{create_dir_all, OpenOptions}, + os::unix::fs::OpenOptionsExt, + path::Path, + process::Command, +}; + +mod riscv64; +mod program; +mod target; + +pub fn main() { + use std::io::prelude::*; + let dir = Path::new("build"); + create_dir_all(dir).expect("Failed to create or confirm build directory"); + let name = Path::new("test"); + let path = dir.join(name); + let path = path.as_os_str(); + let mut file = OpenOptions::new() + .create(true) + .write(true) + .truncate(true) + .mode(0o750) + .open(path) + .expect("Failed to create file"); + file.write_all(&riscv64::gen()) + .expect("Failed to write to file"); + file.sync_all().expect("Failed to sync file"); + if let Ok(mut process) = Command::new("qemu-riscv64").arg(path).spawn() { + if let Ok(status) = process.wait() { + if status.code().is_none_or(|c| c != 0) { + println!("{}", status); + } + } + } +} + +// qemu-riscv64 -g 1234 test & +// riscv64-linux-gnu-gdb -q \ +// -ex "target remote :1234" \ +// test + diff --git a/src/compiler/program.rs b/src/compiler/program.rs new file mode 100644 index 0000000..cffb6c1 --- /dev/null +++ b/src/compiler/program.rs @@ -0,0 +1,53 @@ +use std::collections::HashMap; + +pub struct Program { + data: Vec, + ro_map: HashMap, +} + +impl Program { + pub fn new(data: HashMap>) -> Self { + let mut ro_data = Vec::new(); + let mut ro_map = HashMap::new(); + for (key, val) in data { + ro_map.insert(key, ro_data.len()); + ro_data.extend(val); + } + Self { + data: ro_data, + ro_map, + } + } +} + +pub fn create_program( + ro_data: HashMap>, + functions: Vec>, +) -> Vec { + let mut data = Vec::new(); + let mut 
/// ELF64 file header. `#[repr(C)]` keeps the fields in declaration order so the
/// struct can be written to the output file as raw bytes.
#[repr(C)]
pub struct ELF64Header {
    magic: u32,
    class: u8,
    endianness: u8,
    ei_version: u8,
    os_abi: u8,
    os_abi_ver: u8,
    pad: [u8; 7],
    ty: u16,
    machine: u16,
    e_version: u32,
    entry: u64,
    program_header_offset: u64,
    section_header_offset: u64,
    flags: u32,
    header_size: u16,
    program_header_entry_size: u16,
    program_header_num: u16,
    section_header_entry_size: u16,
    section_header_num: u16,
    section_header_str_idx: u16,
}

/// ELF64 program (segment) header, written to the file as raw bytes.
#[repr(C)]
pub struct ProgramHeader {
    ty: u32,
    flags: u32,
    offset: u64,
    vaddr: u64,
    paddr: u64,
    filesz: u64,
    memsz: u64,
    align: u64,
}

/// ELF64 section header. No section is emitted by [`create`]; only the size of
/// this struct is recorded in the file header.
#[repr(C)]
pub struct SectionHeader {
    name_idx: u32,
    ty: u32,
    flags: u64,
    addr: u64,
    offset: u64,
    size: u64,
    link: u32,
    info: u32,
    addr_align: u64,
    entry_size: u64,
}

/// Wraps `program` in a minimal RISC-V ELF64 executable image.
///
/// The file is laid out as `[ELF header][one program header][program bytes]` and is
/// mapped by a single LOAD segment that starts at file offset 0 and virtual address
/// `0x1000`.
///
/// * `program` - raw bytes (data and code) placed right after the headers.
/// * `start_offset` - offset of the first instruction inside `program`; the entry
///   point is `0x1000 + header length + start_offset`.
///
/// Returns the complete file contents.
///
/// NOTE(review): the headers are emitted in host byte order while the file declares
/// itself little endian, so this presumably expects a little-endian host.
pub fn create(program: Vec<u8>, start_offset: u64) -> Vec<u8> {
    let addr_start = 0x1000;
    let page_size = 0x1000;
    let elf_header_len = std::mem::size_of::<ELF64Header>();
    let program_header_len = std::mem::size_of::<ProgramHeader>();
    let header_len = (elf_header_len + program_header_len) as u64;
    let program_pos = header_len;
    let program_size = program.len() as u64;
    // The segment begins at file offset 0, so it must span the headers as well as
    // the program; otherwise the entry point lies beyond the declared segment.
    let segment_size = header_len + program_size;
    let program_header = ProgramHeader {
        ty: 0x1,      // LOAD
        flags: 0b101, // executable, readable
        offset: 0x0,
        vaddr: addr_start,
        paddr: addr_start,
        filesz: segment_size,
        memsz: segment_size,
        align: page_size,
    };
    let header = ELF64Header {
        // In-memory bytes are exactly 0x7f 'E' 'L' 'F' regardless of host endianness.
        magic: u32::from_ne_bytes([0x7f, b'E', b'L', b'F']),
        class: 0x2,      // 64 bit
        endianness: 0x1, // little endian
        ei_version: 0x1,
        os_abi: 0x0, // system-v
        os_abi_ver: 0x0,
        pad: [0x0; 7],
        ty: 0x2,       // executable
        machine: 0xf3, // risc-v
        e_version: 0x1,
        entry: addr_start + program_pos + start_offset,
        // The program header directly follows the file header.
        program_header_offset: elf_header_len as u64,
        section_header_offset: 0x0,
        // C ABI (16 bit instruction align) + double precision floats
        flags: 0x1 | 0x4,
        header_size: elf_header_len as u16,
        program_header_entry_size: program_header_len as u16,
        program_header_num: 0x1,
        section_header_entry_size: std::mem::size_of::<SectionHeader>() as u16,
        section_header_num: 0x0,
        section_header_str_idx: 0x0,
    };
    let mut bytes: Vec<u8> = Vec::with_capacity(segment_size as usize);
    // SAFETY: both header structs are `#[repr(C)]`, consist only of integers and
    // have no padding bytes (64 and 56 bytes of fields), so every byte is initialized.
    bytes.extend_from_slice(unsafe { as_u8_slice(&header) });
    bytes.extend_from_slice(unsafe { as_u8_slice(&program_header) });
    bytes.extend(program);
    bytes
}

/// Views `p` as its raw bytes.
///
/// # Safety
///
/// `T` must not contain padding or otherwise uninitialized bytes; reading those
/// through the returned slice is undefined behavior.
unsafe fn as_u8_slice<T>(p: &T) -> &[u8] {
    std::slice::from_raw_parts((p as *const T) as *const u8, std::mem::size_of::<T>())
}
<< 15) + (funct3 << 12) + (rd.val() << 7) + opcode) +} +pub const fn s_type(rs2: Reg, rs1: Reg, funct3: u32, imm: i32, opcode: u32) -> I { + debug_assert!(in_bit_range(imm, 11, 0)); + I((bits(imm, 11, 5) << 25) + + (rs2.val() << 20) + + (rs1.val() << 15) + + (funct3 << 12) + + (bits(imm, 4, 0) << 7) + + opcode) +} +pub const fn b_type(rs2: Reg, rs1: Reg, funct3: u32, imm: i32, opcode: u32) -> I { + debug_assert!(in_bit_range(imm, 12, 1)); + I((bit(imm, 12) << 31) + + (bits(imm, 10, 5) << 25) + + (rs2.val() << 20) + + (rs1.val() << 15) + + (funct3 << 8) + + (bits(imm, 4, 1) << 8) + + (bit(imm, 11) << 7) + + opcode) +} +pub const fn u_type(imm: i32, rd: Reg, opcode: u32) -> I { + debug_assert!(in_bit_range(imm, 31, 12)); + I((bits(imm, 31, 12) << 12) + (rd.val() << 7) + opcode) +} +pub const fn j_type(imm: i32, rd: Reg, opcode: u32) -> I { + debug_assert!(in_bit_range(imm, 20, 1)); + I((bit(imm, 20) << 31) + + (bits(imm, 10, 1) << 21) + + (bit(imm, 11) << 20) + + (bits(imm, 19, 12) << 12) + + (rd.val() << 7) + + opcode) +} diff --git a/src/compiler/riscv64/instruction/func.rs b/src/compiler/riscv64/instruction/func.rs new file mode 100644 index 0000000..684a425 --- /dev/null +++ b/src/compiler/riscv64/instruction/func.rs @@ -0,0 +1,20 @@ +pub mod op { + pub const ADD : u32 = 0b000; + pub const SLL : u32 = 0b001; + pub const SLT : u32 = 0b010; + pub const SLTU: u32 = 0b011; + pub const XOR : u32 = 0b100; + pub const SR : u32 = 0b101; + pub const OR : u32 = 0b110; + pub const AND : u32 = 0b111; +} + +pub mod width { + pub const B : u32 = 0b000; + pub const H : u32 = 0b001; + pub const W : u32 = 0b010; + pub const D : u32 = 0b011; + pub const BU: u32 = 0b100; + pub const HU: u32 = 0b101; + pub const WU: u32 = 0b110; +} diff --git a/src/compiler/riscv64/instruction/mod.rs b/src/compiler/riscv64/instruction/mod.rs new file mode 100644 index 0000000..dfc06b4 --- /dev/null +++ b/src/compiler/riscv64/instruction/mod.rs @@ -0,0 +1,53 @@ +mod base; +mod reg; +mod opcode; +mod 
func; + +use base::*; +pub use reg::*; +use Instruction as I; +use opcode::*; +use func::{op::*, width}; + +pub const fn ecall() -> I { + i_type(0, zero, 0, zero, SYSTEM) +} +pub const fn ebreak() -> I { + i_type(1, zero, 0, zero, SYSTEM) +} +pub const fn auipc(dest: Reg, imm: i32) -> I { + u_type(imm, dest, AUIPC) +} +pub const fn ld(dest: Reg, offset: i32, base: Reg) -> I { + i_type(offset, base, width::D, dest, LOAD) +} +pub const fn lw(dest: Reg, offset: i32, base: Reg) -> I { + i_type(offset, base, width::W, dest, LOAD) +} +pub const fn lb(dest: Reg, offset: i32, base: Reg) -> I { + i_type(offset, base, width::B, dest, LOAD) +} +pub const fn sb(src: Reg, offset: i32, base: Reg) -> I { + s_type(src, base, width::B, offset, STORE) +} +pub const fn sw(src: Reg, offset: i32, base: Reg) -> I { + s_type(src, base, width::W, offset, STORE) +} +pub const fn sd(src: Reg, offset: i32, base: Reg) -> I { + s_type(src, base, width::D, offset, STORE) +} +pub const fn addi(dest: Reg, src: Reg, imm: i32) -> I { + i_type(imm, src, ADD, dest, IMM_OP) +} +pub const fn jal(offset: i32, dest: Reg) -> I { + j_type(offset, dest, JAL) +} + +// pseudo instructions that map to a single instruction + +pub const fn j(offset: i32) -> I { + jal(offset, zero) +} +pub const fn mv(dest: Reg, src: Reg) -> I { + addi(dest, src, 0) +} diff --git a/src/compiler/riscv64/instruction/opcode.rs b/src/compiler/riscv64/instruction/opcode.rs new file mode 100644 index 0000000..d1e12e7 --- /dev/null +++ b/src/compiler/riscv64/instruction/opcode.rs @@ -0,0 +1,9 @@ +pub const OPCODE_MASK: u32 = 0b1111111; + +pub const SYSTEM: u32 = 0b1110011; +pub const LOAD : u32 = 0b0000011; +pub const STORE : u32 = 0b0100011; +pub const AUIPC : u32 = 0b0010111; +pub const IMM_OP: u32 = 0b0010011; +pub const OP : u32 = 0b0110011; +pub const JAL : u32 = 0b1101111; diff --git a/src/compiler/riscv64/instruction/reg.rs b/src/compiler/riscv64/instruction/reg.rs new file mode 100644 index 0000000..e161b37 --- /dev/null +++ 
b/src/compiler/riscv64/instruction/reg.rs @@ -0,0 +1,93 @@ +#![allow(non_upper_case_globals)] + +pub struct Reg(u8); + +/// hard wired 0 +pub const zero: Reg = Reg(0); +/// return address +pub const ra: Reg = Reg(1); +/// stack pointer +pub const sp: Reg = Reg(2); +/// global pointer +pub const gp: Reg = Reg(3); +/// thread pointer +pub const tp: Reg = Reg(4); +/// temp / alternate link +pub const t0: Reg = Reg(5); +pub const t1: Reg = Reg(6); +pub const t2: Reg = Reg(7); + +pub const fp: Reg = Reg(8); +pub const s0: Reg = Reg(8); +pub const s1: Reg = Reg(9); + +pub const a0: Reg = Reg(10); +pub const a1: Reg = Reg(11); +pub const a2: Reg = Reg(12); +pub const a3: Reg = Reg(13); +pub const a4: Reg = Reg(14); +pub const a5: Reg = Reg(15); +pub const a6: Reg = Reg(16); +pub const a7: Reg = Reg(17); + +pub const s2: Reg = Reg(18); +pub const s3: Reg = Reg(19); +pub const s4: Reg = Reg(20); +pub const s5: Reg = Reg(21); +pub const s6: Reg = Reg(22); +pub const s7: Reg = Reg(23); +pub const s8: Reg = Reg(24); +pub const s9: Reg = Reg(25); +pub const s10: Reg = Reg(26); +pub const s11: Reg = Reg(27); + +pub const t3: Reg = Reg(28); +pub const t4: Reg = Reg(29); +pub const t5: Reg = Reg(30); +pub const t6: Reg = Reg(31); + + + +pub const ft0: Reg = Reg(0); +pub const ft1: Reg = Reg(1); +pub const ft2: Reg = Reg(2); +pub const ft3: Reg = Reg(3); +pub const ft4: Reg = Reg(4); +pub const ft5: Reg = Reg(5); +pub const ft6: Reg = Reg(6); +pub const ft7: Reg = Reg(7); + +pub const fs0: Reg = Reg(8); +pub const fs1: Reg = Reg(9); + +pub const fa0: Reg = Reg(10); +pub const fa1: Reg = Reg(11); +pub const fa2: Reg = Reg(12); +pub const fa3: Reg = Reg(13); +pub const fa4: Reg = Reg(14); +pub const fa5: Reg = Reg(15); +pub const fa6: Reg = Reg(16); +pub const fa7: Reg = Reg(17); + +pub const fs2: Reg = Reg(18); +pub const fs3: Reg = Reg(19); +pub const fs4: Reg = Reg(20); +pub const fs5: Reg = Reg(21); +pub const fs6: Reg = Reg(22); +pub const fs7: Reg = Reg(23); +pub const fs8: Reg 
= Reg(24); +pub const fs9: Reg = Reg(25); +pub const fs10: Reg = Reg(26); +pub const fs11: Reg = Reg(27); + +pub const ft8: Reg = Reg(28); +pub const ft9: Reg = Reg(29); +pub const ft10: Reg = Reg(30); +pub const ft11: Reg = Reg(31); + +impl Reg { + #[inline] + pub const fn val(&self) -> u32 { + self.0 as u32 + } +} diff --git a/src/compiler/riscv64/mod.rs b/src/compiler/riscv64/mod.rs new file mode 100644 index 0000000..e096644 --- /dev/null +++ b/src/compiler/riscv64/mod.rs @@ -0,0 +1,33 @@ +use crate::compiler::program::Instr; +mod elf; +mod instruction; +mod asm; + +use instruction::*; + +pub fn gen() -> Vec { + let mut program = Vec::new(); + let msg = b"Hello world!\n"; + program.extend(msg); + program.resize(((program.len() - 1) / 4 + 1) * 4, 0); + let start = program.len() as u64; + let instructions = [ + auipc(t0, 0), + addi(t0, t0, -(start as i32)), + addi(a0, zero, 1), + mv(a1, t0), + addi(a2, zero, msg.len() as i32), + addi(a7, zero, 64), + addi(t0, zero, 200), + ecall(), + // exit + addi(a0, zero, 0), + addi(a7, zero, 93), + ecall(), + j(0), + ]; + for i in instructions { + program.extend(i.to_le_bytes()); + } + elf::create(program, start) +} diff --git a/src/compiler/target.rs b/src/compiler/target.rs new file mode 100644 index 0000000..c882ec0 --- /dev/null +++ b/src/compiler/target.rs @@ -0,0 +1,7 @@ +pub trait Target { + type Reg; +} + +pub trait RegType { + type Size; +} diff --git a/src/main.rs b/src/main.rs index 419d0e7..f5ca2d8 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,17 +1,11 @@ #![feature(box_patterns)] +#![feature(const_unbounded_shifts)] +#![feature(unbounded_shifts)] mod util; -mod v1; -mod v2; +mod compiler; +mod parser; fn main() { - let arg = std::env::args_os().nth(1); - if let Some(path) = arg { - let file = std::fs::read_to_string(path).expect("failed to read file"); - println!("{file}"); - v1::parse_file(&file); - // v2::parse_file(&file); - } else { - v1::run_stdin(); - } + compiler::main(); } diff --git 
a/src/parser/mod.rs b/src/parser/mod.rs new file mode 100644 index 0000000..4d841d3 --- /dev/null +++ b/src/parser/mod.rs @@ -0,0 +1,14 @@ +mod v1; +mod v2; + +pub fn main() { + let arg = std::env::args_os().nth(1); + if let Some(path) = arg { + let file = std::fs::read_to_string(path).expect("failed to read file"); + println!("{file}"); + v1::parse_file(&file); + // v2::parse_file(&file); + } else { + v1::run_stdin(); + } +} diff --git a/src/v1/parser/body.rs b/src/parser/v1/body.rs similarity index 100% rename from src/v1/parser/body.rs rename to src/parser/v1/body.rs diff --git a/src/v1/parser/cursor.rs b/src/parser/v1/cursor.rs similarity index 100% rename from src/v1/parser/cursor.rs rename to src/parser/v1/cursor.rs diff --git a/src/v1/parser/error.rs b/src/parser/v1/error.rs similarity index 100% rename from src/v1/parser/error.rs rename to src/parser/v1/error.rs diff --git a/src/v1/parser/expr.rs b/src/parser/v1/expr.rs similarity index 96% rename from src/v1/parser/expr.rs rename to src/parser/v1/expr.rs index dedafb8..87361d1 100644 --- a/src/v1/parser/expr.rs +++ b/src/parser/v1/expr.rs @@ -1,13 +1,13 @@ use std::fmt::{Debug, Write}; use super::token::{Symbol, Token}; -use super::{Body, Node, Parsable, ParserError, ParserErrors, TokenCursor, Val}; +use super::{Body, Node, Parsable, ParserError, ParserErrors, TokenCursor, Literal}; pub type ExprNode = Node>; #[derive(Clone)] pub enum Expr { - Val(Node), + Lit(Node), Ident(String), BinaryOp(Operator, ExprNode, ExprNode), Block(Node), @@ -30,7 +30,7 @@ pub enum Operator { impl Expr { pub fn ended_with_error(&self) -> bool { match self { - Expr::Val(_) => false, + Expr::Lit(_) => false, Expr::Ident(_) => false, Expr::BinaryOp(_, _, e) => e.is_err() || e.as_ref().is_ok_and(|e| e.ended_with_error()), Expr::Block(b) => b.is_err(), @@ -48,8 +48,8 @@ impl Parsable for Expr { cursor.next(); if cursor.expect_peek()?.is_symbol(Symbol::CloseParen) { cursor.next(); - return Ok(Expr::Val(Node::new( - Val::Unit, + 
return Ok(Expr::Lit(Node::new( + Literal::Unit, cursor.next_pos().char_span(), ))); } @@ -62,7 +62,7 @@ impl Parsable for Expr { } else if next.is_symbol(Symbol::OpenCurly) { Self::Block(Node::parse(cursor, errors)) } else if let Some(val) = Node::maybe_parse(cursor, errors) { - Self::Val(val) + Self::Lit(val) } else { let next = cursor.peek().unwrap(); match &next.token { @@ -172,7 +172,7 @@ impl Operator { impl Debug for Expr { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { match self { - Expr::Val(c) => c.fmt(f)?, + Expr::Lit(c) => c.fmt(f)?, Expr::Ident(n) => f.write_str(n)?, Expr::Block(b) => b.fmt(f)?, Expr::BinaryOp(op, e1, e2) => { diff --git a/src/v1/mod.rs b/src/parser/v1/mod.rs similarity index 69% rename from src/v1/mod.rs rename to src/parser/v1/mod.rs index 00162fc..2a52ce8 100644 --- a/src/v1/mod.rs +++ b/src/parser/v1/mod.rs @@ -1,8 +1,22 @@ use std::io::{stdout, BufRead, BufReader}; -mod parser; +mod body; +mod cursor; +mod error; +mod expr; +mod module; +mod node; +mod token; +mod val; -use parser::{Module, Node, ParserErrors, Statement, TokenCursor}; +pub use body::*; +pub use cursor::*; +pub use error::*; +pub use expr::*; +pub use module::*; +pub use node::*; +pub use val::*; +use token::*; pub fn parse_file(file: &str) { let mut errors = ParserErrors::new(); @@ -10,8 +24,9 @@ pub fn parse_file(file: &str) { if let Ok(module) = node.as_ref() { println!("{module:#?}"); }; + let out = &mut stdout(); for err in errors.errs { - err.write_for(&mut stdout(), file).unwrap(); + err.write_for(out, file).unwrap(); } } @@ -20,12 +35,12 @@ pub fn run_stdin() { let mut errors = ParserErrors::new(); let str = &line.expect("failed to read line"); let mut cursor = TokenCursor::from(&str[..]); - let out = &mut stdout(); if let Ok(expr) = Node::::parse(&mut cursor, &mut errors).as_ref() { println!("{:?}", expr); } + let out = &mut stdout(); for err in errors.errs { - err.write_for(&mut stdout(), str).unwrap(); + err.write_for(out, 
str).unwrap(); } } } diff --git a/src/v1/parser/mod.rs b/src/parser/v1/module.rs similarity index 87% rename from src/v1/parser/mod.rs rename to src/parser/v1/module.rs index bad485e..857d880 100644 --- a/src/v1/parser/mod.rs +++ b/src/parser/v1/module.rs @@ -1,21 +1,6 @@ use std::fmt::Debug; -mod body; -mod cursor; -mod error; -mod expr; -mod token; -mod val; -mod node; - -pub use body::*; -pub use cursor::*; -pub use error::*; -pub use expr::*; -pub use val::*; -pub use node::*; - -use token::*; +use super::{token::*, Body, Node, Parsable, ParserError, ParserErrors, TokenCursor}; #[derive(Debug)] pub struct Module { diff --git a/src/v1/parser/node.rs b/src/parser/v1/node.rs similarity index 100% rename from src/v1/parser/node.rs rename to src/parser/v1/node.rs diff --git a/src/v1/parser/token/cursor.rs b/src/parser/v1/token/cursor.rs similarity index 100% rename from src/v1/parser/token/cursor.rs rename to src/parser/v1/token/cursor.rs diff --git a/src/v1/parser/token/file.rs b/src/parser/v1/token/file.rs similarity index 100% rename from src/v1/parser/token/file.rs rename to src/parser/v1/token/file.rs diff --git a/src/v1/parser/token/keyword.rs b/src/parser/v1/token/keyword.rs similarity index 100% rename from src/v1/parser/token/keyword.rs rename to src/parser/v1/token/keyword.rs diff --git a/src/v1/parser/token/mod.rs b/src/parser/v1/token/mod.rs similarity index 100% rename from src/v1/parser/token/mod.rs rename to src/parser/v1/token/mod.rs diff --git a/src/v1/parser/token/symbol.rs b/src/parser/v1/token/symbol.rs similarity index 100% rename from src/v1/parser/token/symbol.rs rename to src/parser/v1/token/symbol.rs diff --git a/src/v1/parser/val.rs b/src/parser/v1/val.rs similarity index 97% rename from src/v1/parser/val.rs rename to src/parser/v1/val.rs index 27457fc..2683ed3 100644 --- a/src/v1/parser/val.rs +++ b/src/parser/v1/val.rs @@ -2,7 +2,7 @@ use super::{CharCursor, MaybeParsable, ParserError, ParserErrors, Symbol, Token, use std::fmt::Debug; 
#[derive(Clone, PartialEq, Eq)] -pub enum Val { +pub enum Literal { String(String), Char(char), Number(Number), @@ -16,7 +16,7 @@ pub struct Number { pub ty: Option, } -impl MaybeParsable for Val { +impl MaybeParsable for Literal { fn maybe_parse(cursor: &mut TokenCursor, _: &mut ParserErrors) -> Result, ParserError> { let inst = cursor.expect_peek()?; let mut res = match &inst.token { @@ -85,7 +85,7 @@ pub fn string_from(cursor: &mut CharCursor) -> Result { } } -impl Debug for Val { +impl Debug for Literal { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { match self { Self::String(str) => str.fmt(f), diff --git a/src/v2/parser/body.rs b/src/parser/v2/body.rs similarity index 100% rename from src/v2/parser/body.rs rename to src/parser/v2/body.rs diff --git a/src/v2/parser/cursor.rs b/src/parser/v2/cursor.rs similarity index 100% rename from src/v2/parser/cursor.rs rename to src/parser/v2/cursor.rs diff --git a/src/v2/parser/error.rs b/src/parser/v2/error.rs similarity index 100% rename from src/v2/parser/error.rs rename to src/parser/v2/error.rs diff --git a/src/v2/parser/expr.rs b/src/parser/v2/expr.rs similarity index 100% rename from src/v2/parser/expr.rs rename to src/parser/v2/expr.rs diff --git a/src/v2/mod.rs b/src/parser/v2/mod.rs similarity index 73% rename from src/v2/mod.rs rename to src/parser/v2/mod.rs index 790b956..27c633e 100644 --- a/src/v2/mod.rs +++ b/src/parser/v2/mod.rs @@ -1,8 +1,17 @@ -use std::{ffi::OsStr, io::{BufRead, BufReader}}; +use std::io::{BufRead, BufReader}; -use parser::{print_error, CharCursor, Module, Statement}; +mod body; +mod cursor; +mod error; +mod expr; +mod module; +mod util; -mod parser; +pub use body::*; +pub use cursor::*; +pub use error::*; +pub use expr::*; +pub use module::*; pub fn parse_file(file: &str) { match Module::parse(&mut CharCursor::from(file)) { diff --git a/src/v2/parser/mod.rs b/src/parser/v2/module.rs similarity index 91% rename from src/v2/parser/mod.rs rename to 
/// Returns bit `i` of `x` as `0` or `1`.
///
/// `x` is treated as its two's-complement `u32` bit pattern, so bit 31 is the sign
/// bit. An index of 32 or more yields `0`.
pub const fn bit(x: i32, i: u32) -> u32 {
    // `checked_shr` returns `None` once the shift reaches the bit width; report 0
    // there, as an unbounded shift would.
    match (x as u32).checked_shr(i) {
        // The mask must be 1 (the lowest bit after shifting), not `2 << i`.
        Some(shifted) => shifted & 1,
        None => 0,
    }
}
u32 { +// let x = u(x); +// x.unbounded_shr(i) & 2u32 << i +// } +// +// pub const fn in_bit_range + Shr>( +// x: T, +// high: u8, +// low: u8, +// ) -> bool { +// if x < 0 { +// if high == low { +// return false; +// } +// (bits(x, high - 1, low) | !mask(high - 1, low)) == u(x) +// } else { +// bits(x, high, low) << low == u(x) +// } +// } + +// pub struct Bits< +// T: Shl + Shr, +// const S: bool, +// const H: u8, +// const L: u8, +// >(T); +// pub struct U32Bits(u32); +// +// impl + Shr + Add, const S: bool, const H: u8, const L: u8> +// Bits +// { +// pub const fn new(val: T) -> Self { +// assert!(in_bit_range(val, H, L)); +// Self(val + L) +// } +// } + + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn get_bits() { + assert_eq!(bits(0b10111010, 5, 3), 0b111); + assert_eq!(bits(0b10111010, 7, 5), 0b101); + assert_eq!(bits(0b10111010, 2, 0), 0b010); + assert_eq!(bits(0b10111010, 7, 7), 0b1); + assert_eq!(bits(0b1, 0, 0), 0b1); + assert_eq!(bits(0b1, 1, 1), 0b0); + } + + #[test] + fn range() { + assert!(!in_bit_range(0b00111100, 5, 3)); + assert!(!in_bit_range(0b00111100, 4, 2)); + assert!(in_bit_range(0b000111100, 5, 2)); + assert!(in_bit_range(0b000000001, 0, 0)); + assert!(in_bit_range(0b000001000, 3, 3)); + + assert!(!in_bit_range(-3, 1, 0)); + assert!(!in_bit_range(-5, 2, 2)); + assert!(!in_bit_range(-5, 4, 3)); + assert!(in_bit_range(-1, 1, 0)); + assert!(in_bit_range(-4, 2, 0)); + assert!(in_bit_range(-5, 3, 2)); + } +} diff --git a/src/util/mod.rs b/src/util/mod.rs index 9fb957c..9d34b52 100644 --- a/src/util/mod.rs +++ b/src/util/mod.rs @@ -1,38 +1,5 @@ -use core::fmt; +mod padder; +mod bits; -pub struct Padder<'buf> { - buf: &'buf mut (dyn fmt::Write + 'buf), - on_newline: bool, -} - -impl fmt::Write for Padder<'_> { - fn write_str(&mut self, s: &str) -> fmt::Result { - for s in s.split_inclusive('\n') { - if self.on_newline { - self.buf.write_str(" ")?; - } - - self.on_newline = s.ends_with('\n'); - self.buf.write_str(s)?; - } - - Ok(()) - 
} - - fn write_char(&mut self, c: char) -> fmt::Result { - if self.on_newline { - self.buf.write_str(" ")?; - } - self.on_newline = c == '\n'; - self.buf.write_char(c) - } -} - -impl<'buf> Padder<'buf> { - pub fn new(buf: &'buf mut (dyn fmt::Write + 'buf)) -> Self { - Self { - buf, - on_newline: false, - } - } -} +pub use padder::*; +pub use bits::*; diff --git a/src/util/padder.rs b/src/util/padder.rs new file mode 100644 index 0000000..9fb957c --- /dev/null +++ b/src/util/padder.rs @@ -0,0 +1,38 @@ +use core::fmt; + +pub struct Padder<'buf> { + buf: &'buf mut (dyn fmt::Write + 'buf), + on_newline: bool, +} + +impl fmt::Write for Padder<'_> { + fn write_str(&mut self, s: &str) -> fmt::Result { + for s in s.split_inclusive('\n') { + if self.on_newline { + self.buf.write_str(" ")?; + } + + self.on_newline = s.ends_with('\n'); + self.buf.write_str(s)?; + } + + Ok(()) + } + + fn write_char(&mut self, c: char) -> fmt::Result { + if self.on_newline { + self.buf.write_str(" ")?; + } + self.on_newline = c == '\n'; + self.buf.write_char(c) + } +} + +impl<'buf> Padder<'buf> { + pub fn new(buf: &'buf mut (dyn fmt::Write + 'buf)) -> Self { + Self { + buf, + on_newline: false, + } + } +}