Compare commits

...
51 Commits
Author SHA1 Message Date
iris d66f8f02b7 modrm 2026-06-16 21:03:30 -04:00
iris 4e06e474ea add 2026-06-16 02:29:56 -04:00
iris 84e184518f small stuff 2026-06-13 13:47:12 -04:00
iris 1e39675c29 impl sub 2026-06-13 13:44:03 -04:00
iris bdeb0d821c more rex stuff 2026-06-12 23:27:33 -04:00
iris 550d58d6f4 more rex stuff 2026-06-12 22:57:01 -04:00
iris fa2a6db2e2 edit rex fn 2026-06-12 22:39:58 -04:00
iris 571ff70fa1 convert programs into tests 2026-06-12 22:24:35 -04:00
iris 715a50b1fa add rest of basic mov instructions 2026-06-12 21:53:22 -04:00
iris 397176759d comment 2026-06-12 17:40:48 -04:00
iris 51bdc5c684 nasm test cache 2026-06-12 17:38:22 -04:00
iris e2ebf5c681 switch away from macro for nasm tests 2026-06-12 17:15:30 -04:00
iris 7280f7b071 give immediates a sign & fix stuff 2026-06-12 17:08:42 -04:00
iris e199620856 update immediates for tests 2026-06-12 05:33:22 -04:00
iris 663e6648ca gt8 sip 2026-06-12 05:15:23 -04:00
iris ceebcdc0e3 tests, but at what cost 2026-06-12 05:09:38 -04:00
iris 7004cdbfe2 arst 2026-06-11 22:07:21 -04:00
iris 433c3114d5 fixes 2026-06-11 22:06:19 -04:00
iris b03f755252 remove intermediate enum / directly encode assembly 2026-06-11 21:49:04 -04:00
iris 91f5db6950 idea (doesn't compile) 2026-06-11 17:39:44 -04:00
iris ddf63ad817 lots of refactoring 2026-06-11 00:15:09 -04:00
iris bc922a6086 delete comment 2026-06-09 01:07:14 -04:00
iris ea305909a0 WINDOWS HELLO WORLD (scuffed) 2026-06-09 01:02:40 -04:00
iris e4acaf40aa IMPORTS WORKING 2026-06-09 00:08:26 -04:00
iris 6bc502d284 work 2026-06-08 20:30:21 -04:00
iris c17122679e PE import start (fixed header size -> sections work) 2026-06-08 17:31:01 -04:00
iris c9add923be pe work 2026-06-07 21:22:32 -04:00
iris a086fa6590 push and pop 2026-06-06 23:47:38 -04:00
iris 66710370bf x86_64 call & ret 2026-06-06 23:26:17 -04:00
iris 69cd249671 small stuff 2026-06-06 22:04:11 -04:00
iris ba706ebb73 move x86_64 bin test 2026-06-06 21:31:14 -04:00
iris a3f934be21 remove old code 2026-06-06 21:20:21 -04:00
iris ef35509c98 arbitrary addr 2026-06-06 21:19:09 -04:00
iris 4587f687b9 arch refactor + backend ir start 2026-06-06 21:00:39 -04:00
iris 0ac7c5cc02 pie elf 2026-06-04 20:35:01 -04:00
iris 978bac88ed linking / symbol stuff 2026-06-04 04:28:14 -04:00
iris 380a0f977a x86_64 compiler + elf output (can compile code that returns exit code) 2026-06-03 01:50:43 -04:00
iris 473ddab0d4 x86_64 arch + asm start 2026-06-02 03:24:21 -04:00
iris c2a8c50a6d ir start 2026-06-01 23:03:35 -04:00
iris 1d568f8ce3 steal from jai 2026-06-01 22:40:24 -04:00
iris d864adfd05 work 2026-04-18 00:16:03 -04:00
iris b3f77076d4 work 2026-04-17 18:51:12 -04:00
iris 2f91e454dd stuff 2026-04-17 01:49:43 -04:00
iris e5ae506a84 work 2026-04-17 00:09:00 -04:00
iris 83edad0cd8 lol 2026-04-12 17:38:35 -04:00
iris f702f47714 gaming 2026-04-12 17:26:39 -04:00
iris 2582e8c87e work 2026-04-11 15:21:03 -04:00
iris 229b026573 work 2026-04-11 03:50:43 -04:00
iris 29316e6353 work 2026-04-10 16:13:45 -04:00
iris bdf08ce52c stuff 2026-04-08 23:28:50 -04:00
iris edabc22431 parser3 2026-04-08 17:54:42 -04:00
173 changed files with 3666 additions and 10783 deletions
Generated
+109
View File
@@ -2,6 +2,115 @@
# It is not intended for manual editing.
version = 4
[[package]]
name = "arrayvec"
version = "0.7.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7c02d123df017efcdfbd739ef81735b36c5ba83ec3c59c80a9d7ecc718f92e50"
[[package]]
name = "bitcode"
version = "0.6.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0a6ed1b54d8dc333e7be604d00fa9262f4635485ffea923647b6521a5fff045d"
dependencies = [
"arrayvec",
"bitcode_derive",
"bytemuck",
"glam",
"serde",
]
[[package]]
name = "bitcode_derive"
version = "0.6.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "238b90427dfad9da4a9abd60f3ec1cdee6b80454bde49ed37f1781dd8e9dc7f9"
dependencies = [
"proc-macro2",
"quote",
"syn",
]
[[package]]
name = "bytemuck"
version = "1.25.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c8efb64bd706a16a1bdde310ae86b351e4d21550d98d056f22f8a7f7a2183fec"
[[package]]
name = "glam"
version = "0.33.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "898f5a568a84989b6c0f8caa50a93074b97dbdc58fc6d9543157bb4562758933"
[[package]]
name = "lang"
version = "0.1.0"
dependencies = [
"bitcode",
]
[[package]]
name = "proc-macro2"
version = "1.0.106"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8fd00f0bb2e90d81d1044c2b32617f68fcb9fa3bb7640c23e9c748e53fb30934"
dependencies = [
"unicode-ident",
]
[[package]]
name = "quote"
version = "1.0.45"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "41f2619966050689382d2b44f664f4bc593e129785a36d6ee376ddf37259b924"
dependencies = [
"proc-macro2",
]
[[package]]
name = "serde"
version = "1.0.228"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9a8e94ea7f378bd32cbbd37198a4a91436180c5bb472411e48b5ec2e2124ae9e"
dependencies = [
"serde_core",
]
[[package]]
name = "serde_core"
version = "1.0.228"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "41d385c7d4ca58e59fc732af25c3983b67ac852c1a25000afe1175de458b67ad"
dependencies = [
"serde_derive",
]
[[package]]
name = "serde_derive"
version = "1.0.228"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d540f220d3187173da220f885ab66608367b6574e925011a9353e4badda91d79"
dependencies = [
"proc-macro2",
"quote",
"syn",
]
[[package]]
name = "syn"
version = "2.0.117"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e665b8803e7b1d2a727f4023456bbbbe74da67099c585258af0ad9c5013b9b99"
dependencies = [
"proc-macro2",
"quote",
"unicode-ident",
]
[[package]]
name = "unicode-ident"
version = "1.0.24"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e6e4313cd5fcd3dad5cafa179702e2b244f760991f45397d14d4ebf38247da75"
+6 -1
View File
@@ -1,4 +1,9 @@
[package]
name = "lang"
version = "0.1.0"
edition = "2021"
edition = "2024"
[dependencies]
[dev-dependencies]
bitcode = "0.6.9"
-22
View File
@@ -1,22 +0,0 @@
# the
my child (programming language)
Everything is subject to change right now, and this README is probably not up to date.
Nothing is thoroughly tested yet either, so there are almost certainly bugs I don't know about.
`cargo run -- data/test.lang`
currently working!!:
- functions (arguments, returning)
- assembly blocks (input, output for expression)
- structs (construction, field access, modifying, nesting)
todo:
- generics (groundwork is there I think)
- traits (unsure exactly how I wanna do this, either way need number ones)
- actually handle jumps & LIs that are too large
- iterators?
- borrow checking
- basic optimization: use registers, remove temp var moves
- Cow str
-35
View File
@@ -1,35 +0,0 @@
fn main() {
// let x = 3;
let y = 4 + 4 + 5;
let z = 1 * 2 - 3 / test * 4;
let r = 1-2.5 + 3;
let w = 1 * (2 - 3) / "test" - 7
let a = test('3');
let c = '3' ;
test(5);
return 5 +
a;
r = )!!;
r = !3;
r = 3 + !;
let b = (test2.func)(3 + 4)(8)("a");
let x = {
return 5;
let a = 3;
b
};
exit(3, let, "hello");
}
fn test() {
let r = 3;
let a = }
}
fn test2() {
let a anerit;
}
fn test3() {
let x = 3
-273
View File
@@ -1,273 +0,0 @@
println("testy");
let x = 3;
print_dec(x);
subtest();
start();
fn subtest() {
fn el() {
println("helo el");
}
el();
}
struct Test {
a: 64,
b: 64,
c: 64,
}
struct Test2 {
a: 64,
b: Test,
c: Test,
}
fn start() {
println("Helld!");
print_hex(rem(10, 7));
println("");
println("Hello World!!!!!");
thinger();
let x = 3;
if not(not(lt(x, 5))) {
println("tada!");
};
println("before:");
x = 0;
loop {
if not(lt(x, 10)) {
break;
};
println("RAAAAA");
x = add(x, 1);
};
println("after");
let infer_me: slice<_> = "hello";
print(tester());
let test: Test = Test {
a: 10,
b: 4,
c: 0,
};
structer(test);
arger("a", "b", "c");
let z = sub(test.a, 10);
print_hex(add(mul(sub(add(10, test.b), 1), 3), z));
print("test: 0x");
print_hex(31);
println("");
generic();
exit(0);
}
fn structer(test: Test) {
print("test {\n a: ");
print_dec(test.a);
print("\n b: ");
print_dec(test.b);
print("\n c: ");
print_dec(test.c);
println("\n}");
print("update c: ");
test.c = add(test.a, test.b);
print_dec(test.c);
println("");
let test2: Test2 = Test2 {
a: 3,
b: test,
c: test,
};
test2.c.c = 20;
print("test2.b.c: ");
print_dec(test2.b.c);
println("");
print("test2.c.c: ");
print_dec(test2.c.c);
println("");
}
struct GTest<T, U, V> {
a: T,
b: U,
c: V,
}
fn generic() {
let gt = GTest {
a: 39,
b: "hello",
c: 40,
};
print("generic: ");
print_dec(gt.a);
print(", ");
print(gt.b);
print_dec(gt.c);
println("");
}
fn thinger() {
print("estamos jugando\n");
}
fn unused() {
print("el unused\n");
}
fn println(msg: slice<8>) {
print(msg);
print("\n");
}
fn print(msg: slice<8>) {
asm (a1 = msg@) {
ld a2, 8, a1
ld a1, 0, a1
li a0, 1
li a7, 64
ecall
}
}
fn print_hex(x: 64) {
let i = 64;
loop {
i = sub(i, 4);
let c = and(shr(x, i), 15);
if gt(c, 9) {
c = add(c, 7);
};
c = add(c, 48);
asm (a1 = c@) {
li a2, 1
li a0, 1
li a7, 64
ecall
};
if lt(i, 1) {
break;
};
}
}
fn print_dec(x: 64) {
let i = 1;
loop {
if gt(i, x) {
if lt(i, 2) {
print("0");
return;
};
break;
};
i = mul(i, 10);
};
let found = 0;
loop {
i = div(i, 10);
let c = rem(div(x, i), 10);
if and(lt(c, 1), not(found)) {
continue;
};
found = 1;
if gt(c, 9) {
c = add(c, 7);
};
c = add(c, 48);
asm (a1 = c@) {
li a2, 1
li a0, 1
li a7, 64
ecall
};
if lt(i, 2) {
break;
};
};
if not(found) {
print("0");
}
}
fn add(a: 64, b: 64) -> 64 {
asm (t0 = a, t1 = b, out = t0) {
add t0, t0, t1
}
}
fn mul(a: 64, b: 64) -> 64 {
asm (t0 = a, t1 = b, out = t0) {
mul t0, t0, t1
}
}
fn div(a: 64, b: 64) -> 64 {
asm (t0 = a, t1 = b, out = t0) {
div t0, t0, t1
}
}
fn sub(a: 64, b: 64) -> 64 {
asm (t0 = a, t1 = b, out = t0) {
sub t0, t0, t1
}
}
fn rem(a: 64, b: 64) -> 64 {
asm (t0 = a, t1 = b, out = t0) {
rem t0, t0, t1
}
}
fn shr(a: 64, b: 64) -> 64 {
asm (t0 = a, t1 = b, out = t0) {
srl t0, t0, t1
}
}
fn shl(a: 64, b: 64) -> 64 {
asm (t0 = a, t1 = b, out = t0) {
sll t0, t0, t1
}
}
fn lt(a: 64, b: 64) -> 64 {
asm (t0 = a, t1 = b, out = t0) {
slt t0, t0, t1
}
}
fn gt(a: 64, b: 64) -> 64 {
lt(b, a)
}
fn and(a: 64, b: 64) -> 64 {
asm (t0 = a, t1 = b, out = t0) {
and t0, t0, t1
}
}
fn not(a: 64) -> 64 {
asm (t0 = a, out = t0) {
xori t0, t0, 1
}
}
fn arger(a: slice<8>, b: slice<8>, c: slice<8>) {
print(a);
print(b);
println(c);
}
fn exit(status: 64) {
asm (a0 = status) {
li a7, 93
ecall
};
}
fn tester() -> slice<8> {
"hola\n"
}
-6
View File
@@ -1,6 +0,0 @@
import util;
util.println("hello!");
let x = 39;
util.exit(x);
-146
View File
@@ -1,146 +0,0 @@
// Ends the program: issues an `ecall` with `status` bound to a0 and 93 loaded into a7.
// NOTE(review): 93 is presumably the RISC-V Linux `exit` syscall number — confirm.
fn exit(status: 64) {
asm (a0 = status) {
li a7, 93
ecall
};
}
fn println(msg: slice<8>) {
print(msg);
print("\n");
}
// Writes `msg` without a trailing newline by issuing an `ecall` with a7 = 64 and a0 = 1.
// NOTE(review): assumes `msg@` binds the address of the slice and that the slice is laid
// out as {data pointer at offset 0, length at offset 8}; a7 = 64 / a0 = 1 is presumably
// the Linux `write` syscall on stdout — confirm.
fn print(msg: slice<8>) {
asm (a1 = msg@) {
// a2 must be loaded before a1 is overwritten, since both loads use a1 as the base
ld a2, 8, a1
ld a1, 0, a1
li a0, 1
li a7, 64
ecall
}
}
// Prints `x` as 16 hexadecimal digits (one per 4-bit group, highest group first),
// using upper-case letters for the values 10-15.
fn print_hex(x: 64) {
// bit offset of the next nibble; decremented before use, so the first shift is 60
let i = 64;
loop {
i = sub(i, 4);
let c = and(shr(x, i), 15);
// values 10..15 skip the 7 ASCII characters between '9' and 'A'
if gt(c, 9) {
c = add(c, 7);
};
// 48 is ASCII '0'
c = add(c, 48);
// emit one byte from the address of `c` (a2 = 1 byte, a0 = 1, a7 = 64)
asm (a1 = c@) {
li a2, 1
li a0, 1
li a7, 64
ecall
};
// stop after the nibble at bit offset 0 has been printed
if lt(i, 1) {
break;
};
}
}
// Prints `x` in decimal, without leading zeros.
// NOTE(review): `lt`/`gt` are built on `slt` (a signed compare), so this presumably only
// handles non-negative values — confirm.
fn print_dec(x: 64) {
// find the smallest power of ten that is greater than `x`
let i = 1;
loop {
if gt(i, x) {
// i is still 1, so x < 1: print a single "0" and stop
if lt(i, 2) {
print("0");
return;
};
break;
};
i = mul(i, 10);
};
// set to 1 once the first non-zero digit has been printed
let found = 0;
loop {
i = div(i, 10);
let c = rem(div(x, i), 10);
// skip leading zeros
if and(lt(c, 1), not(found)) {
continue;
};
found = 1;
// NOTE(review): `c` is a remainder modulo 10, so this branch looks unreachable
// (it appears to be carried over from print_hex)
if gt(c, 9) {
c = add(c, 7);
};
// 48 is ASCII '0'
c = add(c, 48);
// emit one byte from the address of `c`
asm (a1 = c@) {
li a2, 1
li a0, 1
li a7, 64
ecall
};
// i == 1 means the ones digit was just printed
if lt(i, 2) {
break;
};
};
if not(found) {
print("0");
}
}
fn add(a: 64, b: 64) -> 64 {
asm (t0 = a, t1 = b, out = t0) {
add t0, t0, t1
}
}
fn mul(a: 64, b: 64) -> 64 {
asm (t0 = a, t1 = b, out = t0) {
mul t0, t0, t1
}
}
fn div(a: 64, b: 64) -> 64 {
asm (t0 = a, t1 = b, out = t0) {
div t0, t0, t1
}
}
fn sub(a: 64, b: 64) -> 64 {
asm (t0 = a, t1 = b, out = t0) {
sub t0, t0, t1
}
}
fn rem(a: 64, b: 64) -> 64 {
asm (t0 = a, t1 = b, out = t0) {
rem t0, t0, t1
}
}
fn shr(a: 64, b: 64) -> 64 {
asm (t0 = a, t1 = b, out = t0) {
srl t0, t0, t1
}
}
fn shl(a: 64, b: 64) -> 64 {
asm (t0 = a, t1 = b, out = t0) {
sll t0, t0, t1
}
}
fn lt(a: 64, b: 64) -> 64 {
asm (t0 = a, t1 = b, out = t0) {
slt t0, t0, t1
}
}
fn gt(a: 64, b: 64) -> 64 {
lt(b, a)
}
fn and(a: 64, b: 64) -> 64 {
asm (t0 = a, t1 = b, out = t0) {
and t0, t0, t1
}
}
// Boolean negation for values that are exactly 0 or 1.
// `xori t0, t0, 1` only flips the lowest bit, so any other input is not mapped to 0/1
// (for example 2 becomes 3).
fn not(a: 64) -> 64 {
asm (t0 = a, out = t0) {
xori t0, t0, 1
}
}
-51
View File
@@ -1,51 +0,0 @@
resolution overview
loop {
resolve idents
resolve + type check / match instructions
URes.resolve(errs) can return: failed, ok(id), waiting
each instruction keeps track of progress
eg. fns: match each arg
updates to whether it's waiting or finished: ok or err
only finish if no sub tasks are waiting
finished = "macro ready"
run macros / code modification on "macro ready" (fns, structs)
eg. insert instructions
hygienic; only take in from scope
add inserted instructions to unresolved list
finished = "analysis ready"
analysis on "analysis ready" fns
eg. does this return in all code paths
finished + all correct = "ready to lower"
lower "ready to lower" fns
run lowered const fns / var expressions
}
move names into separate vec with origins?
make struct fields a vec, resolve to index?
inner values that auto generate map function:
enum Thing<inner T> {
A(T),
B(T, T),
C
}
or
#[derive(Map(T))]
enum Thing<T> { ... }
// scoping here is bad :woozy:
{([<
std::Option:(u32)::Some(3)
func:(u32)("hello", test, 3);
std::Option:[u32]::Some(3)
func:[T]("hello", test, 3);
std::Option::<u32>::Some(3)
func::<u32>(3)
std.Option.[u32].Some(3)
func.[T]("hello", test, 3);
std::Option:<u32>::Some(3)
func:<u32>(3)
+13
View File
@@ -0,0 +1,13 @@
use crate::{
backend::{Addr, LinkedProgram, Program},
io::CompilerMsg,
};
pub mod x86_64;
/// A target architecture the backend can compile a [`Program`] for.
pub trait Arch: Sized {
/// Name of the architecture.
const NAME: &str;
/// Architecture-specific assembly representation.
type Asm;
/// Address type used by the linked output.
type Addr: Addr;
/// Compiles `p` into a linked program, or returns a compiler message on failure.
fn compile(p: &Program<Self>) -> Result<LinkedProgram<Self::Addr>, CompilerMsg>;
}
+83
View File
@@ -0,0 +1,83 @@
use std::collections::HashMap;
use super::*;
use crate::backend::{LibImport, LinkedProgram, SymImport, SymTable, Symbol};
use util::*;
/// Working state of one x86_64 compilation run.
pub struct Encoder<'a> {
/// Machine code emitted so far, plus the symbol slots that still need patching.
pub code: Code,
/// Addresses of the symbols defined while encoding.
pub sym_tab: SymTable<u64>,
/// For external symbols: byte positions of every 4-byte slot that refers to them.
pub sym_refs: HashMap<Symbol, Vec<usize>>,
/// The program being compiled.
pub program: &'a Program<X86_64>,
}
/// Compiles and links `p` into a single x86_64 code image.
///
/// The image is built in three passes:
/// 1. the program's data is encoded, followed by every function; each function's symbol
///    is recorded at the byte offset where its code starts,
/// 2. every pending 4-byte symbol slot is resolved: slots for external symbols are
///    collected per symbol, all others are patched in place via `addr_offset`,
/// 3. the collected external usages are grouped into one [`LibImport`] per library.
///
/// # Errors
/// Returns a `missing symbol …` message when a non-external symbol is referenced but was
/// never defined, and forwards any error from encoding an instruction.
pub fn compile(p: &Program<X86_64>) -> Result<LinkedProgram<u64>, CompilerMsg> {
    let mut encoder = Encoder::new(p);
    p.encode_data(&mut encoder.code.bytes, &mut encoder.sym_tab);
    for f in &p.funcs {
        let addr = encoder.code.bytes.len();
        encoder.sym_tab.insert(f.sym, addr as u64);
        for instr in &f.instrs {
            encoder.compile_instr(instr)?;
        }
    }

    for (pos, sym) in encoder.code.missing.drain(..) {
        let info = encoder.program.sym_info(sym);
        if info.external {
            encoder.sym_refs.entry(sym).or_default().push(pos);
        } else {
            // Build the error message lazily: the common case is that the symbol exists.
            let addr = encoder
                .sym_tab
                .get(sym)
                .ok_or_else(|| CompilerMsg::from(format!("missing symbol {}", info.name)))?;
            encoder.code.bytes[pos..pos + 4].copy_from_slice(&addr_offset(pos, addr));
        }
    }

    let imports = p
        .external
        .iter()
        .map(|e| LibImport {
            name: e.file.clone(),
            syms: e
                .syms
                .iter()
                .map(|&s| SymImport {
                    name: p.sym_info(s).name.clone(),
                    // Read-only lookup: an imported symbol that is never referenced
                    // simply has no usages.
                    usages: encoder.sym_refs.get(&s).cloned().unwrap_or_default(),
                })
                .collect(),
        })
        .collect();

    Ok(LinkedProgram {
        code: encoder.code.bytes,
        entry: p.entry.and_then(|e| encoder.sym_tab.get(e)),
        imports,
    })
}
type BInstr = crate::backend::Instr<X86_64>;
impl<'a> Encoder<'a> {
fn compile_instr(&mut self, instr: &BInstr) -> Result<(), CompilerMsg> {
match instr {
BInstr::Asm(asm) => {
self.code.extend(asm);
}
_ => todo!(),
}
Ok(())
}
pub fn new(program: &'a Program<X86_64>) -> Self {
Self {
code: Code::default(),
sym_tab: SymTable::new(program.sym_count()),
sym_refs: Default::default(),
program,
}
}
}
+280
View File
@@ -0,0 +1,280 @@
use super::*;
use crate::backend::Symbol;
type ERes = Result<(), CompilerMsg>;
/// Machine code under construction.
#[derive(Default)]
pub struct Code {
/// The encoded bytes.
pub(super) bytes: Vec<u8>,
/// Unresolved symbol references: the position in `bytes` of a zeroed 4-byte slot and
/// the symbol it refers to.
pub(super) missing: Vec<(usize, Symbol)>,
}
impl Code {
/// Encodes `mov dst, src` for every supported register / immediate / memory combination.
///
/// Opcodes used (the low bit is set for operands wider than 8 bits):
/// - `0x88`/`0x89`: register into register or memory
/// - `0x8a`/`0x8b`: memory into register
/// - `0xb0+r`/`0xb8+r`: immediate into register
/// - `0xc6`/`0xc7`: immediate into memory, and the sign-extended imm32 form for 64-bit
///   registers
///
/// # Errors
/// Returns an error when the operand widths don't match, when an immediate doesn't fit,
/// when a high-byte register is combined with an operand that needs a REX prefix, when a
/// memory base register is 8 or 16 bits wide, or for a memory-to-memory move.
pub fn mov(&mut self, dst: impl Into<RegMem>, src: impl Into<RegImmMem>) -> ERes {
let dst = dst.into();
let src = src.into();
match dst {
RegMem::Reg(mut dst) => match src {
// register <- register
RegImmMem::Reg(src) => {
if dst.width() != src.width() {
return Err("src and dst are not same width".into());
}
if dst.incompatible(&src) {
return Err("incompatible registers due to rex".into());
}
let width = dst.width();
self.prefix16(width);
self.rex(width, src, 0, dst);
self.bytes.push(0x88 | width.not8());
self.modrm(src, dst);
}
// register <- immediate
RegImmMem::Imm(src) => {
let src_width = src.width_unsigned()?;
if src_width > dst.width() {
return Err("immediate cannot fit in register".into());
}
self.prefix16(dst);
if dst.width() == Width::B64 && src_width <= Width::B32 && src.0 < 0 {
// use different op that sign extends for less bytes
self.bytes
.extend([rex(dst, 0, 0, dst), 0xc7, 0xc0 | dst.base()]);
self.imm(src, Width::B32);
} else {
// a value that fits in 32 bits is written through the 32-bit register
// NOTE(review): relies on 32-bit writes zero-extending into the full register
if src_width <= Width::B32 {
dst = dst.lower64();
}
self.rex(dst, 0, 0, dst);
self.bytes.push(0xb0 | (dst.not8() << 3) | dst.base());
self.imm(src, dst.width());
}
}
// register <- memory
RegImmMem::Mem(src) => {
if src.width != dst.width() {
return Err("register & memory sizes don't match".into());
}
if dst.high() && src.reg.gt8() {
return Err("registers incompatible (REX)".into());
}
self.prefix32(&src)?;
self.prefix16(dst);
self.rex(dst, dst, 0, src);
self.bytes.push(0x8a | dst.not8());
self.modrm(dst, src);
}
},
RegMem::Mem(dst) => match src {
// memory <- register
RegImmMem::Reg(src) => {
if src.width() != dst.width {
return Err("register & memory sizes don't match".into());
}
if src.high() && dst.reg.gt8() {
return Err("registers incompatible (REX)".into());
}
self.prefix32(&dst)?;
self.prefix16(src);
self.rex(dst, src, 0, dst);
self.bytes.push(0x88 | src.not8());
self.modrm(src, dst);
}
// memory <- immediate
RegImmMem::Imm(src) => {
// the immediate is encoded with at most 32 bits, even for a 64-bit destination
let encode_width = dst.width.min(Width::B32);
// a 64-bit destination is checked against the signed range, smaller ones
// against the unsigned range
let src_width = if dst.width == Width::B64 {
src.width_signed()
} else {
src.width_unsigned()
}?;
if src_width == Width::B64 {
return Err("cannot move 64 bit immediate into memory".into());
}
if src_width > dst.width {
return Err("source cannot fit in destination".into());
}
self.prefix32(&dst)?;
self.prefix16(encode_width);
self.rex(dst, 0, 0, dst);
self.bytes.push(0xc6 | encode_width.not8());
self.modrm(0, dst);
self.imm(src, encode_width);
}
RegImmMem::Mem(_) => return Err("cannot move memory to memory".into()),
},
}
Ok(())
}
/// Encodes `push` of a register or an immediate.
///
/// - 64-bit register: `[0x41] 0x50+r`
/// - 16-bit register: `0x66 [0x41] 0x50+r` (same layout `pop` uses)
/// - immediate that fits in a signed byte: `0x6a imm8`
/// - immediate that fits in a signed dword: `0x68 imm32`
///
/// The CPU sign-extends both immediate forms, so the form is chosen from the *signed*
/// width of the value; otherwise e.g. `push 200` would be encoded as `0x6a 0xc8` and
/// push -56.
///
/// # Errors
/// Returns an error for 8 and 32 bit registers, for immediates that don't fit in a
/// signed 32-bit value, and for memory operands (not implemented yet).
pub fn push(&mut self, reg: impl Into<RegImmMem>) -> ERes {
    match reg.into() {
        RegImmMem::Reg(reg) => {
            match reg.width() {
                Width::B64 | Width::B16 => (),
                _ => return Err("register must be 64 or 16 bit".into()),
            }
            // operand-size prefix for the 16-bit form; a no-op for 64-bit registers
            self.prefix16(reg);
            if reg.gt8() {
                self.bytes.push(0x41);
            }
            self.bytes.push(0x50 | reg.base());
        }
        RegImmMem::Imm(imm) => match imm.width_signed()? {
            Width::B8 => {
                self.bytes.push(0x6a);
                self.bytes.push(imm.0 as u8);
            }
            Width::B16 | Width::B32 => {
                self.bytes.push(0x68);
                self.bytes.extend((imm.0 as u32).to_le_bytes());
            }
            Width::B64 => return Err("immediate must be 32 bit or less".into()),
        },
        RegImmMem::Mem(_) => return Err("push from memory is not supported yet".into()),
    }
    Ok(())
}
/// Encodes `pop reg` as `[0x66] [0x41] 0x58+r`.
///
/// # Errors
/// Only 64 and 16 bit registers can be popped; any other width is rejected.
pub fn pop(&mut self, reg: Reg) -> ERes {
    if !matches!(reg.width(), Width::B64 | Width::B16) {
        return Err("register must be 64 or 16 bit".into());
    }
    self.prefix16(reg);
    // registers 8-15 need the extension prefix byte
    let extended = reg.gt8();
    if extended {
        self.bytes.push(0x41);
    }
    let opcode = 0x58 | reg.base();
    self.bytes.push(opcode);
    Ok(())
}
/// Encodes `lea dst, [sym]`: a REX prefix, opcode `0x8d`, then a ModRM byte followed by
/// a 4-byte slot for `sym` that is patched once symbol addresses are known.
///
/// NOTE(review): the REX `w` argument is the constant 1 regardless of `dst`'s width, so
/// this is presumably only meant for 64-bit destinations — confirm.
pub fn lea(&mut self, dst: Reg, sym: Symbol) {
self.rex(1, dst, 0, 0);
self.bytes.push(0x8d);
self.modrm(dst, sym);
}
/// Encodes `int code` as the two bytes `0xcd, code`.
pub fn int(&mut self, code: u8) {
    self.bytes.push(0xcd);
    self.bytes.push(code);
}
/// Encodes `syscall` as the two bytes `0x0f, 0x05`.
pub fn syscall(&mut self) {
    self.bytes.extend_from_slice(&[0x0f, 0x05]);
}
/// Encodes a direct `call sym`: opcode `0xe8` followed by a 4-byte slot that is patched
/// with the offset to `sym` once symbol addresses are known.
pub fn call(&mut self, sym: Symbol) {
self.bytes.push(0xe8);
self.sym_offset4(sym);
}
/// Encodes an indirect call through the memory location of `sym`: bytes `0xff, 0x15`
/// followed by a 4-byte slot for the symbol.
///
/// NOTE(review): `0xff 0x15` is presumably `call [rip + disp32]`, i.e. the slot at `sym`
/// holds the target address (as for an import table entry) — confirm.
pub fn call_mem(&mut self, sym: Symbol) {
self.bytes.extend([0xff, 0x15]);
self.sym_offset4(sym);
}
/// Encodes `ret` as the single byte `0xc3`.
pub fn ret(&mut self) {
self.bytes.push(0xc3);
}
/// Shared encoder for `add dst, imm` and `sub dst, imm`.
///
/// `ext` is the opcode extension placed in the `reg` field of the ModRM byte and selects
/// the operation. The opcode is `0x80` for 8-bit registers, `0x83` when the immediate
/// fits in a signed byte, and `0x81` otherwise (immediate as wide as the register, capped
/// at 32 bits).
///
/// All validation happens before the first byte is emitted, so a failed call leaves the
/// buffer untouched.
///
/// # Errors
/// Returns `immediate overflow` when the immediate cannot be represented for `dst`.
fn add_sub(&mut self, dst: Reg, src: impl Into<Imm>, ext: u8) -> ERes {
    let mut src = src.into();
    let mut width = src.width_signed()?;
    // immediates are encoded with at most 32 bits, even for 64-bit registers
    let dst_width = dst.width().min(Width::B32);

    if width > dst_width {
        // Too wide as a signed value. For registers below 64 bits it may still fit as an
        // unsigned value of the register's width, in which case it is reinterpreted.
        width = src.width_unsigned()?;
        if dst.width() == Width::B64 || width > dst_width {
            return Err("immediate overflow".into());
        }
        src = src.reinterpret(dst_width);
        width = src.width_signed()?;
    }

    self.prefix16(dst_width);
    self.rex(dst, 0, 0, dst);
    if dst.width() == Width::B8 {
        self.bytes.push(0x80);
    } else if width == Width::B8 {
        self.bytes.push(0x83);
    } else {
        self.bytes.push(0x81);
        width = dst_width;
    }
    self.modrm(ext, dst);
    self.imm(src, width);
    Ok(())
}
/// Encodes `add dst, imm` (opcode extension 0); see `add_sub` for the encoding rules and
/// errors.
pub fn add(&mut self, dst: Reg, src: impl Into<Imm>) -> ERes {
self.add_sub(dst, src, 0)
}
/// Encodes `sub dst, imm` (opcode extension 5); see `add_sub` for the encoding rules and
/// errors.
pub fn sub(&mut self, dst: Reg, src: impl Into<Imm>) -> ERes {
self.add_sub(dst, src, 5)
}
/// Emits the `0x66` prefix when the operand is 16 bits wide; does nothing otherwise.
fn prefix16(&mut self, width: impl Into<Width>) {
    let width: Width = width.into();
    if matches!(width, Width::B16) {
        self.bytes.push(0x66);
    }
}
/// Emits the `0x67` prefix when the base register of `mem` is 32 bits wide.
///
/// # Errors
/// 8 and 16 bit base registers are rejected; 64-bit ones need no prefix.
fn prefix32(&mut self, mem: &Mem) -> Result<(), CompilerMsg> {
    let base_width = mem.reg.width();
    if base_width == Width::B8 || base_width == Width::B16 {
        return Err("invalid register width".into());
    }
    if base_width == Width::B32 {
        self.bytes.push(0x67);
    }
    Ok(())
}
/// Emits a REX prefix built from `w`, `r`, `x` and `b`, but only when one is needed:
/// when any of the four bits would be set, or when `r` or `b` reports via `req()` that it
/// requires the prefix.
fn rex(&mut self, w: impl RexW, r: impl RexBit, x: u8, b: impl RexBit) {
    // Plain logical OR throughout (the original mixed in bitwise `|`).
    let needed = w.rexw() || r.rex() || x.rex() || b.rex() || r.req() || b.req();
    if needed {
        self.bytes.push(rex(w, r, x, b));
    }
}
/// Emits the ModRM byte for `reg` / `rm`, followed by whatever the addressing form needs:
/// a fixed `0x24` SIB byte when a memory operand has `rm == 0b100`, then an 8 or 32 bit
/// displacement or a 4-byte symbol slot.
fn modrm(&mut self, reg: impl ModRMReg, rm: impl ModRMRM) {
    let addr = rm.addr();
    let rm_bits = rm.rm();
    let is_memory = !matches!(addr, EffAddr::None);

    // the two `mod` bits select the addressing form
    let mod_bits: u8 = match addr {
        EffAddr::None => 0b11,
        EffAddr::Mem32(_) => 0b10,
        EffAddr::Mem8(_) => 0b01,
        EffAddr::Mem0 | EffAddr::Sym(_) => 0b00,
    };
    self.bytes.push((mod_bits << 6) | (reg.val() << 3) | rm_bits);

    if is_memory && rm_bits == 0b100 {
        // SIB
        self.bytes.push(0x24);
    }

    match addr {
        EffAddr::Sym(sym) => self.sym_offset4(sym),
        EffAddr::Mem32(disp) => self.bytes.extend(disp.to_le_bytes()),
        EffAddr::Mem8(disp) => self.bytes.push(disp as u8),
        _ => (),
    }
}
/// Reserves a zeroed 4-byte slot for a 32 bit offset to `sym` and records its position so
/// the slot can be filled in once symbol addresses are known.
fn sym_offset4(&mut self, sym: Symbol) {
    let slot = self.bytes.len();
    self.missing.push((slot, sym));
    self.bytes.extend([0u8; 4]);
}
/// Appends `other` to this code, shifting its pending symbol slots by the number of bytes
/// that were already present.
pub fn extend(&mut self, other: &Code) {
    let base = self.bytes.len();
    self.bytes.extend_from_slice(&other.bytes);
    for &(offset, sym) in &other.missing {
        self.missing.push((base + offset, sym));
    }
}
/// Appends the lowest `width.bytes()` bytes of `imm` in little-endian order.
fn imm(&mut self, imm: Imm, width: Width) {
    let le_bytes = imm.0.to_le_bytes();
    let count = width.bytes();
    self.bytes.extend_from_slice(&le_bytes[..count]);
}
}
/// Runs `f` against a fresh, empty [`Code`] buffer and returns the buffer if `f`
/// succeeded, or `f`'s error otherwise.
pub fn encode(f: impl FnOnce(&mut Code) -> Result<(), CompilerMsg>) -> Result<Code, CompilerMsg> {
    let mut buffer = Code::default();
    f(&mut buffer).map(|()| buffer)
}
+30
View File
@@ -0,0 +1,30 @@
mod compile;
mod encode;
mod reg;
#[cfg(test)]
mod test;
mod types;
mod util;
use crate::{
arch::Arch,
backend::{LinkedProgram, Program},
io::CompilerMsg,
};
pub use compile::*;
pub use encode::*;
pub use reg::*;
pub use types::*;
use util::*;
/// Marker type selecting the x86_64 backend.
pub struct X86_64;
impl Arch for X86_64 {
const NAME: &str = "x86_64";
// assembly is stored as already-encoded machine code
type Asm = Code;
type Addr = u64;
fn compile(p: &Program<Self>) -> Result<LinkedProgram<Self::Addr>, CompilerMsg> {
// calls the free function `compile`, not this method
compile(p)
}
}
+197
View File
@@ -0,0 +1,197 @@
/// An x86_64 general purpose register at a specific operand width.
#[derive(Clone, Copy, PartialEq)]
pub struct Reg {
/// 4-bit register number (0-15).
val: u8,
/// Set for the registers declared with `norex=` in the register table (ah, ch, dh, bh).
high: bool,
/// Operand width this name refers to.
width: Width,
}
/// Operand width. Variants are ordered from narrowest to widest, so widths can be
/// compared with `<` / `>`.
#[derive(Debug, Clone, Copy, PartialEq, PartialOrd)]
#[repr(u8)]
pub enum Width {
B8 = 0,
B16 = 1,
B32 = 2,
B64 = 3,
}
// Register table. Each row is: register number, then the 64 / 32 / 16 / 8 bit names.
// `norex=<name>` additionally declares a register with the same number and `high` set.
// A trailing `!_` includes the row in `Reg::IMPORTANT`, the subset used by the tests.
def_regs! {
0b0000 : rax eax ax al,
0b0001 : rcx ecx cx cl !_,
0b0010 : rdx edx dx dl,
0b0011 : rbx ebx bx bl,
0b0100 : rsp esp sp spl norex=ah !_,
0b0101 : rbp ebp bp bpl norex=ch,
0b0110 : rsi esi si sil norex=dh !_,
0b0111 : rdi edi di dil norex=bh,
0b1000 : r8 r8d r8w r8b,
0b1001 : r9 r9d r9w r9b !_,
0b1010 : r10 r10d r10w r10b,
0b1011 : r11 r11d r11w r11b,
0b1100 : r12 r12d r12w r12b !_,
0b1101 : r13 r13d r13w r13b,
0b1110 : r14 r14d r14w r14b,
0b1111 : r15 r15d r15w r15b,
}
impl Reg {
    const fn new(val: u8, width: Width, high: bool) -> Self {
        Self { val, high, width }
    }

    /// The low three bits of the register number.
    pub fn base(&self) -> u8 {
        self.val % 8
    }

    /// `true` for registers 8-15, i.e. anything that is not one of the first eight.
    pub fn gt8(&self) -> bool {
        self.val > 0b0111
    }

    /// `true` for registers 4-15.
    pub fn gt4(&self) -> bool {
        self.val > 0b0011
    }

    pub fn width(&self) -> Width {
        self.width
    }

    /// 1 when the register is wider than 8 bits, 0 otherwise.
    pub fn not8(&self) -> u8 {
        self.width.not8()
    }

    pub fn high(&self) -> bool {
        self.high
    }

    /// Same register with its width capped at 32 bits (64-bit becomes 32-bit, narrower
    /// widths are kept).
    pub fn lower64(&self) -> Self {
        Self {
            width: self.width.min(Width::B32),
            ..*self
        }
    }

    /// Whether using this register forces a REX prefix: registers 8-15, any 64-bit
    /// register, and the non-high 8-bit names of registers 4-7.
    pub fn requires_rex(&self) -> bool {
        if self.gt8() || self.width == Width::B64 {
            return true;
        }
        self.gt4() && self.width == Width::B8 && !self.high
    }

    /// Whether the two registers cannot be used in one instruction: one of them is a
    /// high-byte register while the other one requires a REX prefix.
    pub fn incompatible(&self, other: &Reg) -> bool {
        let other_blocks_self = other.high && self.requires_rex();
        let self_blocks_other = self.high && other.requires_rex();
        other_blocks_self || self_blocks_other
    }
}
impl Width {
    /// Largest unsigned value representable in this width.
    pub const fn max_val(&self) -> u64 {
        match self {
            Self::B8 => u8::MAX as u64,
            Self::B16 => u16::MAX as u64,
            Self::B32 => u32::MAX as u64,
            Self::B64 => u64::MAX,
        }
    }

    /// The narrower of the two widths.
    pub fn min(self, other: Self) -> Self {
        if other < self { other } else { self }
    }

    /// Size of this width in bytes.
    pub const fn bytes(&self) -> usize {
        match self {
            Self::B8 => 1,
            Self::B16 => 2,
            Self::B32 => 4,
            Self::B64 => 8,
        }
    }

    /// 1 when wider than 8 bits, 0 for exactly 8 bits.
    pub const fn not8(&self) -> u8 {
        match self {
            Self::B8 => 0,
            _ => 1,
        }
    }
}
// Builds an array expression from `;`-terminated groups of identifiers, keeping only the
// groups that start with `!` followed by one marker token.
// Input shape: `filter!(<kept so far>; <group>; <group>; ...)`.
macro_rules! filter {
// group is marked with `! <token>`: append its identifiers to the kept list
($($filtered:ident)*; ! $_:tt $($item:ident)*; $($rest:tt)*) => {
filter!($($filtered)* $($item)*; $($rest)*)
};
// unmarked group: drop it
($($filtered:ident)*; $($item:ident)*; $($rest:tt)*) => {
filter!($($filtered)*; $($rest)*)
};
// no groups left: emit the array
($($filtered:ident)*;) => {
[$($filtered, )*]
};
}
use filter;
// Generates everything that depends on the register table:
// - one `pub const` `Reg` per name (the optional `norex=` name is created with `high` set),
// - `Reg::IMPORTANT` (test builds only): the registers of all rows marked with `!<token>`,
// - `Reg::parse`: case-insensitive lookup of a register by name,
// - `Display for Reg`: the register's name, or "UNKNOWN" if no constant matches.
macro_rules! def_regs {
($($val:literal : $B64:ident $B32:ident $B16:ident $B8:ident $(norex=$B8H:ident)? $(!$imp:tt)?,)*) => {
$(
#[allow(non_upper_case_globals)]
pub const $B64: Reg = Reg::new($val, Width::B64, false);
#[allow(non_upper_case_globals)]
pub const $B32: Reg = Reg::new($val, Width::B32, false);
#[allow(non_upper_case_globals)]
pub const $B16: Reg = Reg::new($val, Width::B16, false);
#[allow(non_upper_case_globals)]
pub const $B8 : Reg = Reg::new($val, Width::B8 , false);
$(
#[allow(non_upper_case_globals)]
pub const $B8H: Reg = Reg::new($val, Width::B8, true);
)?
)*
impl Reg {
// #[cfg(test)]
// pub const ALL: &[Reg] = &[
// $( $B64, $B32, $B16, $B8, $($B8H,)? )*
// ];
#[cfg(test)]
pub const IMPORTANT: &[Reg] = &
filter!(; $($(!$imp)? $B64 $B32 $B16 $B8 $($B8H)?; )* )
;
pub fn parse(s: &str) -> Option<Self> {
Some(match s.to_lowercase().as_str() {
$(
stringify!($B64) => $B64,
stringify!($B32) => $B32,
stringify!($B16) => $B16,
stringify!($B8 ) => $B8,
$(
stringify!($B8H) => $B8H,
)?
)*
_ => return None,
})
}
}
impl std::fmt::Display for Reg {
#[allow(non_upper_case_globals)]
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
// the register constants are used as match patterns here
write!(f, "{}", match *self {
$(
$B64 => stringify!($B64),
$B32 => stringify!($B32),
$B16 => stringify!($B16),
$B8 => stringify!($B8),
$(
$B8H => stringify!($B8H),
)?
)*
_ => "UNKNOWN",
})
}
}
};
}
use def_regs;
use crate::arch::x86_64::Imm;
/// Lets a register be passed wherever a width is expected; yields the register's width.
impl From<Reg> for Width {
fn from(value: Reg) -> Self {
value.width
}
}
+54
View File
@@ -0,0 +1,54 @@
mod setup;
use setup::*;
/// Checks `Code::mov` against the reference assembler for every combination of the test
/// registers, memory operands and immediates.
#[test]
fn mov() {
let c = &mut TestCtx::new("mov");
// register <- register
for dst in regs() {
for src in regs() {
eq(c, format!("mov {dst}, {src}"), |c| c.mov(dst, src));
}
}
// register <- memory
for dst in regs() {
for src in mems() {
eq(c, format!("mov {dst}, {src}"), |c| c.mov(dst, src));
}
}
// register <- immediate
for dst in regs() {
for src in imms() {
eq(c, format!("mov {dst}, {src}"), |c| c.mov(dst, src));
}
}
// memory <- register
for dst in mems() {
for src in regs() {
eq(c, format!("mov {dst}, {src}"), |c| c.mov(dst, src));
}
}
// memory <- immediate
for dst in mems() {
for src in imms() {
eq(c, format!("mov {dst}, {src}"), |c| c.mov(dst, src));
}
}
}
/// Checks `Code::add` and `Code::sub` against the reference assembler for every
/// combination of the test registers and immediates.
#[test]
fn add_sub() {
    // This test needs its own cache file. Sharing the "mov" cache made the two tests,
    // which may run in parallel, load and rewrite the same file, so whichever finished
    // last discarded the entries the other one had added.
    let c = &mut TestCtx::new("add_sub");
    for dst in regs() {
        for src in imms() {
            eq(c, format!("add {dst}, {src}"), |c| c.add(dst, src));
        }
    }
    for dst in regs() {
        for src in imms() {
            eq(c, format!("sub {dst}, {src}"), |c| c.sub(dst, src));
        }
    }
}
+165
View File
@@ -0,0 +1,165 @@
use crate::arch::x86_64::*;
use std::{collections::HashMap, fs::OpenOptions, io::Write, process::Command};
/// Displacements used for the test memory operands: zero plus the extremes of the signed
/// 8, 16 and 32 bit ranges.
const DISPS: &[i32] = &[
0x0,
i8::MIN as i32,
i8::MAX as i32,
i16::MIN as i32,
i16::MAX as i32,
i32::MIN,
i32::MAX,
];
/// Immediates used by the tests: zero, the extremes of the signed 8-64 bit ranges, and
/// the values on both sides of each unsigned 8/16/32 bit limit.
const IMMS: &[i128] = &[
0x0,
i8::MIN as i128,
i8::MAX as i128,
i16::MIN as i128,
i16::MAX as i128,
i32::MIN as i128,
i32::MAX as i128,
i64::MIN as i128,
i64::MAX as i128,
u8::MAX as i128,
u8::MAX as i128 + 1,
u16::MAX as i128,
u16::MAX as i128 + 1,
u32::MAX as i128,
u32::MAX as i128 + 1,
// NOTE(review): `i64::MAX` is already listed above; following the unsigned pattern this
// entry was possibly meant to be `u64::MAX` — confirm before changing, since it alters
// which instructions the tests exercise.
i64::MAX as i128,
];
const WIDTHS: &[Width] = &[Width::B8, Width::B16, Width::B32, Width::B64];
/// Iterates over the test immediates in table order.
pub fn imms() -> impl Iterator<Item = i128> {
    IMMS.iter().copied()
}
/// Iterates over the registers in `Reg::IMPORTANT`, in table order.
pub fn regs() -> impl Iterator<Item = Reg> {
    Reg::IMPORTANT.iter().copied()
}
pub fn mems() -> impl Iterator<Item = Mem> {
gen move {
for &reg in Reg::IMPORTANT {
for &disp in DISPS {
for &width in WIDTHS {
yield mem(reg, disp, width);
}
}
}
}
}
/// Shared state for one encoder test: a reusable code buffer plus a cache of reference
/// assembler results that is written back to disk when the context is dropped.
pub struct TestCtx {
/// Location of the cache file.
path: String,
/// Buffer the encoder under test writes into; cleared after every comparison.
code: Code,
/// Assembly text -> reference bytes, or the assembler's error output.
cache: HashMap<String, Result<Vec<u8>, String>>,
/// Whether `cache` gained entries and needs to be saved.
changed: bool,
}
/// Asserts that the encoder agrees with the reference assembler for one instruction.
///
/// `asm` is the instruction text; its reference result is taken from the cache or, on a
/// miss, produced with `nasm` and cached. `instr` runs the encoder under test on the
/// context's code buffer.
///
/// # Panics
/// Panics when only one side rejects the instruction, or when both accept it but the
/// produced bytes differ.
pub fn eq(
ctx: &mut TestCtx,
asm: impl AsRef<str>,
instr: impl FnOnce(&mut Code) -> Result<(), CompilerMsg>,
) {
let asm = asm.as_ref();
let expected = if let Some(val) = ctx.cache.get(asm) {
val
} else {
// cache miss: ask the reference assembler and remember the answer
ctx.changed = true;
let res = nasm(asm);
ctx.cache.insert(asm.to_string(), res);
ctx.cache.get(asm).unwrap()
};
let code = &mut ctx.code;
let res = instr(code);
match (expected, res) {
(Ok(expected), Err(e)) => {
panic!(
"{asm}: failed to compile: {}\nexpected: {expected:x?}",
e.msg
);
}
(Err(e), Ok(_)) => {
let res = &code.bytes[..];
panic!("{asm}: should not have compiled:\n{e}\ngot: {res:x?}");
}
// both sides reject the instruction
(Err(_), Err(_)) => (),
(Ok(expected), Ok(_)) => {
let res = &code.bytes[..];
if expected != res {
panic!("{asm}: expected {expected:x?}, got {res:x?}")
}
}
}
// leave the buffer empty for the next instruction
code.bytes.clear();
}
fn nasm(input: &str) -> Result<Vec<u8>, String> {
let fin = "/tmp/69420nasm_in.asm";
let fout = "/tmp/69420nasm_out.o";
let input = "result:".to_string() + input;
write(fin, input.as_bytes());
run(["nasm", "-w+error", "-felf64", fin, &format!("-o{fout}")])?;
let output = run(["objdump", "--no-addresses", "-dw", "-Mintel", fout])?;
let mut iter = output.lines().skip_while(|l| !l.contains("result")).skip(1);
let res_line = iter.next().unwrap().trim();
let end = res_line.find("\t").unwrap();
let res_line = &res_line[..end];
let bytes = res_line
.trim()
.split(" ")
.map(|s| u8::from_str_radix(s, 16).unwrap())
.collect();
Ok(bytes)
}
/// Runs the command `input[0]` with the remaining elements as arguments.
///
/// Returns the captured stdout when the process exits with status 0. Otherwise returns
/// `Err` with the captured stderr, or a short message when the process was terminated by
/// a signal or `input` is empty. Output that is not valid UTF-8 is converted lossily
/// instead of panicking.
///
/// # Panics
/// Panics when the program cannot be started at all (e.g. it is not installed).
fn run<const N: usize>(input: [&str; N]) -> Result<String, String> {
    let [program, args @ ..] = &input[..] else {
        return Err("no command given".to_string());
    };
    let output = Command::new(program)
        .args(args)
        .output()
        .unwrap_or_else(|e| panic!("failed to run {program}: {e}"));
    match output.status.code() {
        Some(0) => Ok(String::from_utf8_lossy(&output.stdout).into_owned()),
        Some(_) => Err(String::from_utf8_lossy(&output.stderr).into_owned()),
        None => Err(format!("{program} was terminated by a signal")),
    }
}
/// Writes `binary` to `path`, creating the file or replacing its previous contents, and
/// syncs it to disk.
///
/// # Panics
/// Panics when the file cannot be created, written or synced.
fn write(path: &str, binary: &[u8]) {
    // `File::create` opens write-only, creating the file or truncating an existing one
    let mut out = std::fs::File::create(path).expect("Failed to create file");
    out.write_all(binary).expect("Failed to write to file");
    out.sync_all().expect("Failed to sync file");
}
const CACHE_PATH: &str = "test/nasm_cache";
impl TestCtx {
pub fn new(name: &str) -> Self {
let path = CACHE_PATH.to_string() + "/" + name;
let cache = match std::fs::read(&path) {
Ok(bytes) => bitcode::decode(&bytes).unwrap_or_default(),
Err(_) => Default::default(),
};
Self {
path,
code: Default::default(),
cache,
changed: Default::default(),
}
}
}
/// Saves the cache when it gained entries.
///
/// Saving is best effort and never panics: `drop` also runs while a failed test is
/// unwinding, and a second panic at that point would abort the whole test process. The
/// cache directory is created when it does not exist yet; failures are reported on
/// stderr.
impl Drop for TestCtx {
    fn drop(&mut self) {
        if !self.changed {
            return;
        }
        let path = std::path::Path::new(&self.path);
        let save = || -> std::io::Result<()> {
            if let Some(dir) = path.parent() {
                std::fs::create_dir_all(dir)?;
            }
            std::fs::write(path, bitcode::encode(&self.cache))
        };
        if let Err(e) = save() {
            eprintln!("warning: failed to save nasm cache {}: {e}", self.path);
        }
    }
}
+54
View File
@@ -0,0 +1,54 @@
use super::*;
/// End-to-end test: builds a two-function program by hand, links it into an ELF file,
/// runs it, and checks both the printed text and the exit code.
#[test]
fn hello() -> Result<(), CompilerMsg> {
let mut program = Program::<X86_64>::default();
let text = b"Hello world!\n";
let text_sym = program.ro_data("hello_en", text);
let text2 = "世界、こんにちは!\n";
let text_sym2 = program.ro_data("hello_jp", text2);
// prints the second string and returns to the caller
// NOTE(review): ax = 1 / di = 1 is presumably the Linux `write` syscall on stdout — confirm
let hello2 = program.func(
"hello2",
[Instr::Asm(encode(|c| {
c.mov(ax, 1)?;
c.mov(di, 1)?;
c.lea(rsi, text_sym2);
c.mov(dx, text2.len() as u64)?;
c.syscall();
c.ret();
Ok(())
})?)],
);
let entry = program.func(
"main",
[Instr::Asm(encode(|c| {
// keep 39 on the stack; it is popped into rdi right before the final syscall
c.mov(rdi, 39)?;
c.push(rdi)?;
c.mov(ax, 1)?;
c.mov(di, 1)?;
c.lea(rsi, text_sym);
c.mov(dx, text.len() as u64)?;
c.syscall();
c.call(hello2);
// NOTE(review): 0x3c is presumably the Linux `exit` syscall number — confirm
c.mov(ax, 0x3c)?;
c.pop(rdi)?;
c.syscall();
Ok(())
})?)],
);
program.entry = Some(entry);
let linked = program.compile().expect("failed to compile");
let binary = linked.to_elf();
let path = "test/bin/x86_64_test";
write(path, &binary);
println!("running...");
let mut cmd = Command::new(path);
let output = cmd.output().expect("failed to run");
let Some(code) = output.status.code() else {
panic!("no exit code");
};
// the conversion fails on invalid UTF-8 (the expected output itself is not ASCII)
let result: String = output.stdout.try_into().expect("non ascii output");
assert_eq!(result, "Hello world!\n世界、こんにちは!\n");
assert_eq!(code, 39);
Ok(())
}
+11
View File
@@ -0,0 +1,11 @@
mod linux;
mod util;
mod windows;
use crate::{
arch::x86_64::*,
backend::{Instr, Program},
io::CompilerMsg,
};
use std::process::Command;
use util::*;
+13
View File
@@ -0,0 +1,13 @@
use std::{fs::OpenOptions, io::Write, os::unix::fs::OpenOptionsExt};
/// Creates (or truncates) the file at `path` with Unix mode `0o750`, writes
/// `binary` into it and flushes it to disk.
///
/// # Panics
/// Panics if the file cannot be created, written or synced.
pub fn write(path: &str, binary: &[u8]) {
    let mut options = OpenOptions::new();
    options.create(true).write(true).truncate(true).mode(0o750);
    let mut out = options.open(path).expect("Failed to create file");
    out.write_all(binary).expect("Failed to write to file");
    out.sync_all().expect("Failed to sync file");
}
+48
View File
@@ -0,0 +1,48 @@
use super::*;
/// End-to-end test: assembles a program that imports three KERNEL32 functions,
/// links it into a PE executable, runs it under `wine` and checks stdout and
/// the exit code.
#[test]
fn hello() -> Result<(), CompilerMsg> {
let mut program = Program::<X86_64>::default();
let [get_std_handle, write_file, exit_process] =
program.external("KERNEL32.dll", ["GetStdHandle", "WriteFile", "ExitProcess"]);
let text = b"Hello world!\n";
let text_sym = program.ro_data("hello_en", text);
// 4-byte slot whose address is passed to WriteFile in r9.
// NOTE(review): it is allocated via `ro_data`, yet WriteFile presumably
// writes the byte count through this pointer — confirm the memory is writable.
let written = program.ro_data("written", [0; 4]);
let entry = program.func(
"main",
[Instr::Asm(encode(|c| {
// Reserve 0x28 bytes of stack; the store to [rsp + 0x20] below lands in it.
// NOTE(review): this subtracts from the 32-bit `esp`, which clears the
// upper half of `rsp` — confirm `rsp` was not intended.
c.sub(esp, 0x28)?;
// stdout: GetStdHandle(-11); presumably -11 is STD_OUTPUT_HANDLE
c.mov(ecx, -11)?;
c.call_mem(get_std_handle);
// write: WriteFile(handle, text, len, &written, 0) — the handle comes back in rax
c.mov(rcx, rax)?;
c.lea(rdx, text_sym);
c.mov(r8d, text.len() as u64)?;
c.lea(r9, written);
c.mov(mem(rsp, 0x20, Width::B32), 0)?;
c.call_mem(write_file);
// exit: ExitProcess(39)
c.mov(ecx, 39)?;
c.call_mem(exit_process);
Ok(())
})?)],
);
program.entry = Some(entry);
let linked = program.compile().expect("failed to compile");
let binary = linked.to_pe();
let path = "test/bin/x86_64_test.exe";
write(path, &binary);
// The executable is run through `wine`, so wine must be on PATH.
let mut cmd = Command::new("wine");
cmd.arg(path);
let output = cmd.output().expect("failed to run");
let Some(code) = output.status.code() else {
panic!("no exit code");
};
let result: String = output.stdout.try_into().expect("non ascii output");
assert_eq!(result, "Hello world!\n");
assert_eq!(code, 39);
Ok(())
}
+2
View File
@@ -0,0 +1,2 @@
mod full;
mod asm;
+171
View File
@@ -0,0 +1,171 @@
use std::num::TryFromIntError;
use super::*;
/// Memory operand of the form `[reg + disp]`, accessed with operand size `width`.
#[derive(Clone, Copy)]
pub struct Mem {
// base register of the address
pub reg: Reg,
// signed displacement added to the base register
pub disp: i32,
// operand size of the access
pub width: Width,
}
/// Operand that may be a register, an immediate or a memory location.
#[derive(Clone, Copy)]
pub enum RegImmMem {
Reg(Reg),
Imm(Imm),
Mem(Mem),
}
/// Operand that may be a register or a memory location.
#[derive(Clone, Copy)]
pub enum RegMem {
Reg(Reg),
Mem(Mem),
}
/// Immediate value, stored widened to `i128` so both `u64` and `i64` inputs fit losslessly.
#[derive(Clone, Copy, PartialEq, PartialOrd)]
pub struct Imm(pub i128);
/// Shorthand constructor for a [`Mem`] operand.
pub fn mem(reg: Reg, disp: i32, width: Width) -> Mem {
Mem { reg, disp, width }
}
impl Imm {
/// Builds the message returned when an immediate fits none of the supported widths.
pub fn overflow_msg() -> CompilerMsg {
"immediate overflow".into()
}
/// Returns the narrowest width whose signed two's-complement range contains the value.
///
/// # Errors
/// Returns [`Imm::overflow_msg`] when the value is outside the signed 64-bit range.
pub fn width_signed(&self) -> Result<Width, CompilerMsg> {
Ok(match self.0 {
-0x80..=0x7f => Width::B8,
-0x8000..=0x7fff => Width::B16,
-0x8000_0000..=0x7fff_ffff => Width::B32,
-0x8000_0000_0000_0000..=0x7fff_ffff_ffff_ffff => Width::B64,
_ => return Err(Self::overflow_msg()),
})
}
/// Returns the narrowest width whose unsigned maximum bounds the value's magnitude.
///
/// NOTE(review): negative values down to `-0xff`, `-0xffff`, ... are accepted
/// even though e.g. `-0xff` does not fit in 8 two's-complement bits — confirm
/// this leniency is intended.
///
/// # Errors
/// Returns [`Imm::overflow_msg`] when the magnitude exceeds `0xffff_ffff_ffff_ffff`.
pub fn width_unsigned(&self) -> Result<Width, CompilerMsg> {
Ok(match self.0 {
-0xff..=0xff => Width::B8,
-0xffff..=0xffff => Width::B16,
-0xffff_ffff..=0xffff_ffff => Width::B32,
-0xffff_ffff_ffff_ffff..=0xffff_ffff_ffff_ffff => Width::B64,
_ => return Err(Self::overflow_msg()),
})
}
/// Truncates the value to the low `width` bits and sign-extends the result back to `i128`.
pub fn reinterpret(&self, width: Width) -> Self {
Self(match width {
Width::B8 => self.0 as i8 as i128,
Width::B16 => self.0 as i16 as i128,
Width::B32 => self.0 as i32 as i128,
Width::B64 => self.0 as i64 as i128,
})
}
}
impl TryFrom<Imm> for u8 {
type Error = TryFromIntError;
fn try_from(value: Imm) -> Result<Self, Self::Error> {
value.0.try_into()
}
}
/// Formats the operand as `<SIZE> [<reg> <+|-> 0x<disp>]`, e.g. `DWORD [.. + 0x20]`.
impl std::fmt::Display for Mem {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let size = match self.width {
            Width::B8 => "BYTE",
            Width::B16 => "WORD",
            Width::B32 => "DWORD",
            Width::B64 => "QWORD",
        };
        let disp = signed_hex(self.disp as i128, true);
        write!(f, "{size} [{} {disp}]", self.reg)
    }
}
// fromrot
impl From<Reg> for RegImmMem {
fn from(value: Reg) -> Self {
Self::Reg(value)
}
}
impl From<Reg> for RegMem {
fn from(value: Reg) -> Self {
Self::Reg(value)
}
}
impl From<Mem> for RegImmMem {
fn from(value: Mem) -> Self {
Self::Mem(value)
}
}
impl From<Mem> for RegMem {
fn from(value: Mem) -> Self {
Self::Mem(value)
}
}
impl From<u64> for RegImmMem {
fn from(value: u64) -> Self {
Self::Imm(value.into())
}
}
impl From<i64> for RegImmMem {
fn from(value: i64) -> Self {
Self::Imm(value.into())
}
}
impl From<i32> for RegImmMem {
fn from(value: i32) -> Self {
Self::Imm(value.into())
}
}
impl From<i128> for RegImmMem {
fn from(value: i128) -> Self {
Self::Imm(value.into())
}
}
impl From<u64> for Imm {
fn from(value: u64) -> Self {
Self(value as i128)
}
}
impl From<i64> for Imm {
fn from(value: i64) -> Self {
Self(value as i128)
}
}
impl From<i32> for Imm {
fn from(value: i32) -> Self {
Self(value as i128)
}
}
impl From<i128> for Imm {
fn from(value: i128) -> Self {
Self(value)
}
}
/// Formats the immediate by forwarding to the `fmt` of the wrapped `i128`.
impl std::fmt::Display for Imm {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
self.0.fmt(f)
}
}
/// Same forwarding as the `Display` impl.
/// NOTE(review): which formatting trait `self.0.fmt` resolves to depends on
/// the traits brought into scope by `use super::*` — confirm it is the intended one.
impl std::fmt::Debug for Imm {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
self.0.fmt(f)
}
}
+175
View File
@@ -0,0 +1,175 @@
use crate::backend::Symbol;
use super::*;
/// Something that can occupy the `r/m` side of a ModRM byte.
pub trait ModRMRM {
/// 3-bit value for the ModRM `r/m` field.
fn rm(&self) -> u8;
/// Addressing form (and displacement, if any) that accompanies `rm`.
fn addr(&self) -> EffAddr;
}
/// Addressing form selected for an `r/m` operand.
pub enum EffAddr {
// memory access without displacement
Mem0,
// memory access with an 8-bit displacement
Mem8(i8),
// memory access with a 32-bit displacement
Mem32(i32),
// address of a symbol; presumably resolved at link time — TODO confirm
Sym(Symbol),
// not a memory access (plain register operand)
None,
}
/// A register is addressed directly: its base number is the `r/m` value and
/// there is no effective address.
impl ModRMRM for Reg {
fn rm(&self) -> u8 {
self.base()
}
fn addr(&self) -> EffAddr {
EffAddr::None
}
}
impl ModRMRM for Mem {
    /// The base register's number is used as the `r/m` value.
    fn rm(&self) -> u8 {
        self.reg.base()
    }

    /// Chooses the shortest displacement form that can hold `disp`.
    ///
    /// A zero displacement normally needs no displacement at all, except when
    /// the base register's number is `0b101`, where an explicit 8-bit zero is
    /// emitted instead (presumably because that `r/m` value without a
    /// displacement is the slot the `i32`/`Symbol` operands use).
    fn addr(&self) -> EffAddr {
        let disp = self.disp;
        if disp == 0 {
            return if self.reg.base() == 0b101 {
                EffAddr::Mem8(0)
            } else {
                EffAddr::Mem0
            };
        }
        match i8::try_from(disp) {
            Ok(short) => EffAddr::Mem8(short),
            Err(_) => EffAddr::Mem32(disp),
        }
    }
}
/// A bare `i32` is a 32-bit displacement paired with `r/m = 0b101`.
impl ModRMRM for i32 {
fn rm(&self) -> u8 {
0b101
}
fn addr(&self) -> EffAddr {
EffAddr::Mem32(*self)
}
}
/// A symbol uses the same `r/m = 0b101` slot, with the address supplied by the symbol.
impl ModRMRM for Symbol {
fn rm(&self) -> u8 {
0b101
}
fn addr(&self) -> EffAddr {
EffAddr::Sym(*self)
}
}
/// A raw `u8` is used verbatim as the ModRM `reg` field value.
impl ModRMReg for u8 {
fn val(&self) -> u8 {
*self
}
}
/// A register contributes its base number to the ModRM `reg` field.
impl ModRMReg for Reg {
fn val(&self) -> u8 {
self.base()
}
}
/// Something that can occupy the `reg` field of a ModRM byte.
pub trait ModRMReg {
/// Value for the ModRM `reg` field.
fn val(&self) -> u8;
}
/// Assembles a REX prefix byte: the fixed `0100` high nibble followed by the
/// W (bit 3), R (bit 2), X (bit 1) and B (bit 0) flags.
#[inline(always)]
pub fn rex(w: impl RexW, r: impl RexBit, x: u8, b: impl RexBit) -> u8 {
    let mut prefix = 0b0100_0000;
    prefix |= bit(w.rexw(), 3);
    prefix |= bit(r.rex(), 2);
    prefix |= bit(x.rex(), 1);
    prefix |= bit(b.rex(), 0);
    prefix
}
/// Returns a byte with bit `pos` set when `val` is true, and `0` otherwise.
#[inline(always)]
fn bit(val: bool, pos: u8) -> u8 {
    u8::from(val) << pos
}
/// Source of a single REX flag bit (R, X or B).
pub trait RexBit: Sized {
    /// Whether the flag bit should be set.
    fn rex(&self) -> bool;

    /// Whether a REX prefix must be emitted even if no flag bit is set.
    /// Defaults to `false`.
    fn req(&self) -> bool {
        false
    }
}

/// A raw `u8` sets the flag when it is non-zero.
impl RexBit for u8 {
    fn rex(&self) -> bool {
        *self > 0
    }
}
/// Register flag bit: set when `gt8()` holds (presumably registers r8 and above — TODO confirm).
impl RexBit for Reg {
fn rex(&self) -> bool {
self.gt8()
}
// A prefix is required for 8-bit, non-"high" registers for which `gt4()` holds
// (presumably spl/bpl/sil/dil — TODO confirm against `Reg`).
fn req(&self) -> bool {
self.gt4() && (self.width() == Width::B8) && !self.high()
}
}
/// A memory operand takes its flag bit from its base register.
impl RexBit for Mem {
fn rex(&self) -> bool {
self.reg.rex()
}
}
/// Source of the REX.W flag.
pub trait RexW {
/// Whether REX.W should be set.
fn rexw(&self) -> bool;
}
/// REX.W is set exactly for 64-bit widths.
impl RexW for Width {
fn rexw(&self) -> bool {
*self == Width::B64
}
}
/// A register sets REX.W according to its width.
impl RexW for Reg {
fn rexw(&self) -> bool {
self.width().rexw()
}
}
/// A raw `u8` sets REX.W only when it equals 1 (unlike `RexBit for u8`, which
/// accepts any non-zero value).
impl RexW for u8 {
fn rexw(&self) -> bool {
*self == 1
}
}
/// A memory operand sets REX.W according to its access width.
impl RexW for Mem {
fn rexw(&self) -> bool {
self.width.rexw()
}
}
/// Computes the little-endian 32-bit offset from the end of a 4-byte
/// displacement field located at `pos` to the absolute position `addr`.
///
/// Assumes the next instruction starts directly after the 4 displacement bytes.
pub fn addr_offset(pos: usize, addr: u64) -> [u8; 4] {
    let next_instr = (pos + 4) as i32;
    let target = addr as i32;
    (target - next_instr).to_le_bytes()
}
/// Display adapter that prints an integer as a signed hexadecimal literal.
pub struct SignedHex {
    /// Value to print.
    pub val: i128,
    /// When true the sign is rendered as a binary operator (`+ ` / `- `), so the
    /// output can follow another operand; otherwise only a leading `-` is
    /// printed for negative values.
    pub op: bool,
}

/// Convenience constructor for [`SignedHex`].
pub fn signed_hex(val: i128, op: bool) -> SignedHex {
    SignedHex { val, op }
}

impl std::fmt::Display for SignedHex {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        // `unsigned_abs` cannot overflow, unlike `abs`, which fails for `i128::MIN`.
        let magnitude = self.val.unsigned_abs();
        let sign = match (self.op, self.val < 0) {
            (true, true) => "- ",
            (true, false) => "+ ",
            (false, true) => "-",
            (false, false) => "",
        };
        write!(f, "{sign}0x{magnitude:x}")
    }
}
@@ -1,6 +1,7 @@
use super::{program::Addr, LinkedProgram};
use crate::backend::{LinkedProgram, container::encode::ByteEncoder};
#[repr(C)]
#[derive(Default)]
pub struct ELF64Header {
magic: u32,
class: u8,
@@ -25,6 +26,7 @@ pub struct ELF64Header {
}
#[repr(C)]
#[derive(Default)]
pub struct ProgramHeader {
ty: u32,
flags: u32,
@@ -50,25 +52,47 @@ pub struct SectionHeader {
entry_size: u64,
}
// this is currently specialized for riscv64; obviously add params later
pub fn create(program: &[u8], start_offset: Addr) -> Vec<u8> {
let addr_start = 0x1000;
/// Target architectures the ELF writer knows a machine id for.
pub enum Arch {
    X86_64,
    Riscv,
}

impl Arch {
    /// Returns the value for the ELF header's `machine` field.
    pub fn machine(&self) -> u16 {
        const EM_X86_64: u16 = 0x3e;
        const EM_RISCV: u16 = 0xf3;
        match *self {
            Self::X86_64 => EM_X86_64,
            Self::Riscv => EM_RISCV,
        }
    }
}
/// Values for the ELF header's object file type field (`ty`).
/// The discriminants are the numeric values written into the header.
#[repr(u8)]
pub enum EType {
// no file type
None = 0,
// relocatable file
Rel = 1,
// executable file
Exec = 2,
// shared object / position-independent executable
Dyn = 3,
// core file
Core = 4,
}
// this is currently specialized for x86_64; obviously add params later
pub fn create(program: &[u8], start_offset: u64) -> Vec<u8> {
let pie = true;
let addr_start = if pie { 0 } else { 0x400000 };
let page_size = 0x1000;
// I don't know if I have to add addr_start here, idk how it maps the memory
let program_size = std::mem::size_of_val(program) as u64 + addr_start;
let program_header = ProgramHeader {
ty: 0x1, // LOAD
flags: 0b101, // executable, readable
offset: 0x0,
vaddr: addr_start,
paddr: addr_start,
filesz: program_size,
memsz: program_size,
align: page_size,
};
let header_len = (size_of::<ELF64Header>() + size_of::<ProgramHeader>()) as u64;
let program_pos = header_len;
let header = ELF64Header {
let program_size = std::mem::size_of_val(program) as u64;
let mut data = ByteEncoder::default();
let header = data.reserve::<ELF64Header>();
let program_header_offset = data.pos() as u64;
let program_header = data.reserve::<ProgramHeader>();
let program_pos = data.pos() as u64;
data.extend(program);
data[header] = ELF64Header {
magic: 0x7f_45_4c_46u32.swap_bytes(),
class: 0x2, // 64 bit
endianness: 0x1, // little endian
@@ -76,11 +100,11 @@ pub fn create(program: &[u8], start_offset: Addr) -> Vec<u8> {
os_abi: 0x0, // system-v
os_abi_ver: 0x0,
pad: [0x0; 7],
ty: 0x2, // executable
machine: 0xf3, // risc-v
ty: if pie { EType::Dyn } else { EType::Exec } as u16,
machine: Arch::X86_64.machine(),
e_version: 0x1,
entry: addr_start + program_pos + start_offset.val(),
program_header_offset: size_of::<ELF64Header>() as u64,
entry: addr_start + program_pos + start_offset,
program_header_offset,
section_header_offset: 0x0,
// C ABI (16 bit instruction align) + double precision floats
flags: 0x1 | 0x4,
@@ -91,21 +115,21 @@ pub fn create(program: &[u8], start_offset: Addr) -> Vec<u8> {
section_header_num: 0x0,
section_header_str_idx: 0x0,
};
let mut bytes: Vec<u8> = Vec::new();
unsafe {
bytes.extend(as_u8_slice(&header));
bytes.extend(as_u8_slice(&program_header));
bytes.extend(program);
}
bytes
data[program_header] = ProgramHeader {
ty: 0x1, // LOAD
flags: 0b101, // executable, readable
offset: 0x0,
vaddr: addr_start,
paddr: 0x0,
filesz: program_size,
memsz: program_size,
align: page_size,
};
data.data
}
/// Views the in-memory representation of `p` as a byte slice of
/// `size_of::<T>()` bytes.
///
/// # Safety
/// Every byte of `T`'s representation is read through the returned slice, so
/// `T` should not contain uninitialized (padding) bytes.
unsafe fn as_u8_slice<T: Sized>(p: &T) -> &[u8] {
    let first_byte = (p as *const T).cast::<u8>();
    // SAFETY: `p` is a valid reference, so `size_of::<T>()` bytes starting at
    // `first_byte` are readable for the lifetime of `p`.
    unsafe { core::slice::from_raw_parts(first_byte, size_of::<T>()) }
}
impl LinkedProgram {
impl LinkedProgram<u64> {
pub fn to_elf(&self) -> Vec<u8> {
create(&self.code, self.start.expect("no start found"))
create(&self.code, self.entry.expect("no start"))
}
}
+154
View File
@@ -0,0 +1,154 @@
use std::ops::{Index, IndexMut};
/// Append-only byte buffer used to lay out binary file formats.
///
/// Besides raw byte appends it can reserve zero-filled, typed slots
/// ([`Reserved`], [`ReservedArr`]) that are filled in later by indexing the
/// encoder with the returned handle.
#[derive(Default)]
pub struct ByteEncoder {
    /// Bytes emitted so far.
    pub data: Vec<u8>,
}

impl ByteEncoder {
    /// Appends a single byte.
    pub fn push(&mut self, byte: u8) {
        self.data.push(byte);
    }

    /// Appends the in-memory representation of `val` and returns a handle to
    /// the written bytes.
    ///
    /// NOTE(review): any padding bytes inside `T` are copied as well; prefer
    /// padding-free `#[repr(C)]` types.
    pub fn val<T>(&mut self, val: &T) -> Reserved<T> {
        let pos = self.pos();
        // SAFETY: `val` is a valid reference, so `size_of::<T>()` bytes
        // starting at it are readable for the duration of this call.
        let bytes =
            unsafe { std::slice::from_raw_parts((val as *const T).cast::<u8>(), size_of::<T>()) };
        self.data.extend_from_slice(bytes);
        Reserved::new(pos)
    }

    /// Current length of the buffer, i.e. the offset of the next appended byte.
    pub fn pos(&self) -> usize {
        self.data.len()
    }

    /// Zero-pads the buffer up to the next multiple of `align`.
    ///
    /// # Panics
    /// Panics if `align` is zero.
    pub fn align(&mut self, align: usize) {
        self.data.resize(self.data.len().next_multiple_of(align), 0);
    }

    /// Bounds-checked raw pointer to the `size_of::<T>()` bytes at `index`.
    fn ptr_at<T>(&mut self, index: usize) -> *mut T {
        self.data[index..index + size_of::<T>()]
            .as_mut_ptr()
            .cast::<T>()
    }

    /// Overwrites the bytes at `index` with `val`; no alignment is required.
    ///
    /// # Panics
    /// Panics if `index..index + size_of::<T>()` is out of bounds.
    pub fn set_at<T>(&mut self, index: usize, val: T) {
        let ptr = self.ptr_at::<T>(index);
        // SAFETY: `ptr_at` bounds-checked the range and the write is unaligned.
        unsafe {
            ptr.write_unaligned(val);
        }
    }

    /// Reads a `T` from the bytes at `index`, passes it through `edit` and
    /// writes the result back; no alignment is required.
    ///
    /// # Panics
    /// Panics if `index..index + size_of::<T>()` is out of bounds.
    pub fn edit_at<T>(&mut self, index: usize, edit: impl FnOnce(T) -> T) {
        let ptr = self.ptr_at::<T>(index);
        // SAFETY: `ptr_at` bounds-checked the range and both accesses are
        // unaligned; the caller guarantees the bytes form a valid `T`.
        unsafe {
            let val = ptr.read_unaligned();
            ptr.write_unaligned(edit(val));
        }
    }

    /// Appends `size_of::<T>()` zero bytes and returns a handle to them.
    #[must_use]
    pub fn reserve<T>(&mut self) -> Reserved<T> {
        let pos = self.pos();
        self.data.resize(self.data.len() + size_of::<T>(), 0);
        Reserved::new(pos)
    }

    /// Appends `amt` zero bytes.
    pub fn pad(&mut self, amt: usize) {
        self.data.resize(self.data.len() + amt, 0);
    }

    /// Appends zero bytes for `len` consecutive `T`s and returns a handle to them.
    #[must_use]
    pub fn reserve_arr<T>(&mut self, len: usize) -> ReservedArr<T> {
        let pos = self.pos();
        self.data.resize(self.data.len() + size_of::<T>() * len, 0);
        ReservedArr::new(pos, len)
    }
}

/// Handle to a single `T` stored at a fixed offset inside a [`ByteEncoder`].
pub struct Reserved<T> {
    pos: usize,
    _pd: std::marker::PhantomData<T>,
}

// Implemented by hand so the handle is `Copy` even when `T` is not.
impl<T> Clone for Reserved<T> {
    fn clone(&self) -> Self {
        *self
    }
}

impl<T> Copy for Reserved<T> {}

/// Handle to `len` consecutive `T`s stored at a fixed offset inside a [`ByteEncoder`].
pub struct ReservedArr<T> {
    pos: usize,
    len: usize,
    _pd: std::marker::PhantomData<T>,
}

// Implemented by hand so the handle is `Copy` even when `T` is not.
impl<T> Clone for ReservedArr<T> {
    fn clone(&self) -> Self {
        *self
    }
}

impl<T> Copy for ReservedArr<T> {}

impl<T> Reserved<T> {
    fn new(pos: usize) -> Self {
        Self {
            pos,
            _pd: std::marker::PhantomData,
        }
    }
}

impl<T> ReservedArr<T> {
    fn new(pos: usize, len: usize) -> Self {
        Self {
            pos,
            len,
            _pd: std::marker::PhantomData,
        }
    }
}

/// Panics unless `ptr` satisfies `T`'s alignment.
///
/// References handed out by the `Index` impls must be aligned; a slot at an
/// unaligned buffer offset would otherwise be undefined behaviour to borrow.
fn assert_aligned<T>(ptr: *const T) {
    assert!(
        (ptr as usize) % std::mem::align_of::<T>() == 0,
        "ByteEncoder: reserved slot is not aligned for its type; use `align` before reserving"
    );
}

/// Borrows a reserved slot as `&T`.
///
/// # Panics
/// Panics if the slot is out of bounds or not aligned for `T`.
impl<T> Index<Reserved<T>> for ByteEncoder {
    type Output = T;

    fn index(&self, index: Reserved<T>) -> &Self::Output {
        let bytes = &self.data[index.pos..index.pos + size_of::<T>()];
        let ptr = bytes.as_ptr().cast::<T>();
        assert_aligned(ptr);
        // SAFETY: the range is in bounds (checked by slicing) and aligned
        // (checked above); the bytes were zero-filled by `reserve` or copied
        // from a real `T` by `val`, which the caller guarantees is a valid `T`.
        unsafe { &*ptr }
    }
}

/// Borrows a reserved slot as `&mut T`.
///
/// # Panics
/// Panics if the slot is out of bounds or not aligned for `T`.
impl<T> IndexMut<Reserved<T>> for ByteEncoder {
    fn index_mut(&mut self, index: Reserved<T>) -> &mut Self::Output {
        let bytes = &mut self.data[index.pos..index.pos + size_of::<T>()];
        let ptr = bytes.as_mut_ptr().cast::<T>();
        assert_aligned(ptr.cast_const());
        // SAFETY: same invariants as the shared `Index` impl; the borrow of
        // `self` is exclusive.
        unsafe { &mut *ptr }
    }
}

/// Borrows a reserved array as `&[T]`.
///
/// # Panics
/// Panics if the array is out of bounds or not aligned for `T`.
impl<T> Index<ReservedArr<T>> for ByteEncoder {
    type Output = [T];

    fn index(&self, index: ReservedArr<T>) -> &Self::Output {
        let bytes = &self.data[index.pos..index.pos + size_of::<T>() * index.len];
        let ptr = bytes.as_ptr().cast::<T>();
        assert_aligned(ptr);
        // SAFETY: the byte range covers `index.len` elements (checked by
        // slicing) and is aligned (checked above).
        unsafe { std::slice::from_raw_parts(ptr, index.len) }
    }
}

/// Borrows a reserved array as `&mut [T]`.
///
/// # Panics
/// Panics if the array is out of bounds or not aligned for `T`.
impl<T> IndexMut<ReservedArr<T>> for ByteEncoder {
    fn index_mut(&mut self, index: ReservedArr<T>) -> &mut Self::Output {
        let bytes = &mut self.data[index.pos..index.pos + size_of::<T>() * index.len];
        let ptr = bytes.as_mut_ptr().cast::<T>();
        assert_aligned(ptr.cast_const());
        // SAFETY: same invariants as the shared `Index` impl; the borrow of
        // `self` is exclusive.
        unsafe { std::slice::from_raw_parts_mut(ptr, index.len) }
    }
}

/// Appends owned bytes.
impl Extend<u8> for ByteEncoder {
    fn extend<T: IntoIterator<Item = u8>>(&mut self, iter: T) {
        self.data.extend(iter);
    }
}

/// Appends borrowed bytes.
impl<'a> Extend<&'a u8> for ByteEncoder {
    fn extend<T: IntoIterator<Item = &'a u8>>(&mut self, iter: T) {
        self.data.extend(iter);
    }
}
+5
View File
@@ -0,0 +1,5 @@
pub mod elf;
mod encode;
pub mod pe;
use encode::*;
+27
View File
@@ -0,0 +1,27 @@
/// The data-directory table that follows the PE optional header: one
/// [`DataDir`] per well-known table. All-zero (the default) means "not present".
#[derive(Default)]
#[repr(C)]
pub struct DataDirs {
pub export: DataDir,
pub import: DataDir,
pub rsc: DataDir,
pub exception: DataDir,
pub cert: DataDir,
pub base_reloc: DataDir,
pub debug: DataDir,
pub arch: DataDir,
pub global_ptr: DataDir,
pub tls: DataDir,
pub load_config: DataDir,
pub bound_import: DataDir,
pub import_addr: DataDir,
pub delay_import_desc: DataDir,
pub clr_runtime_header: DataDir,
pub reserved: DataDir,
}
/// Location (RVA) and size in bytes of one data directory.
#[derive(Default)]
#[repr(C)]
pub struct DataDir {
pub virt_addr_rva: u32,
pub size: u32,
}
+65
View File
@@ -0,0 +1,65 @@
/// Legacy DOS ("MZ") header at the start of a PE file. Only `magic` and
/// `lfanew` (file offset of the PE header) are given meaningful values by the
/// writer; everything in between is lumped into `stuff`.
#[repr(C)]
pub struct MZHeader {
pub magic: u16,
pub stuff: [u16; 15 + 4 + 10],
pub lfanew: u32,
}
/// PE signature followed by the COFF file header fields.
#[repr(C)]
pub struct PeHeader {
pub magic: u32,
pub machine: u16,
pub num_sections: u16,
pub time_date_stamp: u32,
pub sym_tab_ptr: u32,
pub num_symbols: u32,
// size in bytes of the optional header including its data directories
pub opt_header_size: u16,
pub characteristics: u16,
}
/// 64-bit (PE32+) optional header, without the trailing data directories.
#[repr(C)]
pub struct OptHeader64 {
pub magic: u16,
pub major_linker_ver: u8,
pub minor_linker_ver: u8,
pub code_size: u32,
pub init_data_size: u32,
pub uninit_data_size: u32,
pub entry_addr: u32,
pub code_base: u32,
pub image_base: u64,
pub section_align: u32,
pub file_align: u32,
pub major_os_ver: u16,
pub minor_os_ver: u16,
pub major_image_ver: u16,
pub minor_image_ver: u16,
pub major_subsystem_ver: u16,
pub minor_subsystem_ver: u16,
pub win32_ver: u32,
pub image_size: u32,
pub headers_size: u32,
pub checksum: u32,
pub subsystem: u16,
pub dll_characteristics: u16,
pub stack_reserve_size: u64,
pub stack_commit_size: u64,
pub heap_reserve_size: u64,
pub heap_commit_size: u64,
pub loader_flags: u32,
// number of entries in the data-directory table that follows
pub num_of_rva_and_sizes: u32,
}
/// One entry of the PE section table.
#[repr(C)]
pub struct Section {
// section name, NUL-padded to 8 bytes
pub name: [u8; 8],
pub virtual_size: u32,
pub virtual_addr: u32,
pub raw_data_size: u32,
pub raw_data_ptr: u32,
pub reloc_ptr: u32,
pub line_num_ptr: u32,
pub num_relocs: u16,
pub num_line_nums: u16,
pub characteristics: u32,
}
+94
View File
@@ -0,0 +1,94 @@
use crate::backend::{LibImport, pe::data_dir::DataDir};
use super::ByteEncoder;
/// Appends the import tables for `imports` to `data` and patches every use of
/// an imported symbol inside the already-emitted code.
///
/// Layout: the import directory table (one entry per library plus a null
/// terminator), then per library its NUL-terminated name, a lookup table, the
/// hint/name entries and an address table.
///
/// `code_start` is the offset inside `data` at which the code begins; the
/// usage offsets stored in each symbol are relative to it.
///
/// Returns the data-directory entry covering the import directory table only
/// (including its null terminator).
///
/// NOTE(review): buffer offsets are stored directly as RVAs — this assumes
/// file offsets and RVAs coincide; confirm against the section alignment.
pub fn encode(data: &mut ByteEncoder, imports: &[LibImport], code_start: usize) -> DataDir {
data.align(4);
let start = data.pos() as u32;
let idt = data.reserve_arr::<ImportDirTable>(imports.len());
// null entry to mark end
data.pad(size_of::<ImportDirTable>());
let end = data.pos() as u32;
for (i, import) in imports.iter().enumerate() {
// name
let name_rva = data.pos() as u32;
data.extend(import.name.as_bytes());
data.push(0);
// lookup table
data.align(size_of::<ImportLookupEntry>());
let lookup_start = data.pos();
let lookup = data.reserve_arr::<ImportLookupEntry>(import.syms.len());
// null entry terminating the lookup table
data.pad(size_of::<ImportLookupEntry>());
// this inner `i` (symbol index) shadows the outer library index
for (i, sym) in import.syms.iter().enumerate() {
let rva = hint_name_entry(data, 0, &sym.name);
data[lookup][i] = ImportLookupEntry::name(rva);
}
// address table
data.align(size_of::<ImportLookupEntry>());
let addr_start = data.pos();
for (i, sym) in import.syms.iter().enumerate() {
// position of this symbol's address-table slot
let here = data.pos() as i32;
for &usage in &sym.usages {
// NOTE: currently patches a relative offset, measured from the end of
// the 4-byte field at the usage site to this slot
let code_pos = code_start + usage;
data.set_at::<i32>(code_pos, here - code_pos as i32 - 4);
}
// the address table starts out as a copy of the lookup table
let entry = data[lookup][i];
data.val(&entry);
}
// null entry terminating the address table
data.pad(size_of::<ImportLookupEntry>());
// entry
data[idt][i] = ImportDirTable {
lookup_table_rva: lookup_start as u32,
time_date_stamp: 0,
forwarder_chain: 0,
name_rva,
address_table_rva: addr_start as u32,
};
}
DataDir {
virt_addr_rva: start,
size: end - start,
}
}
/// One entry of the PE import directory table, describing a single imported library.
#[repr(C)]
pub struct ImportDirTable {
// RVA of the library's import lookup table
pub lookup_table_rva: u32,
pub time_date_stamp: u32,
pub forwarder_chain: u32,
// RVA of the NUL-terminated library name
pub name_rva: u32,
// RVA of the library's import address table
pub address_table_rva: u32,
}
/// One 64-bit entry of an import lookup table: either the RVA of a hint/name
/// entry or, with bit 63 set, an ordinal.
#[repr(C)]
#[derive(Clone, Copy)]
pub struct ImportLookupEntry(u64);

impl ImportLookupEntry {
    /// All-zero entry.
    pub const NULL: Self = Self(0);

    /// Import by name: stores the RVA of the hint/name entry.
    ///
    /// # Panics
    /// Panics if the RVA does not fit in 30 bits.
    pub fn name(hint_name_table_rva: u32) -> Self {
        assert!(hint_name_table_rva >> 30 == 0);
        Self(u64::from(hint_name_table_rva))
    }

    /// Import by ordinal: stores the ordinal and sets bit 63.
    pub fn ordinal(ordinal: u16) -> Self {
        const ORDINAL_FLAG: u64 = 1 << 63;
        Self(ORDINAL_FLAG | u64::from(ordinal))
    }

    /// Little-endian encoding of the entry.
    pub fn bytes(&self) -> [u8; 8] {
        let Self(raw) = *self;
        raw.to_le_bytes()
    }
}
/// Appends a hint/name entry (little-endian `hint`, NUL-terminated `name`,
/// padded to an even offset) and returns the offset at which it starts.
pub fn hint_name_entry(data: &mut ByteEncoder, hint: u16, name: &str) -> u32 {
    let entry_pos = data.pos() as u32;
    data.extend(hint.to_le_bytes());
    data.extend(name.bytes());
    data.push(0);
    data.align(2);
    entry_pos
}
+114
View File
@@ -0,0 +1,114 @@
mod data_dir;
mod header;
mod import;
use super::*;
use crate::backend::LinkedProgram;
use data_dir::*;
use header::*;
/// Serializes `program` into a 64-bit PE executable image.
///
/// Layout, in file order: MZ header, PE/COFF header, optional header, data
/// directories, one section header, then a single `.text` section holding the
/// code followed by the import tables (if any).
///
/// # Panics
/// Panics if `program.entry` is `None`.
pub fn create(program: &LinkedProgram<u64>) -> Vec<u8> {
let mut data = ByteEncoder::default();
// With both alignments at 1 the `next_multiple_of` calls below are no-ops,
// so file offsets are used unchanged as virtual addresses.
let file_align = 1;
let section_align = 1;
let num_of_rva_and_sizes: u32 = (size_of::<DataDirs>() / size_of::<DataDir>()) as u32;
// The MZ header is reserved first and filled in once the PE header position is known.
let mz_header = data.reserve::<MZHeader>();
let pe_header_pos = data.pos();
data.val(&PeHeader {
magic: u32::from_ne_bytes(*b"PE\0\0"),
machine: 0x8664,
num_sections: 1,
time_date_stamp: 0,
sym_tab_ptr: 0,
num_symbols: 0,
opt_header_size: (size_of::<OptHeader64>() + size_of::<DataDirs>()) as u16,
// https://learn.microsoft.com/en-us/windows/win32/debug/pe-format
// executable | can handle >2GB addrs | debug info removed
characteristics: 0x2 | 0x20 | 0x0200,
});
data[mz_header] = MZHeader {
magic: u16::from_ne_bytes(*b"MZ"),
stuff: [0; _],
lfanew: pe_header_pos as u32,
};
// Reserved now, filled in at the end once all sizes are known.
let opt_header = data.reserve::<OptHeader64>();
let data_dirs = data.val(&DataDirs::default());
let code_sect = data.reserve::<Section>();
let hdr_size = data.pos() as u32;
// .text start
let text_start = data.pos() as u32;
let code_start = data.pos();
data.extend(&program.code);
// The import tables are appended inside the same section as the code.
if !program.imports.is_empty() {
let import_rva = import::encode(&mut data, &program.imports, code_start);
data[data_dirs].import = import_rva;
}
let text_size = data.pos() as u32 - text_start;
// .text end
data[code_sect] = Section {
name: *b".text\0\0\0",
virtual_size: text_size,
virtual_addr: hdr_size.next_multiple_of(section_align),
raw_data_size: text_size.next_multiple_of(file_align),
raw_data_ptr: text_start,
reloc_ptr: 0,
line_num_ptr: 0,
num_relocs: 0,
num_line_nums: 0,
// presumably contains-code | executable | readable — TODO confirm against the PE spec
characteristics: 0x60000020,
};
let file_size = data.pos() as u32;
data[opt_header] = OptHeader64 {
magic: 0x20b,
major_linker_ver: 8,
minor_linker_ver: 0,
code_size: text_size.next_multiple_of(file_align),
init_data_size: 0,
uninit_data_size: 0,
// entry point = start of the code plus the program's entry offset
entry_addr: (code_start as u64 + program.entry.unwrap()) as u32,
code_base: text_start,
image_base: 0x400000,
section_align,
file_align,
major_os_ver: 4,
minor_os_ver: 0,
major_image_ver: 0,
minor_image_ver: 0,
major_subsystem_ver: 4,
minor_subsystem_ver: 0,
win32_ver: 0,
image_size: file_size.next_multiple_of(section_align),
headers_size: hdr_size.next_multiple_of(file_align),
checksum: 0,
subsystem: 3, // windows CLI app
dll_characteristics: 0x400,
stack_reserve_size: 0x100000,
stack_commit_size: 0x1000,
heap_reserve_size: 0x100000,
heap_commit_size: 0x1000,
loader_flags: 0,
num_of_rva_and_sizes,
};
data.data
}
impl LinkedProgram<u64> {
    /// Serializes this program as a PE executable image; see [`create`].
    pub fn to_pe(&self) -> Vec<u8> {
        create(self)
    }
}
+15
View File
@@ -0,0 +1,15 @@
/// Address type of a target architecture.
pub trait Addr: Clone + Copy {
    /// Converts a byte length/offset into an address, truncating if it does not fit.
    fn from_len(len: usize) -> Self;
}

impl Addr for u64 {
    fn from_len(len: usize) -> Self {
        len as u64
    }
}

impl Addr for u32 {
    fn from_len(len: usize) -> Self {
        len as u32
    }
}
+126
View File
@@ -0,0 +1,126 @@
mod addr;
mod symbol;
pub use addr::*;
pub use symbol::*;
use crate::{arch::Arch, backend::LinkedProgram, io::CompilerMsg};
/// An unlinked program for architecture `A`: read-only data, functions,
/// external imports and the symbols naming them.
pub struct Program<A: Arch> {
pub ro_data: Vec<Data>,
pub funcs: Vec<Func<A>>,
// symbol at which execution starts, if set
pub entry: Option<Symbol>,
pub external: Vec<External>,
// per-symbol metadata, indexed by the symbol's number
sym_info: Vec<SymInfo>,
// number of symbols handed out so far
sym_count: usize,
}
/// A read-only data blob and the symbol that names it.
pub struct Data {
pub bytes: Vec<u8>,
pub sym: Symbol,
}
/// A function body and the symbol that names it.
pub struct Func<A: Arch> {
pub instrs: Vec<Instr<A>>,
pub sym: Symbol,
}
/// Symbols imported from one external file (e.g. a DLL).
pub struct External {
pub file: String,
pub syms: Vec<Symbol>,
}
/// Metadata recorded for every symbol.
pub struct SymInfo {
pub name: String,
// true for symbols created through `Program::external`
pub external: bool,
}
/// One instruction of a function body: either an architecture-independent
/// operation or a pre-encoded architecture-specific assembly block.
pub enum Instr<A: Arch> {
Set { dst: VarId, src: Vec<u8> },
Call { dst: FnId, args: Vec<VarId> },
Copy { dst: VarId, src: VarId },
Asm(A::Asm),
}
/// Index identifying a variable.
pub type VarId = usize;
/// Index identifying a function.
pub type FnId = usize;
impl<A: Arch> Program<A> {
pub fn encode_data(&self, data: &mut Vec<u8>, sym_tab: &mut SymTable<A::Addr>) {
for d in &self.ro_data {
let addr = A::Addr::from_len(data.len());
data.extend(&d.bytes);
sym_tab.insert(d.sym, addr);
}
}
pub fn ro_data(&mut self, name: impl Into<String>, bytes: impl Into<Vec<u8>>) -> Symbol {
let bytes = bytes.into();
let sym = self.reserve(SymInfo {
name: name.into(),
external: false,
});
self.ro_data.push(Data { bytes, sym });
sym
}
pub fn func(&mut self, name: impl Into<String>, instrs: impl Into<Vec<Instr<A>>>) -> Symbol {
let instrs = instrs.into();
let sym = self.reserve(SymInfo {
name: name.into(),
external: false,
});
self.funcs.push(Func { instrs, sym });
sym
}
pub fn external<const LEN: usize>(
&mut self,
file: impl Into<String>,
names: [impl Into<String>; LEN],
) -> [Symbol; LEN] {
let syms = names.map(|s| {
self.reserve(SymInfo {
name: s.into(),
external: true,
})
});
self.external.push(External {
file: file.into(),
syms: syms.to_vec(),
});
syms
}
fn reserve(&mut self, info: SymInfo) -> Symbol {
let res = Symbol(self.sym_count);
self.sym_info.push(info);
self.sym_count += 1;
res
}
pub fn compile(&self) -> Result<LinkedProgram<A::Addr>, CompilerMsg> {
A::compile(self)
}
pub fn sym_count(&self) -> usize {
self.sym_count
}
pub fn sym_info(&self, sym: Symbol) -> &SymInfo {
&self.sym_info[sym.0]
}
}
/// An empty program: no data, functions, imports, symbols or entry point.
/// Written by hand, so `A` itself does not need to implement `Default`.
impl<A: Arch> Default for Program<A> {
    fn default() -> Self {
        Self {
            ro_data: Vec::new(),
            funcs: Vec::new(),
            entry: None,
            external: Vec::new(),
            sym_info: Vec::new(),
            sym_count: 0,
        }
    }
}
+15
View File
@@ -0,0 +1,15 @@
/// Opaque handle naming a data blob, function or external import.
/// The wrapped number is used as an index into per-symbol tables.
#[derive(Debug, Clone, Copy, Hash, PartialEq, Eq)]
pub struct Symbol(pub(super) usize);
pub struct SymTable<Addr>(Vec<Option<Addr>>);
impl<Addr: Clone + Copy> SymTable<Addr> {
pub fn new(len: usize) -> Self {
Self(vec![None; len])
}
pub fn insert(&mut self, sym: Symbol, addr: Addr) {
self.0[sym.0] = Some(addr);
}
pub fn get(&self, sym: Symbol) -> Option<Addr> {
self.0[sym.0]
}
}
+15
View File
@@ -0,0 +1,15 @@
/// Output of compilation: encoded code plus the information a container
/// format (ELF, PE) needs to wrap it.
pub struct LinkedProgram<Addr> {
// encoded bytes of the program
pub code: Vec<u8>,
// offset of the entry point inside `code`, if the program has one
pub entry: Option<Addr>,
// libraries whose symbols still have to be bound by the container format
pub imports: Vec<LibImport>,
}
/// One imported library and the symbols used from it.
pub struct LibImport {
pub name: String,
pub syms: Vec<SymImport>,
}
/// One imported symbol and every position in `code` that refers to it.
pub struct SymImport {
pub name: String,
// offsets into `code` of the 4-byte fields to patch
pub usages: Vec<usize>,
}
+7
View File
@@ -0,0 +1,7 @@
mod container;
mod ir;
mod link;
pub use container::*;
pub use ir::*;
pub use link::*;
-117
View File
@@ -1,117 +0,0 @@
use std::{collections::HashMap, path::PathBuf};
/// Identifier of a source file inside a [`FileMap`].
pub type FileID = usize;
/// All source files known to the compiler, keyed by id.
pub type FileMap = HashMap<FileID, SrcFile>;

/// A source file: where it came from and its full text.
#[derive(Debug, Clone)]
pub struct SrcFile {
    pub path: PathBuf,
    pub text: String,
}

/// A zero-based line/column position inside a file.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub struct FilePos {
    pub file: FileID,
    pub line: usize,
    pub col: usize,
}

/// An inclusive range of positions inside a file.
#[derive(Debug, Clone, Copy, Eq, PartialEq, Hash)]
pub struct FileSpan {
    pub file: FileID,
    pub start: FilePos,
    pub end: FilePos,
}

impl FilePos {
    /// Position of the very first character of `file`.
    pub fn start(file: FileID) -> Self {
        Self {
            file,
            line: 0,
            col: 0,
        }
    }

    /// Span from `self` to `end` (inclusive), attributed to `self`'s file.
    pub fn to(self, end: FilePos) -> FileSpan {
        FileSpan {
            file: self.file,
            start: self,
            end,
        }
    }

    /// Span covering only the character at this position.
    pub fn char_span(self) -> FileSpan {
        FileSpan::at(self)
    }
}

/// Number of context lines printed before the first line of a span.
const BEFORE: usize = 1;
/// Number of context lines after a span; currently only affects the gutter width.
const AFTER: usize = 0;

impl FileSpan {
    /// File id reserved for spans that do not belong to any source file.
    const BUILTIN_FILE: usize = usize::MAX;

    /// Span covering only the character at `pos`.
    pub fn at(pos: FilePos) -> Self {
        Self {
            file: pos.file,
            start: pos,
            end: pos,
        }
    }

    /// Span used for built-in items that have no source location.
    pub fn builtin() -> Self {
        Self::at(FilePos {
            file: Self::BUILTIN_FILE,
            line: 0,
            col: 0,
        })
    }

    /// Whether this span was created by [`FileSpan::builtin`].
    pub fn is_builtin(&self) -> bool {
        self.file == Self::BUILTIN_FILE
    }

    /// Writes the source lines covered by this span to `writer`, underlining
    /// the covered columns with `^`.
    ///
    /// `file` is the full text of the file the span refers to. Built-in spans
    /// write nothing. For a multi-line span only the first and last lines are
    /// printed.
    ///
    /// # Panics
    /// Panics if the span refers to lines or columns that do not exist in `file`.
    pub fn write_for(&self, writer: &mut impl std::io::Write, file: &str) -> std::io::Result<()> {
        if self.is_builtin() {
            return Ok(());
        }
        let first_shown = self.start.line.saturating_sub(BEFORE);
        let mut lines = file.lines().skip(first_shown);
        let width = (self.end.line + AFTER).to_string().len();
        let same_line = self.start.line == self.end.line;

        // leading context lines
        for line_no in first_shown..self.start.line {
            writeln!(writer, "{:>width$} | {}", line_no, lines.next().unwrap())?;
        }

        // first line of the span and its underline
        let first = lines.next().unwrap();
        writeln!(writer, "{:>width$} | {}", self.start.line, first)?;
        let marked = if same_line {
            self.end.col - self.start.col + 1
        } else {
            first.len() - self.start.col
        };
        let mut underline = " ".repeat(self.start.col);
        underline.push_str(&"^".repeat(marked));
        writeln!(writer, "{} | {}", " ".repeat(width), underline)?;

        if same_line {
            return Ok(());
        }

        // skip the lines strictly between the first and the last one
        let skipped = self.end.line - self.start.line - 1;
        for _ in 0..skipped {
            lines.next();
        }

        // last line of the span, underlined from column 0 up to `end.col`
        let last = lines.next().unwrap();
        writeln!(writer, "{:>width$} | {}", self.end.line, last)?;
        writeln!(
            writer,
            "{} | {}",
            " ".repeat(width),
            "^".repeat(self.end.col + 1)
        )?;
        Ok(())
    }
}
-5
View File
@@ -1,5 +0,0 @@
mod output;
mod file;
pub use output::*;
pub use file::*;
-73
View File
@@ -1,73 +0,0 @@
use super::{FileMap, FilePos, FileSpan};
/// A diagnostic message together with the source spans it refers to.
#[derive(Debug, Clone)]
pub struct CompilerMsg {
pub msg: String,
// may be empty for messages without a source location
pub spans: Vec<FileSpan>,
}
/// Everything the compiler reports for one run: the loaded source files plus
/// the collected errors and hints.
pub struct CompilerOutput {
pub file_map: FileMap,
pub errs: Vec<CompilerMsg>,
pub hints: Vec<CompilerMsg>,
}
impl CompilerMsg {
    /// Message without any source location.
    pub fn from_msg(msg: String) -> Self {
        Self { msg, spans: vec![] }
    }

    /// Message attached to a single span.
    pub fn new(msg: String, span: FileSpan) -> Self {
        Self {
            msg,
            spans: vec![span],
        }
    }

    /// Message attached to the single character at `pos`.
    pub fn at(pos: FilePos, msg: String) -> Self {
        Self::new(msg, FileSpan::at(pos))
    }

    /// Writes `<ty>: <msg>` followed, for each span, by the file path and the
    /// annotated source excerpt.
    ///
    /// # Panics
    /// Panics with "unknown file id" if a span's file is not present in `map`.
    // NOTE(review): the lookup happens before `write_for` gets a chance to
    // skip built-in spans — confirm built-in spans never reach this point.
    pub fn write_to(
        &self,
        ty: &str,
        writer: &mut impl std::io::Write,
        map: &FileMap,
    ) -> std::io::Result<()> {
        let suffix = if self.spans.is_empty() { "" } else { ":" };
        writeln!(writer, "{}: {}{}", ty, self.msg, suffix)?;
        for span in self.spans.iter() {
            let src = map.get(&span.file).expect("unknown file id");
            writeln!(writer, "{:?}", &src.path)?;
            span.write_for(writer, &src.text)?;
        }
        Ok(())
    }
}
impl CompilerOutput {
    /// Creates an empty output: no files, errors or hints.
    pub fn new() -> Self {
        Self {
            file_map: FileMap::new(),
            errs: vec![],
            hints: vec![],
        }
    }

    /// Records an error.
    pub fn err(&mut self, msg: CompilerMsg) {
        self.errs.push(msg);
    }

    /// Records a hint.
    pub fn hint(&mut self, msg: CompilerMsg) {
        self.hints.push(msg);
    }

    /// Writes all errors, then all hints, to `out`.
    ///
    /// # Panics
    /// Panics if writing fails or a message refers to an unknown file.
    pub fn write_to(&self, out: &mut impl std::io::Write) {
        let errors = self.errs.iter().map(|msg| ("error", msg));
        let hints = self.hints.iter().map(|msg| ("hint", msg));
        for (ty, msg) in errors.chain(hints) {
            msg.write_to(ty, out, &self.file_map).unwrap();
        }
    }
}
-1
View File
@@ -1 +0,0 @@
pub mod riscv;
-357
View File
@@ -1,357 +0,0 @@
use crate::{
compiler::program::{Addr, Instr, SymTable},
ir::Symbol,
util::{Bits32, LabeledFmt},
};
use super::*;
/// RISC-V instruction (or pseudo-instruction) whose final encoding may depend
/// on symbol addresses. `R` is the register type and `S` the symbol type.
#[derive(Clone, Copy)]
pub enum LinkerInstruction<R = Reg, S = Symbol> {
// register-register operation selected by `op` and `funct`
Op {
op: Funct3,
funct: Funct7,
dest: R,
src1: R,
src2: R,
},
// register-immediate operation selected by `op`
OpImm {
op: Funct3,
dest: R,
src: R,
imm: i32,
},
// register-immediate operation that additionally carries a `funct` field
OpImmF7 {
op: Funct3,
funct: Funct7,
dest: R,
src: R,
imm: i32,
},
// store `src` to `offset(base)`
Store {
width: Funct3,
src: R,
offset: i32,
base: R,
},
// load `dest` from `offset(base)`
Load {
width: Funct3,
dest: R,
offset: i32,
base: R,
},
// register move
Mv {
dest: R,
src: R,
},
// load the address of symbol `src` into `dest`
La {
dest: R,
src: S,
},
// jump-and-link by a fixed offset
Jal {
dest: R,
offset: i32,
},
// call the function at the symbol
Call(S),
// jump to the symbol
J(S),
// conditional branch to `to`, comparing `left` and `right` as selected by `typ`
Branch {
to: S,
typ: Funct3,
left: R,
right: R,
},
Ret,
ECall,
EBreak,
// load the immediate `imm` into `dest`
Li {
dest: R,
imm: i32,
},
}
impl<R, S> LinkerInstruction<R, S> {
/// Converts every register with the infallible mapping `r`.
///
/// # Panics
/// Panics (via `todo!()` in `try_map`) for the symbol-carrying variants
/// `La`, `Call`, `Jal`, `J` and `Branch`.
pub fn map<R2, S2>(&self, r: impl Fn(&R) -> R2) -> LinkerInstruction<R2, S2> {
// the wrapped closure always returns `Some`, so this unwrap cannot fail
self.try_map(|v| Some(r(v))).unwrap()
}
/// Converts every register with `r`, returning `None` as soon as `r` does.
///
/// Only variants without symbols are supported; the result's symbol type
/// `S2` is therefore chosen freely by the caller.
///
/// # Panics
/// Panics (via `todo!()`) for `La`, `Call`, `Jal`, `J` and `Branch`.
pub fn try_map<R2, S2>(&self, r: impl Fn(&R) -> Option<R2>) -> Option<LinkerInstruction<R2, S2>> {
use LinkerInstruction as I;
Some(match self {
Self::ECall => I::ECall,
Self::EBreak => I::EBreak,
&Self::Li { ref dest, imm } => I::Li { dest: r(dest)?, imm },
Self::Mv { ref dest, src } => I::Mv {
dest: r(dest)?,
src: r(src)?,
},
Self::La { .. } => todo!(),
&Self::Load {
width,
ref dest,
ref base,
offset,
} => I::Load {
width,
dest: r(dest)?,
offset,
base: r(base)?,
},
&Self::Store {
width,
ref src,
ref base,
offset,
} => I::Store {
width,
src: r(src)?,
offset,
base: r(base)?,
},
&Self::Op {
op,
funct,
ref dest,
ref src1,
ref src2,
} => I::Op {
op,
funct,
dest: r(dest)?,
src1: r(src1)?,
src2: r(src2)?,
},
&Self::OpImm { op, ref dest, ref src, imm } => I::OpImm {
op,
dest: r(dest)?,
src: r(src)?,
imm,
},
&Self::OpImmF7 {
op,
funct,
ref dest,
ref src,
imm,
} => I::OpImmF7 {
op,
funct,
dest: r(dest)?,
src: r(src)?,
imm,
},
Self::Ret => I::Ret,
Self::Call(..) => todo!(),
Self::Jal { .. } => todo!(),
Self::J(..) => todo!(),
Self::Branch { .. } => todo!(),
})
}
}
/// Encodes `addi dest, src, imm`; the signed immediate is converted to its
/// unsigned bit representation first.
pub fn addi(dest: Reg, src: Reg, imm: BitsI32<11, 0>) -> RawInstruction {
    let imm_bits = imm.to_u();
    opi(op32i::ADD, dest, src, imm_bits)
}

/// Encodes `ori dest, src, imm`.
pub fn ori(dest: Reg, src: Reg, imm: Bits32<11, 0>) -> RawInstruction {
    let funct3 = op32i::OR;
    opi(funct3, dest, src, imm)
}
impl Instr for LinkerInstruction {
/// Encodes this instruction and appends it to `data`.
///
/// `pos` is the address of the instruction being emitted; symbol
/// references are encoded relative to it. If a referenced symbol is not yet
/// in `sym_map`, zero bytes of the final instruction size are appended as a
/// placeholder and the missing symbol is returned; otherwise `None`.
///
/// `missing` is not used by this implementation.
fn push_to(
&self,
data: &mut Vec<u8>,
sym_map: &mut SymTable,
pos: Addr,
missing: bool,
) -> Option<Symbol> {
// `last` is the final (often the only) 32-bit word of the encoding
let last = match self {
Self::Op {
op,
funct,
dest,
src1,
src2,
} => opr(*op, *funct, *dest, *src1, *src2),
Self::OpImm { op, dest, src, imm } => opi(*op, *dest, *src, BitsI32::new(*imm).to_u()),
Self::OpImmF7 {
op,
funct,
dest,
src,
imm,
} => opif7(*op, *funct, *dest, *src, BitsI32::new(*imm)),
Self::Store {
width,
src,
offset,
base,
} => store(*width, *src, BitsI32::new(*offset), *base),
Self::Load {
width,
dest,
offset,
base,
} => load(*width, *dest, BitsI32::new(*offset), *base),
// mv is encoded as `addi dest, src, 0`
Self::Mv { dest, src } => addi(*dest, *src, BitsI32::new(0)),
Self::La { dest, src } => {
if let Some(addr) = sym_map.get(*src) {
// Split the pc-relative offset into an upper part for `auipc` and a
// lower part for `addi`. The adjustment below presumably compensates
// for `addi` sign-extending its immediate — TODO confirm; the assert
// checks that both parts recombine to the original offset.
let offset = addr.val() as i32 - pos.val() as i32;
let sign = offset.signum();
let mut lower = offset % 0x1000;
let mut upper = offset - lower;
if (((lower >> 11) & 1) == 1) ^ (sign == -1) {
let add = sign << 12;
upper += add;
lower = offset - upper;
}
assert!(upper + (lower << 20 >> 20) == offset);
data.extend(auipc(*dest, BitsI32::new(upper)).to_le_bytes());
addi(*dest, *dest, BitsI32::new(lower))
} else {
// placeholder for the two-instruction sequence
data.extend_from_slice(&[0; 2 * 4]);
return Some(*src);
}
}
Self::Jal { dest, offset } => jal(*dest, BitsI32::new(*offset)),
Self::J(sym) => {
if let Some(addr) = sym_map.get(*sym) {
let offset = addr.val() as i32 - pos.val() as i32;
j(BitsI32::new(offset))
} else {
data.extend_from_slice(&[0; 4]);
return Some(*sym);
}
}
// call is encoded as `jal ra, offset`
Self::Call(sym) => {
if let Some(addr) = sym_map.get(*sym) {
let offset = addr.val() as i32 - pos.val() as i32;
jal(ra, BitsI32::new(offset))
} else {
data.extend_from_slice(&[0; 4]);
return Some(*sym);
}
}
Self::Ret => ret(),
Self::ECall => ecall(),
Self::EBreak => ebreak(),
// li is encoded as `addi dest, zero, imm`
Self::Li { dest, imm } => addi(*dest, zero, BitsI32::new(*imm)),
Self::Branch {
to,
typ,
left,
right,
} => {
if let Some(addr) = sym_map.get(*to) {
let offset = addr.val() as i32 - pos.val() as i32;
branch(*typ, *left, *right, BitsI32::new(offset))
} else {
data.extend_from_slice(&[0; 4]);
return Some(*to);
}
}
};
data.extend(last.to_le_bytes());
None
}
}
/// Shorthand constructors for commonly used instruction variants.
impl LinkerInstruction {
/// `OpImm` with funct3 `op32i::ADD`.
pub fn addi(dest: Reg, src: Reg, imm: i32) -> Self {
Self::OpImm {
op: op32i::ADD,
dest,
src,
imm,
}
}
/// `Store` of width `width::D` from `src` to `offset(base)`.
pub fn sd(src: Reg, offset: i32, base: Reg) -> Self {
Self::Store {
width: width::D,
src,
offset,
base,
}
}
/// `Load` of width `width::D` from `offset(base)` into `dest`.
pub fn ld(dest: Reg, offset: i32, base: Reg) -> Self {
Self::Load {
width: width::D,
dest,
offset,
base,
}
}
}
// NOTE: routing `Debug` through the labeler is arguably not worth the complexity, but it is
// believed to avoid heap allocation while formatting.
/// `Debug` output is the labeled form with every symbol printed through its own `Debug` impl.
impl<R: std::fmt::Debug, S: std::fmt::Debug> std::fmt::Debug for LinkerInstruction<R, S> {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
self.fmt_label(f, &|f, s| write!(f, "{s:?}"))
}
}
/// Pairs a borrowed instruction with a borrowed symbol-labeling callback.
///
/// `L` writes a textual form of a symbol `S` into the formatter it is given.
pub struct DebugInstr<'a, R, S, L: Fn(&mut std::fmt::Formatter<'_>, &S) -> std::fmt::Result> {
instr: &'a LinkerInstruction<R, S>,
label: &'a L,
}
impl<R: std::fmt::Debug, S: std::fmt::Debug> LabeledFmt<S> for LinkerInstruction<R, S> {
    /// Writes the instruction in an assembly-like textual form.
    ///
    /// Registers are printed with their `Debug` impl. Every symbol operand (`La`, `Call`, `J`,
    /// `Branch`) is printed by calling `label`, so the caller decides how symbols are shown.
    fn fmt_label(
        &self,
        f: &mut std::fmt::Formatter<'_>,
        label: &dyn crate::util::Labeler<S>,
    ) -> std::fmt::Result {
        match self {
            Self::ECall => write!(f, "ecall"),
            Self::EBreak => write!(f, "ebreak"),
            Self::Ret => write!(f, "ret"),
            Self::Li { dest, imm } => write!(f, "li {dest:?}, {imm:?}"),
            Self::Mv { dest, src } => write!(f, "mv {dest:?}, {src:?}"),
            Self::La { dest, src } => {
                write!(f, "la {dest:?}, @")?;
                label(f, src)
            }
            Self::Load {
                width,
                dest,
                offset,
                base,
            } => write!(f, "l{} {dest:?}, {offset}({base:?})", width::str(*width)),
            Self::Store {
                width,
                src,
                offset,
                base,
            } => write!(f, "s{} {src:?}, {offset}({base:?})", width::str(*width)),
            Self::Op {
                op,
                funct,
                dest,
                src1,
                src2,
            } => write!(f, "{} {dest:?}, {src1:?}, {src2:?}", opstr(*op, *funct)),
            // The immediate form carries no funct7, so the mnemonic is looked up with the
            // default `op32i::FUNCT7` and suffixed with `i`.
            Self::OpImm { op, dest, src, imm } => {
                write!(f, "{}i {dest:?}, {src:?}, {imm}", opstr(*op, op32i::FUNCT7))
            }
            Self::OpImmF7 {
                op,
                funct,
                dest,
                src,
                imm,
            } => write!(f, "{}i {dest:?}, {src:?}, {imm}", opstr(*op, *funct)),
            Self::Jal { dest, offset } => write!(f, "jal {dest:?}, {offset:?}"),
            Self::Call(s) => {
                write!(f, "call ")?;
                label(f, s)
            }
            Self::J(s) => {
                write!(f, "j ")?;
                label(f, s)
            }
            // The target goes through `label` like every other symbol operand, and the
            // operands are comma-separated like the other multi-operand instructions.
            Self::Branch {
                to,
                typ,
                left,
                right,
            } => {
                write!(f, "b{} {left:?}, {right:?}, ", branch::str(*typ))?;
                label(f, to)
            }
        }
    }
}
-224
View File
@@ -1,224 +0,0 @@
use std::collections::HashMap;
use crate::{
compiler::{arch::riscv::Reg, debug::DebugInfo, UnlinkedFunction, UnlinkedProgram},
ir::{arch::riscv64::RegRef, LInstruction as IRI, LProgram, Len, Size, VarID},
};
use super::{LinkerInstruction as LI, *};
/// Returns `s` divided by 8, rounded up (`0` maps to `0`).
///
/// NOTE(review): presumably converts a size into whole 8-unit slots; the unit of `Size` is
/// not visible here, so confirm against the callers.
fn align(s: &Size) -> i32 {
    let signed = *s as i32;
    let full_groups = (signed - 1).div_euclid(8);
    full_groups + 1
}
/// Appends load/store pairs to `v` that copy `len` units from `src_offset(src)` to
/// `dest_offset(dest)`, staging each piece in `temp`.
///
/// The widths in `width::MAIN` are tried from last to first; each is used for as long as the
/// remaining length is at least `width::len` of it.
fn mov_mem(
    v: &mut Vec<LI>,
    src: Reg,
    src_offset: i32,
    dest: Reg,
    dest_offset: i32,
    temp: Reg,
    mut len: Len,
) {
    // Running displacement shared by the source and destination sides.
    let mut copied: i32 = 0;
    for &width in width::MAIN.iter().rev() {
        let step = width::len(width);
        while len >= step {
            v.push(LI::Load {
                width,
                dest: temp,
                offset: src_offset + copied,
                base: src,
            });
            v.push(LI::Store {
                width,
                src: temp,
                offset: dest_offset + copied,
                base: dest,
            });
            len -= step;
            copied += step as i32;
        }
    }
}
/// Lowers every function of `program` into a list of `LinkerInstruction`s and gathers the
/// read-only data, producing an `UnlinkedProgram` ready for linking.
///
/// Each variable gets a slot at a fixed offset from `sp`; IR instructions are translated into
/// loads and stores on those slots using `t0`/`t1` as scratch registers.
pub fn compile(program: &LProgram) -> UnlinkedProgram<LI> {
let mut fns = Vec::new();
let mut data = Vec::new();
let mut dbg = DebugInfo::new(program.labels().to_vec());
for (sym, d) in program.ro_data() {
data.push((d.clone(), *sym));
}
for (sym, f) in program.fns() {
let mut v = Vec::new();
// Offset of every variable's slot relative to `sp` after the prologue.
let mut stack = HashMap::new();
let mut stack_len = 0;
let mut stack_ra = None;
let mut stack_rva = None;
// Slot order: optional return address, locals, arguments (reversed), optional return-value
// address.
if f.makes_call {
// return addr
stack_ra = Some(stack_len);
stack_len += 8;
}
for (id, s) in &f.stack {
stack.insert(id, stack_len);
stack_len += align(s);
}
for (id, s) in f.args.iter().rev() {
stack.insert(id, stack_len);
stack_len += align(s);
}
if f.ret_size > 0 {
stack_rva = Some(stack_len);
stack_len += align(&f.ret_size);
}
// Prologue: this is emitted even when `stack_len` is 0.
v.push(LI::addi(sp, sp, -stack_len));
for (id, var) in &f.subvar_map {
// TODO: ALIGN DOES NOT MAKE SENSE HERE!!! need to choose to decide in lower or asm
stack.insert(id, stack[&var.id] + align(&var.offset));
}
let has_stack = stack_len > 0;
if has_stack {
if let Some(stack_ra) = stack_ra {
v.push(LI::sd(ra, stack_ra, sp));
}
}
// Instruction index -> symbol marked at that index.
let mut locations = HashMap::new();
// (instruction index, IR text) pairs recorded for debug output.
let mut irli = Vec::new();
// Epilogue, built once and appended at every `Ret`.
let mut ret = Vec::new();
if has_stack {
if let Some(stack_ra) = stack_ra {
ret.push(LI::ld(ra, stack_ra, sp));
}
ret.push(LI::addi(sp, sp, stack_len));
}
ret.push(LI::Ret);
for i in &f.instructions {
irli.push((v.len(), format!("{i:?}")));
match i {
IRI::Mv {
dst: dest,
dst_offset: dest_offset,
src,
src_offset,
} => {
let s = align(&f.stack[src]) as u32;
mov_mem(
&mut v,
sp,
stack[src] + align(src_offset),
sp,
stack[dest] + align(dest_offset),
t0,
s,
);
}
// Store the address of `src`'s slot into `dest`'s slot.
IRI::Ref { dst: dest, src } => {
v.push(LI::addi(t0, sp, stack[src]));
v.push(LI::sd(t0, stack[dest], sp));
}
IRI::LoadAddr {
dst: dest,
offset,
src,
} => {
v.extend([
LI::La {
dest: t0,
src: *src,
},
LI::sd(t0, stack[dest] + *offset as i32, sp),
]);
}
IRI::LoadData {
dst: dest,
offset,
src,
len,
} => {
v.push(LI::La {
dest: t0,
src: *src,
});
mov_mem(&mut v, t0, 0, sp, stack[dest] + *offset as i32, t1, *len);
}
IRI::Call { dst: dest, f, args } => {
// Outgoing values are written at negative offsets from `sp`: first the address of
// the destination slot (if any), then each argument in order.
let mut offset = 0;
if let Some((dest, s)) = dest {
offset -= align(s);
v.push(LI::addi(t0, sp, stack[&dest]));
v.push(LI::sd(t0, offset, sp))
}
for (arg, s) in args {
let bs = align(s);
offset -= bs;
mov_mem(&mut v, sp, stack[arg], sp, offset, t0, bs as Len);
}
v.push(LI::Call(*f));
}
IRI::AsmBlock {
inputs,
outputs,
instructions,
} => {
// Load inputs into their registers, emit the block, then store the outputs.
for (reg, var) in inputs {
v.push(LI::ld(*reg, stack[var], sp));
}
// Only concrete registers are supported; variable references are not implemented.
fn r(rr: &RegRef<VarID>) -> Reg {
match rr {
RegRef::Var(..) => todo!(),
RegRef::Reg(reg) => *reg,
}
}
for i in instructions {
v.push(i.map(|v| r(v)));
}
for (reg, var) in outputs {
v.push(LI::sd(*reg, stack[var], sp));
}
}
IRI::Ret { src } => {
if let Some(src) = src {
let Some(rva) = stack_rva else {
panic!("no return value address on stack!")
};
// Load the destination address from the return-value slot, then copy through it.
v.push(LI::ld(t0, rva, sp));
mov_mem(&mut v, sp, stack[src], t0, 0, t1, align(&f.ret_size) as u32);
}
v.extend(&ret);
}
IRI::Jump(location) => {
v.push(LI::J(*location));
}
// Emitted as a `branch::EQ` comparison of the loaded condition against `zero`.
IRI::Branch { to, cond } => {
v.push(LI::ld(t0, stack[cond], sp));
v.push(LI::Branch {
to: *to,
typ: branch::EQ,
left: t0,
right: zero,
})
}
// A mark emits nothing; it names the index of the next instruction.
IRI::Mark(location) => {
locations.insert(v.len(), *location);
}
}
}
dbg.push_fn(irli);
fns.push(UnlinkedFunction {
instrs: v,
sym: *sym,
locations,
});
}
UnlinkedProgram {
fns,
ro_data: data,
start: Some(program.entry()),
dbg,
sym_count: program.len(),
}
}
-94
View File
@@ -1,94 +0,0 @@
use crate::{
compiler::arch::riscv::Reg,
util::{Bits32, BitsI32},
};
/// A single encoded instruction word.
pub struct RawInstruction(u32);

impl RawInstruction {
    /// Yields the four bytes of the word, least-significant byte first.
    pub fn to_le_bytes(&self) -> impl IntoIterator<Item = u8> {
        let bytes: [u8; 4] = self.0.to_le_bytes();
        IntoIterator::into_iter(bytes)
    }

    /// Yields the four bytes of the word, most-significant byte first.
    pub fn to_be_bytes(&self) -> impl IntoIterator<Item = u8> {
        let bytes: [u8; 4] = self.0.to_be_bytes();
        IntoIterator::into_iter(bytes)
    }
}
// Opcode values; the format encoders add these unshifted as the lowest field of the word.
/// Opcode used by `ecall` and `ebreak`.
pub const SYSTEM: u32 = 0b1110011;
/// Opcode used by `load`.
pub const LOAD: u32 = 0b0000011;
/// Opcode used by `store`.
pub const STORE: u32 = 0b0100011;
/// Opcode used by `auipc`.
pub const AUIPC: u32 = 0b0010111;
/// Opcode used by the register-immediate operations (`opi`, `opif7`).
pub const IMM_OP: u32 = 0b0010011;
/// Opcode used by the register-register operations (`opr`).
pub const OP: u32 = 0b0110011;
/// Opcode used by `jal`.
pub const JAL: u32 = 0b1101111;
/// Opcode used by `jalr`.
pub const JALR: u32 = 0b1100111;
/// Opcode used by `branch`.
pub const BRANCH: u32 = 0b1100011;
/// Three-bit function field (bits 2..=0 of its value).
pub type Funct3 = Bits32<2, 0>;
/// Seven-bit function field (bits 6..=0 of its value).
pub type Funct7 = Bits32<6, 0>;
use RawInstruction as I;
/// Packs the fields of an R-type word.
///
/// Field positions by shift: `funct7` at bit 25, `rs2` at 20, `rs1` at 15, `funct3` at 12,
/// `rd` at 7, `opcode` unshifted.
pub const fn r_type(
    funct7: Bits32<6, 0>,
    rs2: Reg,
    rs1: Reg,
    funct3: Bits32<2, 0>,
    rd: Reg,
    opcode: u32,
) -> I {
    let f7_field = funct7.val() << 25;
    let rs2_field = rs2.val() << 20;
    let rs1_field = rs1.val() << 15;
    let f3_field = funct3.val() << 12;
    let rd_field = rd.val() << 7;
    I(f7_field + rs2_field + rs1_field + f3_field + rd_field + opcode)
}
/// Packs the fields of an I-type word.
///
/// Field positions by shift: `imm` at bit 20, `rs1` at 15, `funct` at 12, `rd` at 7,
/// `opcode` unshifted.
pub const fn i_type(imm: Bits32<11, 0>, rs1: Reg, funct: Funct3, rd: Reg, opcode: u32) -> I {
    let imm_field = imm.val() << 20;
    let rs1_field = rs1.val() << 15;
    let f3_field = funct.val() << 12;
    let rd_field = rd.val() << 7;
    I(imm_field + rs1_field + f3_field + rd_field + opcode)
}
/// Packs the fields of an S-type word.
///
/// The immediate is split: bits 11..=5 go to bit 25 and bits 4..=0 go to bit 7. `rs2` sits at
/// bit 20, `rs1` at 15, `funct3` at 12, `opcode` unshifted.
pub const fn s_type(rs2: Reg, rs1: Reg, funct3: Funct3, imm: Bits32<11, 0>, opcode: u32) -> I {
    let imm_hi = imm.bits(11, 5) << 25;
    let rs2_field = rs2.val() << 20;
    let rs1_field = rs1.val() << 15;
    let f3_field = funct3.val() << 12;
    let imm_lo = imm.bits(4, 0) << 7;
    I(imm_hi + rs2_field + rs1_field + f3_field + imm_lo + opcode)
}
/// Packs the fields of a B-type word.
///
/// The immediate is scattered: bit 12 goes to bit 31, bits 10..=5 to bit 25, bits 4..=1 to
/// bit 8 and bit 11 to bit 7. `rs2` sits at bit 20, `rs1` at 15, `funct3` at 12, `opcode`
/// unshifted.
pub const fn b_type(rs2: Reg, rs1: Reg, funct3: Funct3, imm: Bits32<12, 1>, opcode: u32) -> I {
    let imm_12 = imm.bit(12) << 31;
    let imm_10_5 = imm.bits(10, 5) << 25;
    let rs2_field = rs2.val() << 20;
    let rs1_field = rs1.val() << 15;
    let f3_field = funct3.val() << 12;
    let imm_4_1 = imm.bits(4, 1) << 8;
    let imm_11 = imm.bit(11) << 7;
    I(imm_12 + imm_10_5 + rs2_field + rs1_field + f3_field + imm_4_1 + imm_11 + opcode)
}
/// Packs the fields of a U-type word: immediate bits 31..=12 stay at bit 12, `rd` sits at
/// bit 7, `opcode` is unshifted.
pub const fn u_type(imm: Bits32<31, 12>, rd: Reg, opcode: u32) -> I {
    let imm_field = imm.bits(31, 12) << 12;
    let rd_field = rd.val() << 7;
    I(imm_field + rd_field + opcode)
}
/// Packs the fields of a J-type word.
///
/// The immediate is scattered: bit 20 goes to bit 31, bits 10..=1 to bit 21, bit 11 to bit 20
/// and bits 19..=12 to bit 12. `rd` sits at bit 7, `opcode` unshifted.
pub const fn j_type(imm: Bits32<20, 1>, rd: Reg, opcode: u32) -> I {
    let imm_20 = imm.bit(20) << 31;
    let imm_10_1 = imm.bits(10, 1) << 21;
    let imm_11 = imm.bit(11) << 20;
    let imm_19_12 = imm.bits(19, 12) << 12;
    let rd_field = rd.val() << 7;
    I(imm_20 + imm_10_1 + imm_11 + imm_19_12 + rd_field + opcode)
}
/// Encodes a register-register operation: an R-type word with opcode `OP`, `op` as funct3 and
/// `funct` as funct7.
pub fn opr(op: Funct3, funct: Funct7, dest: Reg, src1: Reg, src2: Reg) -> I {
r_type(funct, src2, src1, op, dest, OP)
}
/// Encodes a register-immediate operation: an I-type word with opcode `IMM_OP` and `op` as
/// funct3.
pub fn opi(op: Funct3, dest: Reg, src: Reg, imm: Bits32<11, 0>) -> RawInstruction {
i_type(imm, src, op, dest, IMM_OP)
}
/// Encodes a register-immediate operation whose immediate field also carries a funct7.
///
/// The I-type immediate is built from `imm` (bits 4..=0) plus `funct` shifted left by 5.
pub fn opif7(op: Funct3, funct: Funct7, dest: Reg, src: Reg, imm: BitsI32<4, 0>) -> I {
    let low = imm.to_u().val();
    let high = funct.val() << 5;
    let combined = Bits32::new(low + high);
    i_type(combined, src, op, dest, IMM_OP)
}
-10
View File
@@ -1,10 +0,0 @@
use super::*;
mod base;
mod rv32i;
mod rv32m;
mod string;
pub use base::*;
pub use rv32i::*;
pub use rv32m::*;
pub use string::*;
-122
View File
@@ -1,122 +0,0 @@
use crate::{compiler::arch::riscv::Reg, util::Bits32};
use super::*;
/// funct3 / funct7 values for the base integer operations.
pub mod op32i {
use super::*;
// funct3 selectors.
pub const ADD: Funct3 = Funct3::new(0b000);
pub const SL: Funct3 = Funct3::new(0b001);
pub const SLT: Funct3 = Funct3::new(0b010);
pub const SLTU: Funct3 = Funct3::new(0b011);
pub const XOR: Funct3 = Funct3::new(0b100);
pub const SR: Funct3 = Funct3::new(0b101);
pub const OR: Funct3 = Funct3::new(0b110);
pub const AND: Funct3 = Funct3::new(0b111);
// funct7 selectors; `opstr` pairs SL/SR with LOGICAL ("sll"/"srl") and SR with ARITHMETIC ("sra").
pub const LOGICAL: Funct7 = Funct7::new(0b0000000);
pub const ARITHMETIC: Funct7 = Funct7::new(0b0100000);
// `opstr` pairs ADD with F7ADD ("add") and with F7SUB ("sub").
pub const F7ADD: Funct7 = Funct7::new(0b0000000);
pub const F7SUB: Funct7 = Funct7::new(0b0100000);
/// funct7 used for the remaining operations of this module.
pub const FUNCT7: Funct7 = Funct7::new(0b0000000);
}
/// funct3 values selecting the access width of loads and stores.
pub mod width {
use crate::ir::Len;
use super::*;
/// The four basic widths, in increasing `len` order.
pub const MAIN: [Funct3; 4] = [B, H, W, D];
pub const B: Funct3 = Funct3::new(0b000);
pub const H: Funct3 = Funct3::new(0b001);
pub const W: Funct3 = Funct3::new(0b010);
pub const D: Funct3 = Funct3::new(0b011);
pub const BU: Funct3 = Funct3::new(0b100);
pub const HU: Funct3 = Funct3::new(0b101);
pub const WU: Funct3 = Funct3::new(0b110);
/// Mnemonic suffix for a width; hits `unreachable!()` for a value that is not one of the
/// constants above.
pub const fn str(w: Funct3) -> &'static str {
match w {
B => "b",
H => "h",
W => "w",
D => "d",
BU => "bu",
HU => "hu",
WU => "wu",
_ => unreachable!(),
}
}
/// Length covered by an access of this width (the `U` variants match their plain
/// counterparts); hits `unreachable!()` for a value that is not one of the constants above.
pub const fn len(w: Funct3) -> Len {
match w {
B => 1,
H => 2,
W => 4,
D => 8,
BU => 1,
HU => 2,
WU => 4,
_ => unreachable!(),
}
}
}
/// funct3 values selecting the comparison of a conditional branch.
pub mod branch {
use super::*;
pub const EQ: Funct3 = Funct3::new(0b000);
pub const NE: Funct3 = Funct3::new(0b001);
pub const LT: Funct3 = Funct3::new(0b100);
pub const GE: Funct3 = Funct3::new(0b101);
pub const LTU: Funct3 = Funct3::new(0b110);
pub const GEU: Funct3 = Funct3::new(0b111);
/// Mnemonic suffix for a comparison, or `"?"` for a value that is not one of the constants
/// above.
pub fn str(f: Funct3) -> &'static str {
match f {
EQ => "eq",
NE => "ne",
LT => "lt",
GE => "ge",
LTU => "ltu",
GEU => "geu",
_ => "?",
}
}
}
/// Encodes `ecall`: an I-type `SYSTEM` word with immediate 0 and all register fields `zero`.
pub const fn ecall() -> RawInstruction {
i_type(Bits32::new(0), zero, Bits32::new(0), zero, SYSTEM)
}
/// Encodes `ebreak`: an I-type `SYSTEM` word with immediate 1 and all register fields `zero`.
pub const fn ebreak() -> RawInstruction {
i_type(Bits32::new(1), zero, Bits32::new(0), zero, SYSTEM)
}
/// Encodes `auipc`: a U-type word with opcode `AUIPC`, destination `dest` and immediate `imm`.
pub const fn auipc(dest: Reg, imm: BitsI32<31, 12>) -> RawInstruction {
u_type(imm.to_u(), dest, AUIPC)
}
/// Encodes a load of the given `width` from `offset(base)` into `dest` (I-type, opcode `LOAD`).
pub const fn load(width: Funct3, dest: Reg, offset: BitsI32<11, 0>, base: Reg) -> RawInstruction {
i_type(offset.to_u(), base, width, dest, LOAD)
}
/// Encodes a store of the given `width` from `src` to `offset(base)` (S-type, opcode `STORE`).
pub const fn store(width: Funct3, src: Reg, offset: BitsI32<11, 0>, base: Reg) -> RawInstruction {
s_type(src, base, width, offset.to_u(), STORE)
}
/// Encodes `jal`: a J-type word with opcode `JAL`, link register `dest` and the given offset.
pub const fn jal(dest: Reg, offset: BitsI32<20, 1>) -> RawInstruction {
j_type(offset.to_u(), dest, JAL)
}
/// Encodes `jalr`: an I-type word with opcode `JALR`, funct3 0, link register `dest` and target
/// `offset(base)`.
pub const fn jalr(dest: Reg, offset: BitsI32<11, 0>, base: Reg) -> RawInstruction {
i_type(offset.to_u(), base, Bits32::new(0), dest, JALR)
}
/// Encodes an unconditional jump as `jal` with `zero` as the link register.
pub const fn j(offset: BitsI32<20, 1>) -> RawInstruction {
jal(zero, offset)
}
/// Encodes a return as `jalr zero, 0(ra)`.
pub const fn ret() -> RawInstruction {
jalr(zero, BitsI32::new(0), ra)
}
/// Encodes a conditional branch (B-type, opcode `BRANCH`) comparing `left` (rs1) with `right`
/// (rs2) using comparison `typ`.
pub const fn branch(typ: Funct3, left: Reg, right: Reg, offset: BitsI32<12, 1>) -> RawInstruction {
b_type(right, left, typ, offset.to_u(), BRANCH)
}
-16
View File
@@ -1,16 +0,0 @@
use super::{Funct3, Funct7};
/// funct3 / funct7 values for the multiply, divide and remainder operations.
pub mod op32m {
use super::*;
pub const MUL: Funct3 = Funct3::new(0b000);
pub const MULH: Funct3 = Funct3::new(0b001);
pub const MULHSU: Funct3 = Funct3::new(0b010);
pub const MULHU: Funct3 = Funct3::new(0b011);
pub const DIV: Funct3 = Funct3::new(0b100);
pub const DIVU: Funct3 = Funct3::new(0b101);
pub const REM: Funct3 = Funct3::new(0b110);
pub const REMU: Funct3 = Funct3::new(0b111);
/// funct7 shared by every operation of this module.
pub const FUNCT7: Funct7 = Funct7::new(0b0000001);
}
-27
View File
@@ -1,27 +0,0 @@
use super::*;
/// Returns the mnemonic for a `(funct3, funct7)` pair, or `"unknown"` when the pair is not
/// listed.
pub fn opstr(op: Funct3, funct: Funct7) -> &'static str {
match (op, funct) {
// Base integer operations.
(op32i::SLT, op32i::FUNCT7) => "slt",
(op32i::SLTU, op32i::FUNCT7) => "sltu",
(op32i::XOR, op32i::FUNCT7) => "xor",
(op32i::OR, op32i::FUNCT7) => "or",
(op32i::AND, op32i::FUNCT7) => "and",
// Same funct3, distinguished by funct7.
(op32i::ADD, op32i::F7ADD) => "add",
(op32i::ADD, op32i::F7SUB) => "sub",
(op32i::SL, op32i::LOGICAL) => "sll",
(op32i::SR, op32i::LOGICAL) => "srl",
(op32i::SR, op32i::ARITHMETIC) => "sra",
// Multiply / divide operations.
(op32m::MUL, op32m::FUNCT7) => "mul",
(op32m::MULH, op32m::FUNCT7) => "mulh",
(op32m::MULHSU, op32m::FUNCT7) => "mulhsu",
(op32m::MULHU, op32m::FUNCT7) => "mulhu",
(op32m::DIV, op32m::FUNCT7) => "div",
(op32m::DIVU, op32m::FUNCT7) => "divu",
(op32m::REM, op32m::FUNCT7) => "rem",
(op32m::REMU, op32m::FUNCT7) => "remu",
_ => "unknown",
}
}
-11
View File
@@ -1,11 +0,0 @@
mod asm;
mod compile;
mod reg;
mod instr;
use crate::util::BitsI32;
pub use asm::*;
pub use compile::*;
pub use reg::*;
pub use instr::*;
-180
View File
@@ -1,180 +0,0 @@
#![allow(non_upper_case_globals)]
/// A register, stored as its number; `val` exposes the number as the `u32` used in encodings.
#[derive(Clone, Copy)]
pub struct Reg(u8);
/// hard wired 0
pub const zero: Reg = Reg(0);
/// return address
pub const ra: Reg = Reg(1);
/// stack pointer
pub const sp: Reg = Reg(2);
/// global pointer
pub const gp: Reg = Reg(3);
/// thread pointer
pub const tp: Reg = Reg(4);
/// temp / alternate link
pub const t0: Reg = Reg(5);
pub const t1: Reg = Reg(6);
pub const t2: Reg = Reg(7);
// `fp` and `s0` are two names for register 8; `Debug` prints it as "fp".
pub const fp: Reg = Reg(8);
pub const s0: Reg = Reg(8);
pub const s1: Reg = Reg(9);
pub const a0: Reg = Reg(10);
pub const a1: Reg = Reg(11);
pub const a2: Reg = Reg(12);
pub const a3: Reg = Reg(13);
pub const a4: Reg = Reg(14);
pub const a5: Reg = Reg(15);
pub const a6: Reg = Reg(16);
pub const a7: Reg = Reg(17);
pub const s2: Reg = Reg(18);
pub const s3: Reg = Reg(19);
pub const s4: Reg = Reg(20);
pub const s5: Reg = Reg(21);
pub const s6: Reg = Reg(22);
pub const s7: Reg = Reg(23);
pub const s8: Reg = Reg(24);
pub const s9: Reg = Reg(25);
pub const s10: Reg = Reg(26);
pub const s11: Reg = Reg(27);
pub const t3: Reg = Reg(28);
pub const t4: Reg = Reg(29);
pub const t5: Reg = Reg(30);
pub const t6: Reg = Reg(31);
impl Reg {
/// Returns the register number widened to `u32`, ready to be shifted into an instruction word.
#[inline]
pub const fn val(&self) -> u32 {
self.0 as u32
}
}
impl Reg {
    /// Looks up a register by name.
    ///
    /// Returns `None` when `str` is not one of the names below. Both `"fp"` and `"s0"` are
    /// accepted.
    pub fn from_str(str: &str) -> Option<Self> {
        match str {
            "zero" => Some(zero),
            "ra" => Some(ra),
            "sp" => Some(sp),
            "gp" => Some(gp),
            "tp" => Some(tp),
            "t0" => Some(t0),
            "t1" => Some(t1),
            "t2" => Some(t2),
            "fp" => Some(fp),
            "s0" => Some(s0),
            "s1" => Some(s1),
            "a0" => Some(a0),
            "a1" => Some(a1),
            "a2" => Some(a2),
            "a3" => Some(a3),
            "a4" => Some(a4),
            "a5" => Some(a5),
            "a6" => Some(a6),
            "a7" => Some(a7),
            "s2" => Some(s2),
            "s3" => Some(s3),
            "s4" => Some(s4),
            "s5" => Some(s5),
            "s6" => Some(s6),
            "s7" => Some(s7),
            "s8" => Some(s8),
            "s9" => Some(s9),
            "s10" => Some(s10),
            "s11" => Some(s11),
            "t3" => Some(t3),
            "t4" => Some(t4),
            "t5" => Some(t5),
            "t6" => Some(t6),
            _ => None,
        }
    }
}
impl std::fmt::Debug for Reg {
    /// Prints the register's name, or `unknown` for a number outside the table.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        // Indexed by register number; register 8 is shown as "fp".
        const NAMES: [&str; 32] = [
            "zero", "ra", "sp", "gp", "tp", "t0", "t1", "t2", "fp", "s1", "a0", "a1", "a2", "a3",
            "a4", "a5", "a6", "a7", "s2", "s3", "s4", "s5", "s6", "s7", "s8", "s9", "s10", "s11",
            "t3", "t4", "t5", "t6",
        ];
        let name = NAMES
            .get(usize::from(self.0))
            .copied()
            .unwrap_or("unknown");
        write!(f, "{}", name)
    }
}
// pub const ft0: Reg = Reg(0);
// pub const ft1: Reg = Reg(1);
// pub const ft2: Reg = Reg(2);
// pub const ft3: Reg = Reg(3);
// pub const ft4: Reg = Reg(4);
// pub const ft5: Reg = Reg(5);
// pub const ft6: Reg = Reg(6);
// pub const ft7: Reg = Reg(7);
//
// pub const fs0: Reg = Reg(8);
// pub const fs1: Reg = Reg(9);
//
// pub const fa0: Reg = Reg(10);
// pub const fa1: Reg = Reg(11);
// pub const fa2: Reg = Reg(12);
// pub const fa3: Reg = Reg(13);
// pub const fa4: Reg = Reg(14);
// pub const fa5: Reg = Reg(15);
// pub const fa6: Reg = Reg(16);
// pub const fa7: Reg = Reg(17);
//
// pub const fs2: Reg = Reg(18);
// pub const fs3: Reg = Reg(19);
// pub const fs4: Reg = Reg(20);
// pub const fs5: Reg = Reg(21);
// pub const fs6: Reg = Reg(22);
// pub const fs7: Reg = Reg(23);
// pub const fs8: Reg = Reg(24);
// pub const fs9: Reg = Reg(25);
// pub const fs10: Reg = Reg(26);
// pub const fs11: Reg = Reg(27);
//
// pub const ft8: Reg = Reg(28);
// pub const ft9: Reg = Reg(29);
// pub const ft10: Reg = Reg(30);
// pub const ft11: Reg = Reg(31);
-23
View File
@@ -1,23 +0,0 @@
use crate::ir::Symbol;
/// Side information used when pretty-printing a program.
pub struct DebugInfo {
/// Optional label per symbol, indexed by the symbol's numeric value.
pub sym_labels: Vec<Option<String>>,
/// One entry per function: `(instruction index, text)` pairs.
pub ir_lower: Vec<Vec<(usize, String)>>,
}
impl DebugInfo {
/// Creates debug info with the given symbol labels and no per-function entries.
pub fn new(sym_labels: Vec<Option<String>>) -> Self {
Self {
ir_lower: Vec::new(),
sym_labels,
}
}
/// Appends the `(instruction index, text)` list of one more function.
pub fn push_fn(&mut self, instrs: Vec<(usize, String)>) {
self.ir_lower.push(instrs);
}
/// Returns the label of `s`, if it has one. Indexing panics when `s` is out of range.
pub fn sym_label(&self, s: Symbol) -> Option<&String> {
self.sym_labels[*s].as_ref()
}
}
-3
View File
@@ -1,3 +0,0 @@
/// Placeholder: this enum currently has no variants.
pub enum Instruction {
}
-14
View File
@@ -1,14 +0,0 @@
pub mod arch;
mod debug;
mod elf;
mod program;
mod target;
use arch::riscv;
pub use program::*;
use crate::ir::LProgram;
/// Compiles a lowered program by delegating to the RISC-V backend.
pub fn compile(program: &LProgram) -> UnlinkedProgram<riscv::LinkerInstruction> {
arch::riscv::compile(program)
}
-138
View File
@@ -1,138 +0,0 @@
use std::collections::HashMap;
use crate::{
ir::Symbol,
util::{Labelable, LabeledFmt},
};
use super::debug::DebugInfo;
/// Result of linking: one flat byte image plus the address of the start symbol, if any.
pub struct LinkedProgram {
/// Read-only data, padding, then the encoded functions.
pub code: Vec<u8>,
/// Address of the start symbol inside `code`.
pub start: Option<Addr>,
}
/// A program whose instructions still refer to symbols instead of addresses.
pub struct UnlinkedProgram<I: Instr> {
pub fns: Vec<UnlinkedFunction<I>>,
/// Read-only data blobs paired with the symbol that names each one.
pub ro_data: Vec<(Vec<u8>, Symbol)>,
/// Number of symbols; used to size the symbol table when linking.
pub sym_count: usize,
/// Symbol to resolve into `LinkedProgram::start`.
pub start: Option<Symbol>,
pub dbg: DebugInfo,
}
/// The instructions of one function together with the symbols defined inside it.
pub struct UnlinkedFunction<I: Instr> {
pub instrs: Vec<I>,
/// Symbol naming the function's first instruction.
pub sym: Symbol,
/// Instruction index -> symbol placed at that instruction.
pub locations: HashMap<usize, Symbol>,
}
impl<I: Instr + std::fmt::Debug> UnlinkedProgram<I> {
    /// Lays out read-only data followed by every function and resolves symbol references.
    ///
    /// Instructions that refer to a symbol without an address yet leave a placeholder and are
    /// remembered; once the function that defines the symbol has been emitted, they are
    /// re-encoded in place.
    ///
    /// # Panics
    ///
    /// Panics if any symbol is still unresolved at the end, or if the start symbol has no
    /// address.
    pub fn link(self) -> LinkedProgram {
        let mut data = Vec::new();
        let mut sym_table = SymTable::new(self.sym_count);
        // Symbol -> instructions (with their addresses) waiting for it to be defined.
        let mut missing = HashMap::<Symbol, Vec<(Addr, I)>>::new();
        for (val, id) in self.ro_data {
            sym_table.insert(id, Addr(data.len() as u64));
            data.extend(val);
        }
        // Pad so the code starts on a 4-byte boundary; no padding when already aligned.
        data.resize(data.len().next_multiple_of(4), 0);
        for f in self.fns {
            // Symbols that received an address while emitting this function.
            let mut added = vec![f.sym];
            sym_table.insert(f.sym, Addr(data.len() as u64));
            for (i, instr) in f.instrs.into_iter().enumerate() {
                let i_pos = Addr(data.len() as u64);
                if let Some(sym) = f.locations.get(&i) {
                    sym_table.insert(*sym, i_pos);
                    added.push(*sym);
                }
                if let Some(sym) = instr.push_to(&mut data, &mut sym_table, i_pos, false) {
                    missing.entry(sym).or_default().push((i_pos, instr));
                }
            }
            // Patch every placeholder that was waiting on a symbol defined by this function.
            for add in added {
                if let Some(waiting) = missing.remove(&add) {
                    for (addr, instr) in waiting {
                        let mut replace = Vec::new();
                        instr.push_to(&mut replace, &mut sym_table, addr, true);
                        let pos = addr.val() as usize;
                        data[pos..pos + replace.len()].copy_from_slice(&replace);
                    }
                }
            }
        }
        assert!(missing.is_empty());
        LinkedProgram {
            code: data,
            start: self
                .start
                .map(|s| sym_table.get(s).expect("start symbol doesn't exist")),
        }
    }
}
/// An instruction that can encode itself into a byte buffer.
pub trait Instr {
/// Appends the encoding of `self`, located at address `pos`, to `data`.
///
/// Returns `Some(symbol)` when the instruction refers to a symbol that `syms` cannot resolve
/// yet; the linker then calls `push_to` again once the symbol has an address.
/// The linker passes `missing = false` on the first attempt and `true` on that second call.
fn push_to(
&self,
data: &mut Vec<u8>,
syms: &mut SymTable,
pos: Addr,
missing: bool,
) -> Option<Symbol>;
}
/// A position inside the linked image.
#[derive(Debug, Clone, Copy, PartialEq)]
pub struct Addr(u64);
impl Addr {
/// Sentinel (all bits set) that `SymTable` uses for "no address assigned".
const NONE: Self = Self(!0);
/// Returns the raw address value.
pub fn val(&self) -> u64 {
self.0
}
}
/// Maps every symbol (by its index) to an address; unassigned entries hold `Addr::NONE`.
pub struct SymTable(Vec<Addr>);

impl SymTable {
    /// Creates a table for `len` symbols, none of which has an address yet.
    pub fn new(len: usize) -> Self {
        Self(std::iter::repeat(Addr::NONE).take(len).collect())
    }

    /// Records `addr` as the address of `sym`, replacing any previous value.
    pub fn insert(&mut self, sym: Symbol, addr: Addr) {
        let slot = &mut self.0[*sym];
        *slot = addr;
    }

    /// Returns the address of `sym`, or `None` when none has been recorded.
    pub fn get(&self, sym: Symbol) -> Option<Addr> {
        let addr = self.0[*sym];
        if addr == Addr::NONE {
            None
        } else {
            Some(addr)
        }
    }
}
impl<I: Instr + Labelable<Symbol> + LabeledFmt<Symbol>> std::fmt::Debug for UnlinkedProgram<I> {
    /// Prints every function as a listing: its label, then each instruction, with the recorded
    /// IR text interleaved at the instruction index it was recorded for.
    ///
    /// A symbol without a label is shown through its `Debug` form instead.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        for (fun, irli) in self.fns.iter().zip(&self.dbg.ir_lower) {
            // Fall back to the raw symbol rather than panicking inside a `Debug` impl.
            match self.dbg.sym_label(fun.sym) {
                Some(name) => writeln!(f, "{}:", name)?,
                None => writeln!(f, "{:?}:", fun.sym)?,
            }
            let mut pending = irli.iter().peekable();
            for (i, instr) in fun.instrs.iter().enumerate() {
                // Emit every IR line recorded for this instruction index.
                while let Some(entry) = pending.next_if(|entry| entry.0 == i) {
                    writeln!(f, " {}:", entry.1)?;
                }
                writeln!(
                    f,
                    " {:?}",
                    instr.labeled(&|f: &mut std::fmt::Formatter, s: &Symbol| {
                        // Only format the fallback when there is no label.
                        match self.dbg.sym_label(*s) {
                            Some(name) => write!(f, "{}", name),
                            None => write!(f, "{:?}", *s),
                        }
                    })
                )?;
            }
        }
        Ok(())
    }
}
-7
View File
@@ -1,7 +0,0 @@
/// A compilation target, described by the register type it uses.
pub trait Target {
type Reg;
}
/// A register type with an associated size type.
pub trait RegType {
type Size;
}
+142
View File
@@ -0,0 +1,142 @@
use std::path::{Path, PathBuf};
/// A region of a source file.
#[derive(Debug, Clone, Copy)]
pub struct Span {
/// Index into `CompilerOutput::files`.
pub file: usize,
/// Byte offset of the first character of the region.
pub start: usize,
/// Byte offset of the last character of the region (`Span::write` treats it as inclusive).
pub end: usize,
}
/// A value together with the source region it came from; dereferences to the inner value.
pub struct Spanned<T> {
pub inner: T,
pub span: Span,
}
/// Gives shared access to the wrapped value.
impl<T> std::ops::Deref for Spanned<T> {
type Target = T;
fn deref(&self) -> &Self::Target {
&self.inner
}
}
/// Gives mutable access to the wrapped value.
impl<T> std::ops::DerefMut for Spanned<T> {
fn deref_mut(&mut self) -> &mut Self::Target {
&mut self.inner
}
}
/// A diagnostic message with zero or more source regions attached.
#[derive(Debug)]
pub struct CompilerMsg {
/// Regions printed underneath the message.
pub spans: Vec<Span>,
pub msg: String,
}
/// Collects the diagnostics of a compilation and the files they refer to.
#[derive(Default)]
pub struct CompilerOutput {
pub errors: Vec<CompilerMsg>,
/// Source file paths; `Span::file` indexes into this list.
pub files: Vec<PathBuf>,
}
impl CompilerOutput {
    /// Creates an output with no errors and no files.
    pub fn new() -> Self {
        Self {
            errors: Vec::new(),
            files: Vec::new(),
        }
    }

    /// Records an error; anything convertible into a `CompilerMsg` is accepted.
    pub fn error(&mut self, msg: impl Into<CompilerMsg>) {
        let converted: CompilerMsg = msg.into();
        self.errors.push(converted);
    }

    /// Writes every error to `w`, each followed by its highlighted source regions.
    ///
    /// All files are read up front. Panics if a file cannot be read or a write fails.
    pub fn write(&self, w: &mut impl std::io::Write) {
        let mut sources = Vec::with_capacity(self.files.len());
        for path in &self.files {
            sources.push(std::fs::read_to_string(path).unwrap());
        }
        for error in self.errors.iter() {
            writeln!(w, "Error: {}", error.msg).unwrap();
            for span in error.spans.iter() {
                let source = &sources[span.file];
                span.write(w, source).unwrap();
            }
        }
    }
}
impl Span {
    /// Writes the source line(s) containing this span to `w`, with the spanned text underlined
    /// using ANSI escape sequences.
    ///
    /// `text` is the full content of the file the span refers to. When the span starts and ends
    /// on the same line, that line is printed once. Otherwise the first and last lines are
    /// printed, separated by ` ...` when they are not adjacent. Nothing is printed when neither
    /// `start` nor `end` falls on the start of a character of `text`.
    ///
    /// # Errors
    ///
    /// Returns any error produced by writing to `w`.
    pub fn write(&self, w: &mut impl std::io::Write, text: &str) -> std::io::Result<()> {
        // Collect `(line number, byte range without the newline)` for the line(s) that hold
        // the first and last character of the span.
        let mut spans = Vec::new();
        let mut line_start = 0;
        let mut line = 1;
        let mut found = false;
        for (i, c) in text.char_indices() {
            if i == self.start || i == self.end {
                found = true;
            }
            if c == '\n' {
                if found {
                    spans.push((line, line_start..i));
                }
                line_start = i + 1;
                line += 1;
                found = false;
            }
        }
        // The last line has no terminating newline, so it is never flushed by the loop.
        if found {
            spans.push((line, line_start..text.len()));
        }

        // `end` is inclusive: step to the byte after the character that starts there, so a
        // multi-byte character is never cut in half.
        let mut after = self.end.saturating_add(1).min(text.len());
        while !text.is_char_boundary(after) {
            after += 1;
        }

        let underline = "\x1b[4:3m";
        let underline_color = "\x1b[58;5;1m";
        let reset = "\x1b[0m";
        match &spans[..] {
            [(line, range)] => {
                // Clamp so a span ending on the newline itself cannot run past the line.
                let stop = after.min(range.end);
                writeln!(
                    w,
                    " {line:3} | {}{underline}{underline_color}{}{reset}{}",
                    &text[range.start..self.start],
                    &text[self.start..stop],
                    &text[stop..range.end]
                )?;
            }
            [(sline, srange), (eline, erange)] => {
                writeln!(
                    w,
                    " {sline:3} | {}{underline}{underline_color}{}{reset}",
                    &text[srange.start..self.start],
                    &text[self.start..srange.end],
                )?;
                if *eline != *sline + 1 {
                    writeln!(w, " ...")?;
                }
                let stop = after.min(erange.end);
                writeln!(
                    w,
                    " {eline:3} | {underline}{underline_color}{}{reset}{}",
                    &text[erange.start..stop],
                    &text[stop..erange.end],
                )?;
            }
            _ => {}
        }
        Ok(())
    }
}
impl From<String> for CompilerMsg {
fn from(msg: String) -> Self {
Self {
spans: Vec::new(),
msg,
}
}
}
impl From<&str> for CompilerMsg {
fn from(msg: &str) -> Self {
Self {
spans: Vec::new(),
msg: msg.to_string(),
}
}
}
impl<S: Into<String>> From<(S, Span)> for CompilerMsg {
fn from((msg, span): (S, Span)) -> Self {
Self {
spans: vec![span],
msg: msg.into(),
}
}
}
-1
View File
@@ -1 +0,0 @@
pub mod riscv64;
-20
View File
@@ -1,20 +0,0 @@
use std::fmt::Debug;
use crate::{compiler::arch::riscv::*, ir::IdentID};
/// A `LinkerInstruction` whose register operands are `RegRef<V>` and whose symbols are `V`.
pub type RV64Instruction<V = IdentID> = LinkerInstruction<RegRef<V>, V>;
/// A register operand: either a reference to a variable or a concrete register.
#[derive(Copy, Clone)]
pub enum RegRef<V = IdentID, R = Reg> {
Var(V),
Reg(R),
}
/// A variable is printed as its `Debug` form wrapped in braces; a register is printed as-is.
impl<V: Debug, R: Debug> Debug for RegRef<V, R> {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
Self::Var(v) => write!(f, "{{{:?}}}", v),
Self::Reg(r) => r.fmt(f),
}
}
}
-7
View File
@@ -1,7 +0,0 @@
use super::{arch::riscv64::RegRef, IdentID};
/// An assembly instruction in textual form: an operation name and its operands, each of which
/// is either a variable reference or a register given as a string.
#[derive(Clone)]
pub struct IRAsmInstruction {
op: String,
args: Vec<RegRef<IdentID, String>>,
}
+32 -117
View File
@@ -1,136 +1,51 @@
use std::{
fmt::Debug,
marker::PhantomData,
ops::{Index, IndexMut},
};
use std::ops::{Index, IndexMut};
// I had an idea for why these were different... now I don't
pub type Size = u32;
pub type Len = u32;
pub struct Id<T> {
idx: usize,
_pd: std::marker::PhantomData<T>,
}
pub struct ID<T>(pub usize, PhantomData<T>);
pub struct IdVec<T> {
vec: Vec<T>,
}
impl<T> ID<T> {
pub fn new(i: usize) -> Self {
Self(i, PhantomData)
impl<T> IdVec<T> {
pub fn add(&mut self, val: T) -> Id<T> {
let id = Id {
idx: self.vec.len(),
_pd: Default::default(),
};
self.vec.push(val);
id
}
}
impl<T> From<usize> for ID<T> {
fn from(value: usize) -> Self {
Self(value, PhantomData)
impl<T> Index<Id<T>> for IdVec<T> {
type Output = T;
fn index(&self, index: Id<T>) -> &Self::Output {
&self.vec[index.idx]
}
}
impl<T> Debug for ID<T> {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
write!(f, "{{{}}}", self.0)
impl<T> IndexMut<Id<T>> for IdVec<T> {
fn index_mut(&mut self, index: Id<T>) -> &mut Self::Output {
&mut self.vec[index.idx]
}
}
impl<T> PartialEq for ID<T> {
fn eq(&self, other: &Self) -> bool {
self.0 == other.0
impl<T> Default for IdVec<T> {
fn default() -> Self {
Self {
vec: Default::default(),
}
}
}
impl<T> Eq for ID<T> {}
impl<T> std::hash::Hash for ID<T> {
fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
self.0.hash(state);
}
}
impl<T> Clone for ID<T> {
impl<T> Clone for Id<T> {
fn clone(&self) -> Self {
Self(self.0.clone(), PhantomData)
*self
}
}
impl<T> Copy for ID<T> {}
// :fear:
impl<T> Index<ID<T>> for Vec<T> {
type Output = T;
fn index(&self, i: ID<T>) -> &Self::Output {
&self[i.0]
}
}
impl<T> IndexMut<ID<T>> for Vec<T> {
fn index_mut(&mut self, i: ID<T>) -> &mut Self::Output {
&mut self[i.0]
}
}
impl<T> Index<&ID<T>> for Vec<T> {
type Output = T;
fn index(&self, i: &ID<T>) -> &Self::Output {
&self[i.0]
}
}
impl<T> IndexMut<&ID<T>> for Vec<T> {
fn index_mut(&mut self, i: &ID<T>) -> &mut Self::Output {
&mut self[i.0]
}
}
impl<T> Index<&mut ID<T>> for Vec<T> {
type Output = T;
fn index(&self, i: &mut ID<T>) -> &Self::Output {
&self[i.0]
}
}
impl<T> IndexMut<&mut ID<T>> for Vec<T> {
fn index_mut(&mut self, i: &mut ID<T>) -> &mut Self::Output {
&mut self[i.0]
}
}
impl<T> Index<ID<T>> for [T] {
type Output = T;
fn index(&self, i: ID<T>) -> &Self::Output {
&self[i.0]
}
}
impl<T> IndexMut<ID<T>> for [T] {
fn index_mut(&mut self, i: ID<T>) -> &mut Self::Output {
&mut self[i.0]
}
}
impl<T> Index<&ID<T>> for [T] {
type Output = T;
fn index(&self, i: &ID<T>) -> &Self::Output {
&self[i.0]
}
}
impl<T> IndexMut<&ID<T>> for [T] {
fn index_mut(&mut self, i: &ID<T>) -> &mut Self::Output {
&mut self[i.0]
}
}
impl<T> Index<&mut ID<T>> for [T] {
type Output = T;
fn index(&self, i: &mut ID<T>) -> &Self::Output {
&self[i.0]
}
}
impl<T> IndexMut<&mut ID<T>> for [T] {
fn index_mut(&mut self, i: &mut ID<T>) -> &mut Self::Output {
&mut self[i.0]
}
}
impl<T> Copy for Id<T> {}
-66
View File
@@ -1,66 +0,0 @@
use super::*;
use crate::{compiler::arch::riscv::Reg, ir::arch::riscv64::RegRef};
use arch::riscv64::RV64Instruction;
use std::collections::HashMap;
/// A lowered function, ready to be turned into machine instructions.
#[derive(Debug)]
pub struct IRLFunction {
pub instructions: Vec<LInstruction>,
/// Size of every variable that needs its own stack slot.
pub stack: HashMap<VarID, Size>,
/// Variables that live inside another variable, with the parent and offset.
pub subvar_map: HashMap<VarID, VarOffset>,
pub args: Vec<(VarID, Size)>,
/// Size of the return value; the backend reserves a return-value slot only when this is
/// non-zero.
pub ret_size: Size,
/// Whether the function calls another one; the backend reserves a return-address slot when
/// set.
pub makes_call: bool,
}
/// One instruction of the lowered IR.
#[derive(Debug)]
pub enum LInstruction {
/// Copy from `src` (at `src_offset`) to `dst` (at `dst_offset`).
Mv {
dst: VarID,
dst_offset: Size,
src: VarID,
src_offset: Size,
},
/// Store the address of `src` into `dst`.
Ref {
dst: VarID,
src: VarID,
},
/// Store the address of symbol `src` into `dst` at `offset`.
LoadAddr {
dst: VarID,
offset: Size,
src: Symbol,
},
/// Copy `len` units of the data behind symbol `src` into `dst` at `offset`.
LoadData {
dst: VarID,
offset: Size,
src: Symbol,
len: Len,
},
/// Call function `f` with `args`; `dst`, when present, receives the result.
Call {
dst: Option<(VarID, Size)>,
f: Symbol,
args: Vec<(VarID, Size)>,
},
/// Inline assembly: `inputs` are loaded into registers before the block and `outputs` are
/// stored back after it.
AsmBlock {
instructions: Vec<RV64Instruction<VarID>>,
inputs: Vec<(Reg, VarID)>,
outputs: Vec<(Reg, VarID)>,
},
/// Return from the function, optionally with the value of `src`.
Ret {
src: Option<VarID>,
},
// TODO I feel like this should be turned into control flow instructions, maybe...
// not sure but LLVM has them so might be right play; seems optimal for optimization
/// Unconditional jump to a marked location.
Jump(Symbol),
/// Conditional jump to `to`; the backend emits a `branch::EQ` comparison of `cond` with `zero`.
Branch {
to: Symbol,
cond: VarID,
},
/// Defines the location named by the symbol at this point of the instruction stream.
Mark(Symbol),
}
impl LInstruction {
/// Returns `true` for `Ret`, whatever its `src`.
pub fn is_ret(&self) -> bool {
matches!(self, Self::Ret { .. })
}
}
-10
View File
@@ -1,10 +0,0 @@
mod func;
mod program;
mod symbol;
mod res;
pub use func::*;
pub use program::*;
pub use symbol::*;
use super::*;
-465
View File
@@ -1,465 +0,0 @@
use std::collections::HashMap;
use super::{
IRLFunction, LInstruction, Len, Symbol, SymbolSpaceBuilder, UInstruction, UProgram, VarID,
};
use crate::ir::{
AsmBlockArgType, Size, StructInst, SymbolSpace, Type, TypeID, UFunc, UInstrInst, VarOffset,
};
/// A fully lowered program: its symbol space and the symbol of the entry function.
pub struct LProgram {
sym_space: SymbolSpace,
entry: Symbol,
}
// NOTE: there are THREE places here where I specify size (8)
impl LProgram {
/// Lowers `p`, starting from the function named `crate` and following every function the
/// symbol-space builder queues.
///
/// Returns an error when no start function exists. A trailing `Ret` is appended to any
/// function whose last instruction is not already a return.
pub fn create(p: &UProgram) -> Result<Self, String> {
let start = p
.names
.id::<UFunc>(&[], "crate")
.ok_or("no start method found")?;
let mut ssbuilder = SymbolSpaceBuilder::with_entries(&[start]);
let entry = ssbuilder.func(&start);
// Keep lowering until the builder has no function left to process.
while let Some((sym, i)) = ssbuilder.pop_fn() {
let f = &p.fns[i.0];
let mut fbuilder = LFunctionBuilder::new(p, &mut ssbuilder);
for i in &f.instructions {
fbuilder.insert_instr(i);
}
if fbuilder.instrs.last().is_none_or(|i| !i.is_ret()) {
fbuilder.instrs.push(LInstruction::Ret { src: None });
}
let res = fbuilder.finish(f);
ssbuilder.write_fn(sym, res, Some(f.name.clone()));
}
let sym_space = ssbuilder.finish().expect("we failed the mission");
Ok(Self { sym_space, entry })
}
/// Returns the symbol of the entry function.
pub fn entry(&self) -> Symbol {
self.entry
}
}
/// Layout of one struct instantiation.
///
/// `order` maps a field name to its index; `offsets` and `types` are indexed by that value.
pub struct LStructInst {
    offsets: Vec<Len>,
    types: Vec<Type>,
    order: HashMap<String, usize>,
    size: Size,
}

impl LStructInst {
    /// Returns the offset of the field called `name`, or `None` when there is no such field.
    pub fn offset(&self, name: &str) -> Option<Len> {
        self.order.get(name).map(|&slot| self.offsets[slot])
    }

    /// Returns the type of the field called `name`, or `None` when there is no such field.
    pub fn ty(&self, name: &str) -> Option<&Type> {
        self.order.get(name).map(|&slot| &self.types[slot])
    }
}
/// Builds one lowered function: mutable builder state plus the program being lowered.
pub struct LFunctionBuilder<'a> {
data: LFunctionBuilderData<'a>,
program: &'a UProgram,
}
impl<'a> LFunctionBuilderData<'a> {
/// Creates empty builder state that registers symbols through `builder`.
pub fn new(builder: &'a mut SymbolSpaceBuilder) -> Self {
Self {
instrs: Vec::new(),
struct_insts: HashMap::new(),
stack: HashMap::new(),
subvar_map: HashMap::new(),
makes_call: false,
builder,
loopp: None,
}
}
}
/// Mutable state accumulated while lowering one function.
pub struct LFunctionBuilderData<'a> {
builder: &'a mut SymbolSpaceBuilder,
/// Lowered instructions emitted so far.
instrs: Vec<LInstruction>,
/// Size of every variable that was given a stack slot.
stack: HashMap<VarID, Size>,
/// Variables that resolve to an offset inside another variable.
subvar_map: HashMap<VarID, VarOffset>,
struct_insts: HashMap<StructInst, LStructInst>,
/// Set once a call instruction has been lowered.
makes_call: bool,
loopp: Option<LoopCtx>,
}
/// Symbols associated with a loop.
// NOTE(review): presumably `top` marks the loop start and `bot` the point after it; confirm
// against the code that fills these in.
#[derive(Clone, Copy)]
pub struct LoopCtx {
top: Symbol,
bot: Symbol,
}
impl<'a> LFunctionBuilder<'a> {
/// Creates a builder for one function of `program` with fresh, empty state.
pub fn new(program: &'a UProgram, builder: &'a mut SymbolSpaceBuilder) -> Self {
Self {
data: LFunctionBuilderData::new(builder),
program,
}
}
/// Makes sure variable `i` has storage.
///
/// Returns `None` when the variable has size 0, so nothing needs to be stored. Otherwise the
/// variable is registered as a sub-variable if it resolves to another one, the variable it
/// resolves to gets a stack entry if it has none yet, and `Some(())` is returned.
///
/// Panics when a size or the variable offset cannot be determined.
pub fn alloc_stack(&mut self, i: VarID) -> Option<()> {
if self
.data
.size_of_var(self.program, i)
.expect("unsized type")
== 0
{
return None;
};
self.map_subvar(i);
let var = self.data.var_offset(self.program, i).expect("var offset");
// The stack entry is keyed by `var.id`, the variable that `i` resolves to.
if !self.stack.contains_key(&var.id) {
let size = self
.data
.size_of_var(self.program, var.id)
.expect("unsized type");
self.data.stack.insert(var.id, size);
}
Some(())
}
/// Records `i` in the sub-variable map when it resolves to a different variable.
///
/// Panics when the variable offset cannot be determined.
pub fn map_subvar(&mut self, i: VarID) {
let off = self.data.var_offset(self.program, i).expect("var offset");
if off.id != i {
self.subvar_map.insert(i, off);
}
}
/// Lowers one upper-IR instruction and appends the result to `self.instrs`.
///
/// Returns
/// - `None` when lowering stopped early because a destination variable is
///   zero-sized (`alloc_stack` returned `None`),
/// - `Some(None)` when the instruction was lowered,
/// - `Some(Some(msg))` when the instruction is invalid (currently only
///   `LoadSlice` applied to data whose type is not an array).
///
/// The results of nested `insert_instr` calls (bodies of `If` / `Loop`) are
/// not inspected.
///
/// # Panics
/// Panics if the instruction cannot be resolved, if a type has no size, if a
/// field offset is unknown, on `Deref` (not implemented), and on `Break` /
/// `Continue` outside of a loop.
pub fn insert_instr(&mut self, i: &UInstrInst) -> Option<Option<String>> {
    match i
        .i
        .resolve(self.program)
        .expect("failed to resolve during lowering")
    {
        UInstruction::Mv { dst, src } => {
            self.alloc_stack(dst)?;
            self.map_subvar(src);
            self.instrs.push(LInstruction::Mv {
                dst,
                dst_offset: 0,
                src,
                src_offset: 0,
            });
        }
        UInstruction::Ref { dst, src } => {
            self.alloc_stack(dst)?;
            self.map_subvar(src);
            self.instrs.push(LInstruction::Ref { dst, src });
        }
        UInstruction::Deref { .. } => {
            todo!()
        }
        UInstruction::LoadData { dst, src } => {
            self.alloc_stack(dst)?;
            let data = &self.program.data[src];
            let sym = self.data.builder.ro_data(
                src,
                &data.content,
                Some(&self.program.data[src].name),
            );
            self.instrs.push(LInstruction::LoadData {
                dst,
                offset: 0,
                len: data.content.len() as Len,
                src: sym,
            });
        }
        UInstruction::LoadSlice { dst, src } => {
            self.alloc_stack(dst)?;
            let data = &self.program.data[src];
            let Type::Array(_, len) = &self.program.types[data.ty] else {
                return Some(Some(format!(
                    "tried to load {} as slice",
                    self.program.type_name(&data.ty)
                )));
            };
            // first word of the slice: address of the array data
            let sym = self.data.builder.ro_data(
                src,
                &data.content,
                Some(&self.program.data[src].name),
            );
            self.instrs.push(LInstruction::LoadAddr {
                dst,
                offset: 0,
                src: sym,
            });
            // second word of the slice: the array length, stored as read-only data
            let sym = self
                .builder
                .anon_ro_data(&(*len as u64).to_le_bytes(), Some(format!("len: {}", len)));
            self.instrs.push(LInstruction::LoadData {
                dst,
                offset: 8,
                len: 8,
                src: sym,
            });
        }
        UInstruction::Call { dst, f, args } => {
            // no `?` here: the call is still emitted when `dst` is zero-sized
            self.alloc_stack(dst);
            self.makes_call = true;
            let sym = self.builder.func(f.id);
            let ret_size = self
                .data
                .size_of_var(self.program, dst)
                .expect("unsized type");
            let dst = if ret_size > 0 {
                Some((dst, ret_size))
            } else {
                None
            };
            let call = LInstruction::Call {
                dst,
                f: sym,
                args: args
                    .into_iter()
                    .map(|id| {
                        self.map_subvar(id);
                        (
                            id,
                            self.data
                                .size_of_var(self.program, id)
                                .expect("unsized type"),
                        )
                    })
                    .collect(),
            };
            self.instrs.push(call);
        }
        UInstruction::AsmBlock { instructions, args } => {
            let mut inputs = Vec::new();
            let mut outputs = Vec::new();
            for a in args {
                match a.ty {
                    AsmBlockArgType::In => {
                        self.map_subvar(a.var);
                        inputs.push((a.reg, a.var))
                    }
                    AsmBlockArgType::Out => {
                        self.alloc_stack(a.var)?;
                        outputs.push((a.reg, a.var));
                    }
                }
            }
            self.instrs.push(LInstruction::AsmBlock {
                instructions: instructions.clone(),
                inputs,
                outputs,
            })
        }
        UInstruction::Ret { src } => {
            self.map_subvar(src);
            // a zero-sized value is returned as "nothing"
            let src = if self
                .data
                .size_of_var(self.program, src)
                .expect("unsized var")
                == 0
            {
                None
            } else {
                Some(src)
            };
            self.data.instrs.push(LInstruction::Ret { src })
        }
        UInstruction::Construct {
            dst,
            ref struc,
            ref fields,
        } => {
            self.alloc_stack(dst)?;
            // one move per field, into the field's offset inside `dst`
            for (field, &src) in fields {
                self.map_subvar(src);
                let i = LInstruction::Mv {
                    dst,
                    src,
                    dst_offset: self
                        .data
                        .field_offset(self.program, struc, field)
                        .expect("field offset"),
                    src_offset: 0,
                };
                self.instrs.push(i)
            }
        }
        UInstruction::If { cond, body } => {
            self.map_subvar(cond);
            let sym = self.builder.reserve();
            self.instrs.push(LInstruction::Branch { to: *sym, cond });
            for i in body {
                self.insert_instr(&i);
            }
            self.instrs.push(LInstruction::Mark(*sym));
        }
        UInstruction::Loop { body } => {
            let top = self.builder.reserve();
            let bot = self.builder.reserve();
            // remember the enclosing loop so it can be restored afterwards
            let old = self.loopp;
            self.loopp = Some(LoopCtx {
                bot: *bot,
                top: *top,
            });
            self.instrs.push(LInstruction::Mark(*top));
            for i in body {
                self.insert_instr(i);
            }
            self.instrs.push(LInstruction::Jump(*top));
            self.instrs.push(LInstruction::Mark(*bot));
            self.loopp = old;
        }
        UInstruction::Break => {
            self.data.instrs.push(LInstruction::Jump(
                self.data.loopp.expect("Tried to break outside of loop").bot,
            ));
        }
        UInstruction::Continue => {
            self.data.instrs.push(LInstruction::Jump(
                self.data
                    .loopp
                    .expect("Tried to continue outside of loop")
                    .top,
            ));
        }
    };
    Some(None)
}
/// Consumes the builder and packages the lowered function.
///
/// Argument sizes and the return size are computed first (in that order),
/// then the accumulated instructions, stack map, sub-variable map and
/// `makes_call` flag are moved into the result.
///
/// # Panics
/// Panics with "unsized type" if an argument or the return type has no size.
pub fn finish(mut self, f: &UFunc) -> IRLFunction {
    let program = self.program;
    let args = f
        .args
        .iter()
        .map(|&arg| {
            let size = self.data.size_of_var(program, arg).expect("unsized type");
            (arg, size)
        })
        .collect();
    let ret_size = self
        .data
        .size_of_type(program, &f.ret)
        .expect("unsized type");
    let data = self.data;
    IRLFunction {
        args,
        ret_size,
        instructions: data.instrs,
        makes_call: data.makes_call,
        stack: data.stack,
        subvar_map: data.subvar_map,
    }
}
}
impl LFunctionBuilderData<'_> {
/// Resolves `var` to its root variable and the accumulated field offset.
///
/// Walks up through `Type::Field` links collecting field names, then walks
/// back down from the root through struct types, summing the offset of each
/// collected field. Returns `None` if a variable lookup or a field offset
/// lookup fails.
///
/// # Panics
/// Panics with "bad field" if a collected field name is not a field of the
/// struct being descended into.
pub fn var_offset(&mut self, p: &UProgram, mut var: VarID) -> Option<VarOffset> {
    // innermost field first; popped from the back (outermost first) below
    let mut fields = Vec::new();
    loop {
        let Type::Field(link) = &p.get(var)?.ty else {
            break;
        };
        var = link.parent;
        fields.push(&link.name);
    }
    let mut current = &p.get(var)?.ty;
    let mut offset = 0;
    loop {
        let Type::Struct(sty) = current else {
            break;
        };
        let Some(name) = fields.pop() else {
            break;
        };
        offset += self.field_offset(p, sty, &name)?;
        current = p.struct_field_type(sty, name).expect("bad field");
    }
    Some(VarOffset { id: var, offset })
}
/// Size used for addresses (references, and each half of a slice); always 64.
pub fn addr_size(&self) -> Size {
64
}
pub fn struct_inst(&mut self, p: &UProgram, ty: &StructInst) -> &LStructInst {
// normally I'd let Some(..) here and return, but polonius does not exist :grief:
if self.struct_insts.get(ty).is_none() {
let LStructInst { id, args } = ty;
let struc = p.expect(*id);
let mut types = Vec::new();
let mut sizes = struc
.fields
.iter()
.map(|(n, f)| {
let ty = if let Type::Generic { id } = &f.ty {
struc
.generics
.iter()
.enumerate()
.find_map(|(i, g)| if *g == *id { args.get(i) } else { None })
.unwrap_or(&f.ty)
} else {
&f.ty
};
types.push(ty.clone());
(n, self.size_of_type(p, ty).expect("unsized type"))
})
.collect::<Vec<_>>();
sizes.sort_by(|(n1, s1, ..), (n2, s2, ..)| s1.cmp(s2).then_with(|| n1.cmp(n2)));
let mut offset = 0;
let mut offsets = Vec::new();
let mut order = HashMap::new();
for (i, (name, size)) in sizes.iter().rev().enumerate() {
// TODO: alignment!!!
order.insert(name.to_string(), i);
offsets.push(offset);
offset += size;
}
self.struct_insts.insert(
ty.clone(),
LStructInst {
offsets,
order,
types,
size: offset,
},
);
}
self.struct_insts.get(ty).unwrap()
}
/// Offset of `field` inside the layout of `sty`, or `None` if the layout has
/// no field with that name.
pub fn field_offset(&mut self, p: &UProgram, sty: &StructInst, field: &str) -> Option<Len> {
    self.struct_inst(p, sty).offset(field)
}
/// Size of type `ty`, or `None` for types without a known size (generics and
/// any variant not listed below).
pub fn size_of_type(&mut self, p: &UProgram, ty: &TypeID) -> Option<Size> {
    // TODO: target matters
    let size = match &p.types[ty] {
        Type::Bits(bits) => *bits,
        Type::Struct(inst) => self.struct_inst(p, inst).size,
        Type::Generic(_) => return None,
        // function references are resolved at compile time into direct calls,
        // so they don't have any size as arguments
        Type::FnInst(_) => 0,
        Type::Ref(_) => self.addr_size(),
        Type::Array(elem, len) => self.size_of_type(p, elem)? * len,
        // two address-sized words
        Type::Slice(_) => self.addr_size() * 2,
        Type::Unit => 0,
        _ => return None,
    };
    Some(size)
}
/// Size of the type of `var`; `None` if the variable is unknown or its type
/// has no size.
pub fn size_of_var(&mut self, p: &UProgram, var: VarID) -> Option<Size> {
    let info = p.get(var)?;
    self.size_of_type(p, &info.ty)
}
}
/// Lets fields and methods of `LFunctionBuilderData` be used directly on the builder.
impl<'a> std::ops::Deref for LFunctionBuilder<'a> {
type Target = LFunctionBuilderData<'a>;
fn deref(&self) -> &Self::Target {
&self.data
}
}
/// Mutable counterpart of the `Deref` impl; exposes `data` mutably.
impl<'a> std::ops::DerefMut for LFunctionBuilder<'a> {
fn deref_mut(&mut self) -> &mut Self::Target {
&mut self.data
}
}
/// Exposes the program's `SymbolSpace` (`sym_space`) directly on `LProgram`.
impl std::ops::Deref for LProgram {
type Target = SymbolSpace;
fn deref(&self) -> &Self::Target {
&self.sym_space
}
}
-92
View File
@@ -1,92 +0,0 @@
use crate::ir::{
arch::riscv64::{RV64Instruction, RegRef},
AsmBlockArg, Resolved, UInstrInst, UInstruction, UProgram, VarID,
};
impl UInstrInst {
    /// Resolves the wrapped instruction against `p`, keeping the same origin.
    ///
    /// Returns `None` if the instruction itself cannot be resolved.
    pub fn resolve<'a>(&'a self, p: &'a UProgram) -> Option<UInstrInst<Resolved>> {
        self.i.resolve(p).map(|i| UInstrInst {
            i,
            origin: self.origin,
        })
    }
}
impl UInstruction {
    /// Produces the resolved form of this instruction by looking up every
    /// identifier it mentions in `p`.
    ///
    /// Returns `None` as soon as any variable, function, struct or nested
    /// element fails to resolve.
    ///
    /// All fallible collections use `collect::<Option<_>>()`, which stops at
    /// the first `None` and works on stable Rust.
    pub fn resolve<'a>(&'a self, p: &'a UProgram) -> Option<UInstruction<Resolved>> {
        use UInstruction as I;
        Some(match self {
            I::Mv { dst, src } => I::Mv {
                dst: dst.var(p)?,
                src: src.var(p)?,
            },
            I::Ref { dst, src } => I::Ref {
                dst: dst.var(p)?,
                src: src.var(p)?,
            },
            I::Deref { dst, src } => I::Deref {
                dst: dst.var(p)?,
                src: src.var(p)?,
            },
            I::LoadData { dst, src } => I::LoadData {
                dst: dst.var(p)?,
                src: *src,
            },
            I::LoadSlice { dst, src } => I::LoadSlice {
                dst: dst.var(p)?,
                src: *src,
            },
            I::Call { dst, f, args } => I::Call {
                dst: dst.var(p)?,
                f: f.fun(p)?.clone(),
                args: args.iter().map(|i| i.var(p)).collect::<Option<_>>()?,
            },
            I::AsmBlock { instructions, args } => I::AsmBlock {
                instructions: instructions
                    .iter()
                    .map(|i| i.resolve(p))
                    .collect::<Option<_>>()?,
                args: args.iter().map(|a| a.resolve(p)).collect::<Option<_>>()?,
            },
            I::Ret { src } => I::Ret { src: src.var(p)? },
            I::Construct { dst, struc, fields } => I::Construct {
                dst: dst.var(p)?,
                struc: struc.struc(p)?.clone(),
                fields: fields
                    .iter()
                    .map(|(name, ident)| ident.var(p).map(|i| (name.clone(), i)))
                    .collect::<Option<_>>()?,
            },
            I::If { cond, body } => I::If {
                cond: cond.var(p)?,
                body: body
                    .iter()
                    .map(|i| i.resolve(p))
                    .collect::<Option<_>>()?,
            },
            I::Loop { body } => I::Loop {
                body: body
                    .iter()
                    .map(|i| i.resolve(p))
                    .collect::<Option<_>>()?,
            },
            I::Break => I::Break,
            I::Continue => I::Continue,
        })
    }
}
impl AsmBlockArg {
    /// Resolves the argument's variable identifier; register and direction are
    /// copied unchanged. Returns `None` if the variable does not resolve.
    pub fn resolve(&self, p: &UProgram) -> Option<AsmBlockArg<VarID>> {
        self.var.var(p).map(|var| AsmBlockArg {
            var,
            reg: self.reg,
            ty: self.ty,
        })
    }
}
impl RV64Instruction {
    /// Resolves every variable operand of the instruction; register operands
    /// are copied unchanged. Returns `None` if any variable fails to resolve.
    pub fn resolve(&self, p: &UProgram) -> Option<RV64Instruction<VarID>> {
        self.try_map(|operand| match operand {
            RegRef::Var(v) => {
                let id = v.var(p)?;
                Some(RegRef::Var(id))
            }
            RegRef::Reg(r) => Some(RegRef::Reg(*r)),
        })
    }
}
-153
View File
@@ -1,153 +0,0 @@
use std::collections::HashMap;
use super::{DataID, FnID, IRLFunction};
/// Index of a symbol handed out by `SymbolSpaceBuilder::reserve`.
#[derive(Clone, Copy, Hash, PartialEq, Eq)]
pub struct Symbol(usize);
/// A reserved symbol whose contents have not been written yet.
///
/// Intentionally does not implement `Copy` or `Clone`: the `write_*` methods
/// take it by value, so it should only be consumed once.
pub struct WritableSymbol(Symbol);
/// Gives read access to the underlying `Symbol` without consuming the handle.
impl std::ops::Deref for WritableSymbol {
type Target = Symbol;
fn deref(&self) -> &Self::Target {
&self.0
}
}
/// Finished set of symbols produced by `SymbolSpaceBuilder::finish`.
pub struct SymbolSpace {
/// Read-only data blobs with the symbol each was written to.
ro_data: Vec<(Symbol, Vec<u8>)>,
/// Lowered functions with the symbol each was written to.
fns: Vec<(Symbol, IRLFunction)>,
/// Number of symbols that were reserved.
len: usize,
/// Optional label per symbol, indexed by symbol index.
labels: Vec<Option<String>>,
}
/// Incrementally reserves symbols and collects the data and functions written to them.
pub struct SymbolSpaceBuilder {
/// Number of symbols reserved so far; also the index of the next symbol.
symbols: usize,
/// Functions that have a symbol but have not been written yet.
unwritten_fns: Vec<(WritableSymbol, FnID)>,
/// Symbol already assigned to each function.
fn_map: HashMap<FnID, Symbol>,
/// Symbol already assigned to each data item.
data_map: HashMap<DataID, Symbol>,
/// Read-only data written so far.
ro_data: Vec<(Symbol, Vec<u8>)>,
/// Functions written so far.
fns: Vec<(Symbol, IRLFunction)>,
/// Optional label per reserved symbol, indexed by symbol index.
labels: Vec<Option<String>>,
}
/// Read-only accessors for the finished symbol space.
impl SymbolSpace {
/// All read-only data blobs with their symbols.
pub fn ro_data(&self) -> &[(Symbol, Vec<u8>)] {
&self.ro_data
}
/// All lowered functions with their symbols.
pub fn fns(&self) -> &[(Symbol, IRLFunction)] {
&self.fns
}
/// Optional label of every symbol, indexed by symbol index.
pub fn labels(&self) -> &[Option<String>] {
&self.labels
}
/// Number of symbols in the space.
pub fn len(&self) -> usize {
self.len
}
}
impl SymbolSpaceBuilder {
    /// Creates a builder with no symbols reserved.
    pub fn new() -> Self {
        Self {
            symbols: 0,
            labels: Vec::new(),
            ro_data: Vec::new(),
            fns: Vec::new(),
            unwritten_fns: Vec::new(),
            fn_map: HashMap::new(),
            data_map: HashMap::new(),
        }
    }

    /// Creates a builder and requests a symbol for every function in `entries`.
    pub fn with_entries(entries: &[FnID]) -> SymbolSpaceBuilder {
        let mut builder = Self::new();
        for &entry in entries {
            builder.func(entry);
        }
        builder
    }

    /// Takes the most recently queued function that still has to be written.
    pub fn pop_fn(&mut self) -> Option<(WritableSymbol, FnID)> {
        self.unwritten_fns.pop()
    }

    /// Reserves a fresh symbol and writes `data` to it.
    pub fn anon_ro_data(&mut self, data: &[u8], label: Option<String>) -> Symbol {
        let slot = self.reserve();
        self.write_ro_data(slot, data.to_vec(), label)
    }

    /// Returns the symbol for data item `id`, writing `data` under a new
    /// symbol the first time `id` is seen.
    pub fn ro_data(&mut self, id: DataID, data: &[u8], label: Option<&str>) -> Symbol {
        if let Some(&known) = self.data_map.get(&id) {
            return known;
        }
        let slot = self.reserve();
        self.data_map.insert(id, *slot);
        self.write_ro_data(slot, data.to_vec(), label.map(|l| l.to_string()))
    }

    /// Returns the symbol for function `id`; the first request reserves a
    /// symbol and queues the function in `unwritten_fns`.
    pub fn func(&mut self, id: FnID) -> Symbol {
        if let Some(&known) = self.fn_map.get(&id) {
            return known;
        }
        let slot = self.reserve();
        let sym = *slot;
        self.unwritten_fns.push((slot, id));
        self.fn_map.insert(id, sym);
        sym
    }

    /// Stores `data` under `sym`, sets the symbol's label to `name` and
    /// returns the plain symbol.
    pub fn write_ro_data(
        &mut self,
        sym: WritableSymbol,
        data: Vec<u8>,
        name: Option<String>,
    ) -> Symbol {
        let plain = *sym;
        self.ro_data.push((plain, data));
        self.labels[sym.0 .0] = name;
        plain
    }

    /// Stores `func` under `sym`, sets the symbol's label to `name` and
    /// returns the plain symbol.
    pub fn write_fn(
        &mut self,
        sym: WritableSymbol,
        func: IRLFunction,
        name: Option<String>,
    ) -> Symbol {
        let plain = *sym;
        self.fns.push((plain, func));
        self.labels[sym.0 .0] = name;
        plain
    }

    /// Reserves the next symbol index with an empty label.
    pub fn reserve(&mut self) -> WritableSymbol {
        let index = self.symbols;
        self.labels.push(None);
        self.symbols = index + 1;
        WritableSymbol(Symbol(index))
    }

    /// Number of symbols reserved so far.
    pub fn len(&self) -> usize {
        self.symbols
    }

    /// Turns the builder into a `SymbolSpace`; returns `None` while functions
    /// are still queued in `unwritten_fns`.
    pub fn finish(self) -> Option<SymbolSpace> {
        if !self.unwritten_fns.is_empty() {
            return None;
        }
        Some(SymbolSpace {
            len: self.symbols,
            fns: self.fns,
            ro_data: self.ro_data,
            labels: self.labels,
        })
    }
}
impl std::fmt::Debug for Symbol {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
write!(f, "@{}", self.0)
}
}
/// Exposes the symbol's index as a `usize`.
impl std::ops::Deref for Symbol {
type Target = usize;
fn deref(&self) -> &Self::Target {
&self.0
}
}
+20 -13
View File
@@ -1,16 +1,23 @@
//! the IR is split into 2 layers: upper and lower
//! upper handles all of the main language features like types,
//! and the lower is a very concrete format that can be easily
//! translated to assembly and will probably also include
//! the majority of optimization, but not sure
mod upper;
mod lower;
mod id;
mod asm;
pub mod arch;
pub use upper::*;
pub use lower::*;
mod structs;
pub use id::*;
pub use structs::*;
/// Top-level IR container: a set of namespaces plus the id of the root one.
pub struct Ir {
/// Id of the root namespace inside `namespaces`.
pub root: Id<Namespace>,
/// Storage for every namespace.
pub namespaces: IdVec<Namespace>,
}
impl Ir {
/// Mutable access to the root namespace.
pub fn root(&mut self) -> &mut Namespace {
&mut self.namespaces[self.root]
}
}
impl Default for Ir {
/// Creates an `Ir` whose only namespace is a default-constructed root.
fn default() -> Self {
let mut namespaces = IdVec::default();
let root = namespaces.add(Namespace::default());
Self { root, namespaces }
}
}
+38
View File
@@ -0,0 +1,38 @@
mod namespace;
pub use namespace::*;
use super::Id;
/// A function, consisting of its body.
pub struct Fn {
pub body: Body,
}
/// An ordered list of statements.
pub struct Body {
pub statements: Vec<Statement>,
}
/// A single statement; currently only wraps its kind.
pub struct Statement {
ty: StatementTy,
}
/// The kinds of statement.
pub enum StatementTy {
/// Definition of `target` from `val`.
Define { target: VarId, val: VarId },
/// Assignment of `val` to `target`.
Assign { target: VarId, val: VarId },
/// Call involving `target` with the given argument variables.
Call { target: VarId, args: Vec<VarId> },
}
/// A variable: a constness flag and the id of its type.
pub struct Var {
const_: bool,
ty: TypeId,
}
/// The kinds of type.
pub enum Type {
/// Carries a `u8` — presumably the bit width; confirm against users.
Unsigned(u8),
/// Carries a `u8` — presumably the bit width; confirm against users.
Signed(u8),
/// Array of the given element type.
Array(TypeId),
/// Pointer to the given type.
Ptr(TypeId),
/// Placeholder variant; name suggests "to be inferred" — confirm.
Infer,
}
/// Identifier of a variable.
pub type VarId = u32;
/// Identifier of a type.
pub type TypeId = u32;
+11
View File
@@ -0,0 +1,11 @@
use super::*;
use std::collections::HashMap;
/// A namespace: named items, keyed by name. Defaults to empty.
#[derive(Default)]
pub struct Namespace {
pub items: HashMap<String, Item>,
}
/// Something a namespace can contain.
pub enum Item {
/// Reference to another namespace by id.
Import(Id<Namespace>),
}
-140
View File
@@ -1,140 +0,0 @@
use std::fmt::Display;
use super::*;
/// A generic identifier for all (identifiable) kinds,
/// e.g. `a::b::c.d.e`
/// or `a::Result<T,_>`.
pub struct UIdent {
/// Current resolution state of the identifier.
pub status: IdentStatus,
/// Source location the identifier came from.
pub origin: Origin,
}
/// Resolution state of a `UIdent`.
pub enum IdentStatus {
/// Fully resolved.
Res(Res),
// lets you do things like import and then specialize in multiple places
// eg. import SomeStruct ...... f() -> SomeStruct // type ....... SomeStruct {} // struct
// and then have correct errors like "expected struct, found type Bla"
/// Defers to another identifier.
Ref(IdentID),
/// Not resolved yet: a base plus the member path still to be walked.
Unres {
base: ResBase,
path: Vec<MemberIdent>,
},
/// Resolution failed, optionally with the error to report.
Failed(Option<ResErr>),
/// Set when the identifier this one refers to is `Failed` or `Cooked`.
Cooked,
}
/// One segment of an unresolved identifier path.
pub struct MemberIdent {
/// Whether the segment is a `::` member or a `.` field access.
pub ty: MemberTy,
/// Name of the segment.
pub name: String,
/// Generic arguments attached to the segment.
pub gargs: Vec<TypeID>,
/// Source location of the segment.
pub origin: Origin,
}
/// How a path segment is attached to its parent.
#[derive(Clone, Copy)]
pub enum MemberTy {
    /// Attached with `::`.
    Member,
    /// Attached with `.`.
    Field,
}

impl MemberTy {
    /// The separator written before a segment of this kind.
    pub fn sep(&self) -> &'static str {
        match *self {
            Self::Member => "::",
            Self::Field => ".",
        }
    }
}

/// Displays the lowercase kind name: `member` or `field`.
impl Display for MemberTy {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let label = match *self {
            Self::Member => "member",
            Self::Field => "field",
        };
        f.write_str(label)
    }
}
/// What an identifier resolved to.
#[derive(Debug, Clone)]
pub enum Res {
Var(VarID),
Fn(FnInst),
Struct(StructInst),
Type(TypeID),
Generic(GenericID),
Module(ModID),
}
impl Res {
/// The externally visible kind of this resolution.
pub fn kind(&self) -> KindTy {
match self {
Res::Var(..) => KindTy::Var,
Res::Fn(..) => KindTy::Fn,
Res::Struct(..) => KindTy::Struct,
Res::Type(..) => KindTy::Type,
Res::Module(..) => KindTy::Module,
Res::Generic(..) => KindTy::Generic,
}
}
/// Human-readable description of the form `<kind> '<name>'`, with the
/// name looked up in `p`.
pub fn display_str(&self, p: &UProgram) -> String {
let name = match self {
Res::Var(id) => &p.vars[id].name,
Res::Fn(fi) => &p.fns[fi.id].name,
Res::Struct(si) => &p.structs[si.id].name,
// borrows a temporary String produced by type_name
Res::Type(id) => &p.type_name(id),
Res::Generic(id) => &p.generics[id].name,
Res::Module(id) => &p.modules[id].name,
};
format!("{} '{}'", self.kind(), name)
}
}
/// Base of an unresolved identifier path.
#[derive(Clone)]
pub enum ResBase {
/// A member that still has to go through `MemRes::validate`.
Unvalidated(MemRes),
/// An already validated resolution.
Validated(Res),
}
impl ResBase {
/// Human-readable description, delegated to the wrapped value.
pub fn display_str(&self, p: &UProgram) -> String {
match self {
ResBase::Unvalidated(uv) => uv.display_str(p),
ResBase::Validated(res) => res.display_str(p),
}
}
}
/// A member reference together with the generic arguments applied to it.
#[derive(Clone)]
pub struct MemRes {
/// The member being referenced.
pub mem: Member,
/// Source location of the reference.
pub origin: Origin,
/// Generic arguments supplied at the reference.
pub gargs: Vec<TypeID>,
}
impl MemRes {
/// Human-readable description of the referenced member.
pub fn display_str(&self, p: &UProgram) -> String {
self.mem.id.display_str(p)
}
}
/// Typed lookups of a resolved identifier; each returns `None` unless the
/// identifier's status is `Res` of the requested kind.
impl IdentID {
    /// The variable this identifier resolved to.
    pub fn var(&self, p: &UProgram) -> Option<VarID> {
        if let IdentStatus::Res(Res::Var(id)) = p.idents[self].status {
            Some(id)
        } else {
            None
        }
    }

    /// The function instantiation this identifier resolved to.
    pub fn fun<'a>(&self, p: &'a UProgram) -> Option<&'a FnInst> {
        if let IdentStatus::Res(Res::Fn(inst)) = &p.idents[self].status {
            Some(inst)
        } else {
            None
        }
    }

    /// The struct instantiation this identifier resolved to.
    pub fn struc<'a>(&self, p: &'a UProgram) -> Option<&'a StructInst> {
        if let IdentStatus::Res(Res::Struct(inst)) = &p.idents[self].status {
            Some(inst)
        } else {
            None
        }
    }
}
-94
View File
@@ -1,94 +0,0 @@
use std::collections::HashMap;
use super::{arch::riscv64::RV64Instruction, *};
use crate::compiler::arch::riscv::Reg;
/// Selects the representation of variables, functions, structs and types
/// used by an instruction at a given resolution stage.
pub trait ResStage {
type Var;
type Func;
type Struct;
type Type;
}
/// Stage before resolution: references may still be identifiers.
pub struct Unresolved;
impl ResStage for Unresolved {
type Var = VarRes;
type Func = IdentID;
type Struct = IdentID;
type Type = TypeRes;
}
/// Stage after resolution: everything is a concrete id or instantiation.
pub struct Resolved;
impl ResStage for Resolved {
type Var = VarID;
type Func = FnInst;
type Struct = StructInst;
type Type = TypeID;
}
/// An upper-IR instruction, parameterized by resolution stage `S`
/// (`Unresolved` by default).
pub enum UInstruction<S: ResStage = Unresolved> {
/// Move `src` into `dst`.
Mv {
dst: S::Var,
src: S::Var,
},
/// Store a reference to `src` in `dst`.
Ref {
dst: S::Var,
src: S::Var,
},
/// Dereference `src` into `dst`.
Deref {
dst: S::Var,
src: S::Var,
},
/// Load the contents of data item `src` into `dst`.
LoadData {
dst: S::Var,
src: DataID,
},
/// Load data item `src` into `dst` as a slice.
LoadSlice {
dst: S::Var,
src: DataID,
},
/// Call `f` with `args`, placing the result in `dst`.
Call {
dst: S::Var,
f: S::Func,
args: Vec<S::Var>,
},
/// Inline assembly with its input/output variable bindings.
AsmBlock {
instructions: Vec<RV64Instruction<S::Var>>,
args: Vec<AsmBlockArg<S::Var>>,
},
/// Return `src` from the function.
Ret {
src: S::Var,
},
/// Build struct `struc` in `dst` from the named field variables.
Construct {
dst: S::Var,
struc: S::Struct,
fields: HashMap<String, S::Var>,
},
/// Conditional block controlled by `cond`.
If {
cond: S::Var,
body: Vec<InstrID>,
},
/// Loop over `body`.
Loop {
body: Vec<InstrID>,
},
/// Leave the enclosing loop.
Break,
/// Jump back to the start of the enclosing loop.
Continue,
}
/// An instruction together with the source location it came from.
pub struct UInstrInst<S: ResStage = Unresolved> {
pub i: UInstruction<S>,
pub origin: Origin,
}
/// Binding between a variable and a register for an assembly block.
#[derive(Debug, Clone)]
pub struct AsmBlockArg<V = IdentID> {
/// The bound variable (an identifier until resolved).
pub var: V,
/// The register the variable is bound to.
pub reg: Reg,
/// Whether the binding is an input or an output.
pub ty: AsmBlockArgType,
}
/// Direction of an assembly block binding.
#[derive(Debug, Clone, Copy)]
pub enum AsmBlockArgType {
In,
Out,
}
-161
View File
@@ -1,161 +0,0 @@
//! all main IR Upper data structures stored in UProgram
use super::*;
use crate::{
common::FileSpan,
ir::{Len, ID},
};
use std::{
collections::HashMap,
fmt::{Debug, Display},
};
/// A path made of name segments.
pub type NamePath = Vec<String>;
// Typed ids, one alias per kind of item stored in `UProgram`.
pub type FnID = ID<UFunc>;
pub type VarID = ID<UVar>;
pub type IdentID = ID<UIdent>;
pub type TypeID = ID<Type>;
pub type GenericID = ID<UGeneric>;
pub type StructID = ID<UStruct>;
pub type DataID = ID<UData>;
pub type ModID = ID<UModule>;
pub type InstrID = ID<UInstrInst>;
/// A variable that is either resolved or still an identifier.
pub type VarRes = URes<VarID>;
// NOTE(review): this wraps `VarID` exactly like `VarRes`; `URes<TypeID>` looks
// like what was intended (cf. `Resolved::Type = TypeID`) — confirm.
pub type TypeRes = URes<VarID>;
/// A function definition in the upper IR.
pub struct UFunc {
pub name: String,
pub origin: Origin,
/// Argument variables, in order.
pub args: Vec<VarID>,
/// Generic parameters of the function.
pub gargs: Vec<GenericID>,
/// Return type.
pub ret: TypeRes,
/// Ids of the instructions making up the body.
pub instructions: Vec<InstrID>,
}
/// A field of a struct definition.
pub struct StructField {
pub ty: TypeRes,
pub origin: Origin,
// pub vis: Visibility
}
/// A struct definition.
pub struct UStruct {
pub name: String,
pub origin: Origin,
/// Fields keyed by name.
pub fields: HashMap<String, StructField>,
/// Generic parameters of the struct.
pub gargs: Vec<GenericID>,
}
/// A generic parameter.
pub struct UGeneric {
pub name: String,
pub origin: Origin,
}
/// A variable.
pub struct UVar {
pub name: String,
pub origin: Origin,
pub ty: TypeRes,
/// Variable this one belongs to, if any.
pub parent: Option<VarID>,
/// Child variables keyed by name.
pub children: HashMap<String, VarID>,
}
/// Type of a variable: still an identifier, or a resolved type id.
pub enum VarTy {
Ident(IdentID),
Res(TypeID),
}
/// A location expressed as a variable plus an offset into it.
#[derive(Debug, Clone, Copy, Hash, Eq, PartialEq)]
pub struct VarOffset {
pub id: VarID,
pub offset: Len,
}
/// A named blob of data with its type.
#[derive(Clone)]
pub struct UData {
pub name: String,
pub ty: TypeID,
/// Raw bytes of the data.
pub content: Vec<u8>,
}
/// A module: named members, an optional parent module and an associated function.
#[derive(Clone)]
pub struct UModule {
pub name: String,
pub members: HashMap<String, Member>,
pub parent: Option<ModID>,
pub func: FnID,
}
/// A member of a module.
#[derive(Clone)]
pub struct Member {
pub id: MemberID,
// pub visibility: Visibility
}
/// What a module member refers to.
#[derive(Clone)]
pub enum MemberID {
Fn(FnID),
Struct(StructID),
Var(VarID),
Module(ModID),
Type(TypeDef),
}
/// A type definition: a type together with its generic parameters.
#[derive(Clone)]
pub struct TypeDef {
pub gargs: Vec<GenericID>,
pub ty: TypeID,
}
impl MemberID {
/// The externally visible kind of this member.
pub fn kind(&self) -> KindTy {
match self {
MemberID::Fn(_) => KindTy::Fn,
MemberID::Struct(_) => KindTy::Struct,
MemberID::Var(_) => KindTy::Var,
MemberID::Module(_) => KindTy::Module,
MemberID::Type(_) => KindTy::Type,
}
}
/// Human-readable description of the form `<kind> '<name>'`, with the
/// name looked up in `p`.
pub fn display_str(&self, p: &UProgram) -> String {
let name = match self {
MemberID::Var(id) => &p.vars[id].name,
MemberID::Fn(id) => &p.fns[id].name,
MemberID::Struct(id) => &p.structs[id].name,
MemberID::Module(id) => &p.modules[id].name,
// borrows a temporary String produced by type_name
MemberID::Type(def) => &p.type_name(def.ty),
};
format!("{} '{}'", self.kind(), name)
}
}
/// A value that is either resolved (`Res`) or still an identifier (`Unres`).
pub enum URes<T> {
Res(T),
Unres(IdentID),
}
/// Source location attached to IR items.
pub type Origin = FileSpan;
// "effective" (externally visible) kinds
/// The kind of thing a name can refer to, as shown in diagnostics.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum KindTy {
    Type,
    Var,
    Struct,
    Fn,
    Module,
    Generic,
}

/// Displays the lowercase, user-facing name of the kind.
impl Display for KindTy {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let label = match *self {
            Self::Type => "type",
            Self::Var => "variable",
            Self::Struct => "struct",
            Self::Fn => "function",
            Self::Module => "module",
            Self::Generic => "generic",
        };
        f.write_str(label)
    }
}
-17
View File
@@ -1,17 +0,0 @@
mod instr;
mod kind;
mod program;
mod ty;
mod resolve;
mod error;
mod ident;
use super::*;
pub use instr::*;
pub use kind::*;
pub use program::*;
pub use ty::*;
pub use error::*;
pub use resolve::*;
pub use ident::*;
-178
View File
@@ -1,178 +0,0 @@
use super::*;
/// Storage for every item of the upper IR; ids index into these vectors.
pub struct UProgram {
pub fns: Vec<UFunc>,
pub structs: Vec<UStruct>,
pub modules: Vec<UModule>,
pub data: Vec<UData>,
pub generics: Vec<UGeneric>,
pub vars: Vec<UVar>,
pub idents: Vec<UIdent>,
pub types: Vec<Type>,
pub instrs: Vec<UInstrInst>,
/// Identifiers that still need resolving.
pub unres_idents: Vec<IdentID>,
/// Instructions that still need resolving, with their function.
pub unres_instrs: Vec<(FnID, InstrID)>,
/// Ids of commonly used types.
pub tc: TypeCache,
}
/// Ids of the `Unit` and `Error` types.
pub struct TypeCache {
pub unit: TypeID,
pub error: TypeID,
}
impl UProgram {
/// Creates an empty program whose type table already contains the `Unit`
/// and `Error` types referenced by `tc`.
///
/// The vector the cached ids were allocated from is the one stored in
/// `types`, so `tc.unit` and `tc.error` are valid indices into it.
pub fn new() -> Self {
    let mut types = Vec::new();
    let tc = TypeCache {
        unit: push_id(&mut types, Type::Unit),
        error: push_id(&mut types, Type::Error),
    };
    Self {
        fns: Vec::new(),
        vars: Vec::new(),
        idents: Vec::new(),
        structs: Vec::new(),
        types,
        generics: Vec::new(),
        data: Vec::new(),
        modules: Vec::new(),
        instrs: Vec::new(),
        unres_idents: Vec::new(),
        unres_instrs: Vec::new(),
        tc,
    }
}
/// Defines a fresh `Type::Infer` type and returns its id.
pub fn infer(&mut self) -> TypeID {
self.def_ty(Type::Infer)
}
/// Stores variable `v` and returns its id.
pub fn def_var(&mut self, v: UVar) -> VarID {
push_id(&mut self.vars, v)
}
/// Stores function `f` and returns its id.
pub fn def_fn(&mut self, f: UFunc) -> FnID {
push_id(&mut self.fns, f)
}
/// Stores type `t` and returns its id.
pub fn def_ty(&mut self, t: Type) -> TypeID {
push_id(&mut self.types, t)
}
/// Stores identifier `i` and returns its id; identifiers that are still
/// `Unres` are also queued in `unres_idents`.
pub fn def_ident(&mut self, i: UIdent) -> IdentID {
let id = push_id(&mut self.idents, i);
if let IdentStatus::Unres { .. } = self.idents[id].status {
self.unres_idents.push(id);
}
id
}
/// Stores generic `g` and returns its id.
pub fn def_generic(&mut self, g: UGeneric) -> GenericID {
push_id(&mut self.generics, g)
}
/// Stores data item `d` and returns its id.
pub fn def_data(&mut self, d: UData) -> DataID {
push_id(&mut self.data, d)
}
/// Stores struct `s` and returns its id.
pub fn def_struct(&mut self, s: UStruct) -> StructID {
push_id(&mut self.structs, s)
}
/// Stores module `m` and returns its id.
pub fn def_module(&mut self, m: UModule) -> ModID {
push_id(&mut self.modules, m)
}
pub fn res_ty(&self, i: IdentID) -> Option<TypeID> {
self.idents[i].status;
}
/// Renders a type as text for diagnostics.
///
/// `ty` may be anything implementing `Typed` (a type, a type id, ...).
pub fn type_name(&self, ty: impl Typed) -> String {
match ty.ty(self) {
// struct name followed by its generic arguments, e.g. `Name<a, b>`
Type::Struct(ty) => {
format!(
"{}{}",
self.structs[ty.id].name,
self.gparams_str(&ty.gargs)
)
}
// `fn<gargs>(arg types) -> return type`
Type::FnInst(ty) => {
format!(
"fn{}({}) -> {}",
&self.gparams_str(&ty.gargs),
&self.type_list_str(self.fns[ty.id].args.iter().map(|v| self.vars[v].ty)),
&self.type_name(self.fns[ty.id].ret)
)
}
Type::Ref(t) => format!("{}&", self.type_name(t)),
Type::Bits(size) => format!("b{}", size),
Type::Array(t, len) => format!("[{}; {len}]", self.type_name(t)),
Type::Unit => "()".to_string(),
Type::Slice(t) => format!("&[{}]", self.type_name(t)),
Type::Infer => "{inferred}".to_string(),
Type::Generic(id) => self.generics[id].name.clone(),
Type::Deref(t) => format!("{}^", self.type_name(t)),
Type::Error => "{error}".to_string(),
// rendered as the name of the pointed-to type, with no marker
Type::Ptr(id) => self.type_name(id),
}
}
/// Renders the names of `args` separated by `", "`; empty input gives an
/// empty string.
pub fn type_list_str(&self, args: impl Iterator<Item = TypeID>) -> String {
    let mut out = String::new();
    for (idx, arg) in args.enumerate() {
        if idx > 0 {
            out.push_str(", ");
        }
        out.push_str(&self.type_name(arg));
    }
    out
}
/// Renders generic arguments as `<a, b, ...>`, or as an empty string when
/// there are none.
pub fn gparams_str(&self, args: &[TypeID]) -> String {
    if args.is_empty() {
        return String::new();
    }
    let inner = self.type_list_str(args.iter().cloned());
    format!("<{inner}>")
}
}
/// Appends `t` to `v` and returns an id built from the index it was stored at.
pub fn push_id<T>(v: &mut Vec<T>, t: T) -> ID<T> {
// the id is taken before the push, so it equals the new element's index
let id = ID::new(v.len());
v.push(t);
id
}
// I'm done with names...
/// Anything that can be turned into a `&Type`, given the program that owns
/// the type table.
pub trait Typed {
fn ty<'a>(&'a self, p: &'a UProgram) -> &'a Type;
}
/// A type reference is already the type.
impl Typed for &Type {
fn ty(&self, _: &UProgram) -> &Type {
self
}
}
/// A type id is looked up in the program's type table.
impl Typed for TypeID {
fn ty<'a>(&'a self, p: &'a UProgram) -> &'a Type {
&p.types[self]
}
}
/// A borrowed type id is looked up in the program's type table.
impl Typed for &TypeID {
fn ty<'a>(&'a self, p: &'a UProgram) -> &'a Type {
&p.types[*self]
}
}
/// A boxed type is borrowed from the box.
impl Typed for &Box<Type> {
fn ty<'a>(&'a self, _: &'a UProgram) -> &'a Type {
&**self
}
}
-213
View File
@@ -1,213 +0,0 @@
use crate::common::{CompilerMsg, CompilerOutput};
use super::{
IdentStatus, KindTy, MemberTy, Origin, Res, ResBase, StructID, Type, TypeID, UProgram,
};
/// Reports resolution errors to `output`.
///
/// Three passes:
/// 1. every identifier still `Unres` becomes an `UnknownMember` error and
///    every `Failed(Some(err))` contributes its stored error,
/// 2. all collected errors (including the ones passed in `errs`) are turned
///    into `CompilerMsg`s,
/// 3. every variable whose type is still `Type::Infer` is reported.
pub fn report_errs(p: &UProgram, output: &mut CompilerOutput, mut errs: Vec<ResErr>) {
for ident in &p.idents {
match &ident.status {
IdentStatus::Unres { path, base } => {
// NOTE(review): panics if `path` is empty — confirm an `Unres`
// identifier always has at least one remaining segment here.
let mem = path.last().unwrap();
errs.push(ResErr::UnknownMember {
ty: mem.ty,
name: mem.name.clone(),
origin: mem.origin,
parent: base.clone(),
})
}
IdentStatus::Failed(err) => {
if let Some(err) = err {
errs.push(err.clone())
}
}
_ => (),
}
}
for err in errs {
match err {
ResErr::Type {
dst,
src,
errs,
origin,
} => {
let mut msg = type_assign_err(p, dst, src);
for inner in errs {
// NOTE(review): with `&&` an inner mismatch is skipped as soon as
// either side equals the outer one; `||` may have been intended — confirm.
if inner.dst != dst && inner.src != src {
msg.push_str("\n ");
msg.push_str(&type_assign_err(p, inner.dst, inner.src));
}
}
output.err(CompilerMsg::new(msg, origin));
}
ResErr::NotCallable { origin, ty } => {
output.err(CompilerMsg::new(
format!("Cannot call type '{}'", p.type_name(ty)),
origin,
));
}
ResErr::CannotDeref { origin, ty } => {
output.err(CompilerMsg::new(
format!("Cannot dereference type '{}'", p.type_name(ty)),
origin,
));
}
ResErr::CondType { origin, ty } => {
output.err(CompilerMsg::new(
format!("Condition types must be '64'; found '{}'", p.type_name(ty)),
origin,
));
}
ResErr::BadControlFlow { origin, op } => {
output.err(CompilerMsg::new(
format!("Cannot {} here (outside of loop)", op.str()),
origin,
));
}
ResErr::MissingField { origin, id, name } => {
output.err(CompilerMsg::new(
format!(
"Missing field '{name}' in creation of struct '{}'",
p.structs[id].name
),
origin,
));
}
ResErr::UnknownStructField { origin, id, name } => {
output.err(CompilerMsg::new(
format!("Unknown field '{name}' in struct '{}'", p.structs[id].name),
origin,
));
}
// reported at the function's own origin
ResErr::NoReturn { fid } => output.err(CompilerMsg::new(
format!("Function must return a value"),
p.fns[fid].origin,
)),
ResErr::GenericCount {
origin,
expected,
found,
} => output.err(CompilerMsg::new(
if expected == 0 {
format!("No generic arguments expected")
} else {
format!("Expected {expected} generic arguments, found {found}")
},
origin,
)),
ResErr::KindMismatch {
origin,
found,
expected,
} => output.err(CompilerMsg::new(
format!("Expected {expected}, found {}", found.display_str(p)),
origin,
)),
ResErr::UnknownMember {
origin,
ty,
name,
parent,
} => output.err(CompilerMsg::new(
format!("Unknown {ty} {name} of {}", parent.display_str(p)),
origin,
)),
}
}
// variables whose type could not be inferred
for var in &p.vars {
if let Some(ty) = var.ty() {
match &p.types[ty] {
Type::Infer => output.err(CompilerMsg::new(
format!("Type of {:?} cannot be inferred", var.name),
var.origin,
)),
_ => (),
}
}
}
}
/// Errors produced while resolving identifiers and types.
#[derive(Clone)]
pub enum ResErr {
/// `name` is not a member/field of `parent`.
UnknownMember {
origin: Origin,
ty: MemberTy,
name: String,
parent: ResBase,
},
/// A different kind of item was found than the one expected.
KindMismatch {
origin: Origin,
expected: KindTy,
found: Res,
},
/// Wrong number of generic arguments.
GenericCount {
origin: Origin,
expected: usize,
found: usize,
},
/// A value of type `ty` was called.
NotCallable {
origin: Origin,
ty: TypeID,
},
/// A value of type `ty` was dereferenced.
CannotDeref {
origin: Origin,
ty: TypeID,
},
/// A condition had type `ty`, which is not accepted for conditions.
CondType {
origin: Origin,
ty: TypeID,
},
/// Function `fid` must return a value.
NoReturn {
fid: usize,
},
/// `break`/`continue` used outside of a loop.
BadControlFlow {
op: ControlFlowOp,
origin: Origin,
},
/// Field `name` was not provided when creating struct `id`.
MissingField {
origin: Origin,
id: StructID,
name: String,
},
/// Field `name` does not exist in struct `id`.
UnknownStructField {
origin: Origin,
id: StructID,
name: String,
},
/// Type `src` cannot be assigned to `dst`; `errs` lists nested mismatches.
Type {
dst: TypeID,
src: TypeID,
errs: Vec<TypeMismatch>,
origin: Origin,
},
}
/// Loop control operations that can be misused outside of a loop.
#[derive(Debug, Clone)]
pub enum ControlFlowOp {
    Break,
    Continue,
}

impl ControlFlowOp {
    /// The keyword for this operation, as used in error messages.
    pub fn str(&self) -> &'static str {
        match *self {
            Self::Break => "break",
            Self::Continue => "continue",
        }
    }
}
/// A pair of types where `src` could not be assigned to `dst`.
#[derive(Debug, Clone)]
pub struct TypeMismatch {
pub dst: TypeID,
pub src: TypeID,
}
/// Builds the "Cannot assign type <src> to <dst>" message using the type
/// names from `p`.
pub fn type_assign_err(p: &UProgram, dst: TypeID, src: TypeID) -> String {
    let from = p.type_name(src);
    let to = p.type_name(dst);
    format!("Cannot assign type {from} to {to}")
}
-195
View File
@@ -1,195 +0,0 @@
use super::*;
impl UProgram {
/// Makes one pass over the identifiers queued in `unres_idents`.
///
/// For each identifier: follows `Ref` chains, then walks the remaining
/// member path, validating the base at every step. Identifiers that cannot
/// make progress yet are pushed back onto `unres_idents`; failures set the
/// status to `Failed`. Returns `ResolveRes::Unfinished` if at least one
/// identifier was resolved during this pass, `ResolveRes::Finished`
/// otherwise.
pub fn resolve_idents(&mut self, errs: &mut Vec<ResErr>) -> ResolveRes {
let mut resolve_res = ResolveRes::Finished;
'main: for i in std::mem::take(&mut self.unres_idents) {
let mut j = i;
// take from ref if possible
// NOTE(review): the loop condition reads `idents[j]` but the `Res`,
// `Failed` and `Cooked` arms write `idents[i]`; if `j != i` the condition
// never changes — confirm this terminates.
while let IdentStatus::Ref(other) = &self.idents[j].status {
match &self.idents[other].status {
IdentStatus::Res(res) => self.idents[i].status = IdentStatus::Res(res.clone()),
&IdentStatus::Ref(id) => j = id,
IdentStatus::Unres { .. } => {
// target not resolved yet: retry on a later pass
self.unres_idents.push(i);
continue 'main;
}
IdentStatus::Failed(..) => self.idents[i].status = IdentStatus::Cooked,
IdentStatus::Cooked => self.idents[i].status = IdentStatus::Cooked,
}
}
let status = &mut self.idents[i].status;
// TODO: there are some clones here that shouldn't be needed
let IdentStatus::Unres { path, base } = status else {
continue;
};
// segments are consumed from the end of `path`
// NOTE(review): when a lookup below fails and the identifier is re-queued,
// the popped `mem` is not put back into `path` — confirm this is intended.
while let Some(mem) = path.pop() {
let res = match base {
ResBase::Unvalidated(u) => {
match u.validate(
&self.fns,
&self.structs,
&self.generics,
&mut self.types,
errs,
) {
Ok(res) => res,
Err(err) => {
*status = IdentStatus::Failed(err);
continue 'main;
}
}
}
ResBase::Validated(res) => res.clone(),
};
*base = match (res, mem.ty) {
// `module::member`
(Res::Module(id), MemberTy::Member) => {
let Some(m) = self.modules[id].members.get(&mem.name) else {
self.unres_idents.push(i);
continue 'main;
};
ResBase::Unvalidated(MemRes {
mem: m.clone(),
origin: mem.origin,
gargs: mem.gargs,
})
}
// `var.field`
(Res::Var(id), MemberTy::Field) => {
// trait resolution here
let Some(&child) = self.vars[id].children.get(&mem.name) else {
self.unres_idents.push(i);
continue 'main;
};
ResBase::Unvalidated(MemRes {
mem: Member {
id: MemberID::Var(child),
},
origin: mem.origin,
gargs: mem.gargs,
})
}
// any other combination cannot have members
_ => {
*status = IdentStatus::Failed(Some(ResErr::UnknownMember {
origin: mem.origin,
ty: mem.ty,
name: mem.name.clone(),
parent: base.clone(),
}));
continue 'main;
}
};
}
// path exhausted: validate whatever the base ended up as
let res = match base {
ResBase::Unvalidated(u) => {
match u.validate(
&self.fns,
&self.structs,
&self.generics,
&mut self.types,
errs,
) {
Ok(res) => res,
Err(err) => {
*status = IdentStatus::Failed(err);
continue 'main;
}
}
}
ResBase::Validated(res) => res.clone(),
};
*status = IdentStatus::Res(res);
resolve_res = ResolveRes::Unfinished;
}
resolve_res
}
}
impl MemRes {
    /// Checks the generic arguments of this member reference and turns it
    /// into a `Res`.
    ///
    /// - Functions, structs and type definitions must receive exactly as many
    ///   generic arguments as they declare.
    /// - Variables and modules must receive none.
    /// - For a type definition the result is the type instantiated with the
    ///   supplied arguments (as returned by `inst_typedef`).
    ///
    /// # Errors
    /// Returns `Err(Some(ResErr::GenericCount { .. }))` when the number of
    /// generic arguments is wrong.
    pub fn validate(
        &self,
        fns: &[UFunc],
        structs: &[UStruct],
        generics: &[UGeneric],
        types: &mut Vec<Type>,
        errs: &mut Vec<ResErr>,
    ) -> Result<Res, Option<ResErr>> {
        // used by member kinds that accept no generic arguments at all
        let no_gargs = || {
            if self.gargs.is_empty() {
                Ok(())
            } else {
                Err(ResErr::GenericCount {
                    origin: self.origin,
                    expected: 0,
                    found: self.gargs.len(),
                })
            }
        };
        Ok(match &self.mem.id {
            &MemberID::Fn(id) => {
                validate_gargs(
                    &fns[id].gargs,
                    &self.gargs,
                    generics,
                    types,
                    errs,
                    self.origin,
                )?;
                Res::Fn(FnInst {
                    id,
                    gargs: self.gargs.clone(),
                })
            }
            &MemberID::Struct(id) => {
                validate_gargs(
                    &structs[id].gargs,
                    &self.gargs,
                    generics,
                    types,
                    errs,
                    self.origin,
                )?;
                Res::Struct(StructInst {
                    id,
                    gargs: self.gargs.clone(),
                })
            }
            &MemberID::Var(id) => {
                no_gargs()?;
                Res::Var(id)
            }
            &MemberID::Module(id) => {
                no_gargs()?;
                Res::Module(id)
            }
            MemberID::Type(def) => {
                validate_gargs(&def.gargs, &self.gargs, generics, types, errs, self.origin)?;
                // use the instantiated type, not the generic definition
                Res::Type(inst_typedef(def, &self.gargs, types))
            }
        })
    }
}
/// Checks that `src` supplies exactly one type per generic parameter in `dst`.
///
/// # Errors
/// Returns `Err(Some(ResErr::GenericCount { .. }))` when the lengths differ.
pub fn validate_gargs(
    dst: &[GenericID],
    src: &[TypeID],
    generics: &[UGeneric],
    types: &[Type],
    errs: &mut Vec<ResErr>,
    origin: Origin,
) -> Result<(), Option<ResErr>> {
    let (expected, found) = (dst.len(), src.len());
    if expected != found {
        return Err(Some(ResErr::GenericCount {
            origin,
            expected,
            found,
        }));
    }
    for (param, arg) in dst.iter().zip(src.iter()) {
        // both lookups are kept even though nothing is checked yet
        let _generic = &generics[param];
        let _ty = &types[arg];
        // TODO: validate trait constraints
    }
    Ok(())
}
-115
View File
@@ -1,115 +0,0 @@
use std::collections::HashMap;
use super::*;
/// Creates a fresh variable that stands for the function instance `fi`.
///
/// The variable takes the function's name, gets a newly pushed
/// `Type::FnInst` as its type and has neither a parent nor children.
/// Returns the id of the new variable.
pub fn inst_fn_var(
    fi: FnInst,
    fns: &[UFunc],
    origin: Origin,
    vars: &mut Vec<UVar>,
    types: &mut Vec<Type>,
) -> VarID {
    let var = UVar {
        // Read the name before `fi` is moved into the type below.
        name: fns[fi.id].name.clone(),
        origin,
        ty: VarTy::Res(push_id(types, Type::FnInst(fi))),
        parent: None,
        children: HashMap::new(),
    };
    push_id(vars, var)
}
/// Creates a fresh variable that stands for the struct instance `si`.
///
/// The variable takes the struct's name, gets a newly pushed `Type::Struct`
/// as its type and has neither a parent nor children. Returns the id of the
/// new variable.
pub fn inst_struct_var(
    si: StructInst,
    structs: &[UStruct],
    origin: Origin,
    vars: &mut Vec<UVar>,
    types: &mut Vec<Type>,
) -> VarID {
    let var = UVar {
        // Read the name before `si` is moved into the type below.
        name: structs[si.id].name.clone(),
        origin,
        ty: VarTy::Res(push_id(types, Type::Struct(si))),
        parent: None,
        children: HashMap::new(),
    };
    push_id(vars, var)
}
/// Instantiates the type behind `def` with the generic arguments `gargs`
/// and returns the id of the resulting type.
///
/// `gargs` is assumed to be valid (already checked against `def.gargs`).
pub fn inst_typedef(def: &TypeDef, gargs: &[TypeID], types: &mut Vec<Type>) -> TypeID {
    let substitutions = inst_gmap(&def.gargs, gargs);
    inst_type(def.ty, types, &substitutions)
}
/// Builds the substitution map from generic parameters (`dst`) to the types
/// supplied for them (`src`), pairing the two slices position by position.
///
/// Pairing stops at the end of the shorter slice.
pub fn inst_gmap(dst: &[GenericID], src: &[TypeID]) -> HashMap<GenericID, TypeID> {
    dst.iter().copied().zip(src.iter().copied()).collect()
}
/// Applies the substitutions in `gmap` to the type `id`.
///
/// Returns `id` itself when `gmap` is empty or when the substitution leaves
/// the type untouched; otherwise returns the id of the instantiated type.
pub fn inst_type(id: TypeID, types: &mut Vec<Type>, gmap: &HashMap<GenericID, TypeID>) -> TypeID {
    if gmap.is_empty() {
        return id;
    }
    inst_type_(id, types, gmap).unwrap_or(id)
}
fn inst_type_(
id: TypeID,
types: &mut Vec<Type>,
gmap: &HashMap<GenericID, TypeID>,
) -> Option<TypeID> {
let ty = match types[id].clone() {
Type::Bits(_) => return None,
Type::Struct(struct_ty) => Type::Struct(StructInst {
id: struct_ty.id,
gargs: inst_all(&struct_ty.gargs, types, gmap)?,
}),
Type::FnInst(fn_ty) => Type::FnInst(FnInst {
id: fn_ty.id,
gargs: inst_all(&fn_ty.gargs, types, gmap)?,
}),
Type::Ref(id) => Type::Ref(inst_type_(id, types, gmap)?),
Type::Slice(id) => Type::Slice(inst_type_(id, types, gmap)?),
Type::Array(id, len) => Type::Array(inst_type_(id, types, gmap)?, len),
Type::Unit => return None,
Type::Generic(gid) => return gmap.get(&gid).map(|id| Some(*id)).unwrap_or_else(|| None),
Type::Infer => Type::Infer,
Type::Deref(id) => Type::Deref(inst_type_(id, types, gmap)?),
Type::Ptr(id) => Type::Ptr(inst_type_(id, types, gmap)?),
Type::Error => return None,
};
Some(push_id(types, ty))
}
/// Instantiates every type in `ids` with `gmap`.
///
/// Returns `None` when no element changed. Otherwise returns the full list,
/// with changed elements replaced and unchanged ones copied over as-is. The
/// output vector is only allocated once the first change is seen.
fn inst_all(
    ids: &[TypeID],
    types: &mut Vec<Type>,
    gmap: &HashMap<GenericID, TypeID>,
) -> Option<Vec<TypeID>> {
    let mut out: Option<Vec<TypeID>> = None;
    for (idx, &original) in ids.iter().enumerate() {
        match (inst_type_(original, types, gmap), out.as_mut()) {
            (Some(replaced), Some(list)) => list.push(replaced),
            (Some(replaced), None) => {
                // First change: back-fill everything seen so far.
                let mut list = ids[..idx].to_vec();
                list.push(replaced);
                out = Some(list);
            }
            (None, Some(list)) => list.push(original),
            (None, None) => {}
        }
    }
    out
}
-182
View File
@@ -1,182 +0,0 @@
use std::collections::HashSet;
use super::*;
/// Events emitted while resolving instructions.
pub enum UResEvent {
/// Carries a variable id; presumably recorded when that variable is used — TODO confirm.
VarUse(VarID),
}
impl UProgram {
/// Runs `resolve_instr` over every entry of `self.unres_instrs`.
///
/// The pending list is taken out of `self`; entries for which
/// `resolve_instr` reports `ResolveRes::Unfinished` are pushed back so a
/// later pass can retry them.
///
/// NOTE(review): this always returns `ResolveRes::Finished`, even when
/// entries were re-queued — confirm that is intended.
pub fn resolve_instrs(&mut self, errs: &mut Vec<ResErr>) -> ResolveRes {
// Bundle the mutable and shared program tables the resolver works on.
let mut data = ResData {
changed: false,
types: &mut self.types,
s: Sources {
idents: &mut self.idents,
vars: &mut self.vars,
fns: &self.fns,
structs: &self.structs,
generics: &self.generics,
data: &self.data,
modules: &self.modules,
},
errs,
};
for ids in std::mem::take(&mut self.unres_instrs) {
if let ResolveRes::Unfinished = resolve_instr(ids, &mut self.instrs, &mut data) {
self.unres_instrs.push(ids);
};
}
ResolveRes::Finished
}
}
/// Per-instruction context handed down while resolving nested instructions.
#[derive(Clone, Copy)]
struct ResolveCtx {
/// Identifier the `Ret` instruction's source is matched against.
ret: IdentID,
/// Whether `Break` / `Continue` are allowed here (set to `true` inside `Loop` bodies).
breakable: bool,
/// The instruction this context refers to.
i: InstrID,
}
/// Resolves a single instruction, identified by its function and
/// instruction id, against the shared resolver state in `data`.
///
/// NOTE(review): apart from the `Call` arm, the body refers to `res` and
/// `ctx`, neither of which is declared in this function as shown, and most
/// arms do not produce a `ResolveRes`. This looks like work in progress —
/// confirm against the repository before relying on it.
pub fn resolve_instr<'a>(
(fi, ii): (FnID, InstrID),
instrs: &mut Vec<UInstrInst>,
data: &mut ResData<'a>,
) -> ResolveRes {
let instr = &mut instrs[ii];
match &mut instr.i {
UInstruction::Call { dst, f, args } => {
// Resolve the callee, every argument and the destination.
let fi = data.res::<UFunc>(*f);
for &a in args {
data.res::<UVar>(a);
}
data.res::<UVar>(dst);
match fi {
Ok(fi) => {
let f = &data.s.fns[fi.id];
// Pair each call argument with the matching parameter of the callee.
for (&src, &dst) in args.iter().zip(&f.args) {
data.s.constraints.push(UResEvent::AssignVVI { dst, src });
}
}
Err(r) => return r,
}
ResolveRes::Finished
}
UInstruction::Mv { dst, src } => {
res |= data.match_types::<UVar, UVar>(dst, src, src);
}
UInstruction::Ref { dst, src } => {
// The destination must already be a reference type; its pointee is matched with `src`.
let dstty = &data.types[data.res_var_ty(dst)?];
let &Type::Ref(dest_ty) = dstty else {
compiler_error()
};
res |= data.match_types::<Type, UVar>(dest_ty, src, src);
}
UInstruction::Deref { dst, src } => {
// Only reference types can be dereferenced; anything else is reported.
let srcid = data.res_var_ty(src)?;
let &Type::Ref(src_ty) = data.types[srcid] else {
let origin = src.origin(data);
data.errs.push(ResErr::CannotDeref { origin, ty: srcid });
return None;
};
res |= data.match_types::<UVar, Type>(dst, src_ty, src);
}
UInstruction::LoadData { dst, src } => {
let srcid = src.type_id(&data.s);
res |= data.match_types::<UVar, Type>(dst, srcid, dst);
}
UInstruction::LoadSlice { dst, src } => {
// Destination must be a slice and the source data an array; their element types are matched.
let (dstty, dstid) = data.res_var_ty(dst, ctx)?;
let &Type::Slice(dstty) = dstty else {
compiler_error()
};
let srcid = src.type_id(&data.s);
let Type::Array(srcty, _) = data.types[srcid] else {
compiler_error()
};
res |= data.match_types(dstty, srcty, dst);
}
UInstruction::AsmBlock { instructions, args } => {
// TODO
}
UInstruction::Ret { src } => {
res |= data.match_types::<Type, UVar>(ctx.ret, src, src);
}
UInstruction::Construct { dst, struc, fields } => {
let si = data.res::<UStruct>(dst, ctx)?;
let sid = si.id;
let st = &data.s.structs[sid];
// Names of declared fields that the construction actually provides.
let mut used = HashSet::new();
for (name, field) in &st.fields {
if let Some(src) = fields.get(name) {
used.insert(name);
res |= data.match_types::<Type, UVar>(field.ty, src, src);
} else {
let origin = dst.origin(data);
data.errs.push(ResErr::MissingField {
origin,
id: sid,
name: name.clone(),
});
}
}
// Any provided field that was not matched above does not exist on the struct.
for (name, _) in fields {
if !used.contains(name) {
let origin = dst.origin(data);
data.errs.push(ResErr::UnknownStructField {
origin,
id: sid,
name: name.clone(),
});
}
}
}
UInstruction::If { cond, body } => {
// The condition must resolve to a 64-bit `Bits` type.
if let Some(ty) = data.res_var_ty(cond, ctx) {
if !matches!(ty.0, RType::Bits(64)) {
let id = ty.1;
let origin = cond.origin(data);
data.errs.push(ResErr::CondType { origin, ty: id });
}
}
for i in body {
resolve_instr(
data,
ResolveCtx {
ret: ctx.ret,
breakable: ctx.breakable,
i,
},
);
}
}
UInstruction::Loop { body } => {
// Loop bodies are resolved with `breakable` switched on.
for i in body {
resolve_instr(
data,
ResolveCtx {
ret: ctx.ret,
breakable: true,
i,
},
);
}
}
UInstruction::Break => {
if !ctx.breakable {
data.errs.push(ResErr::BadControlFlow {
op: ControlFlowOp::Break,
origin: ctx.i.origin,
});
}
}
UInstruction::Continue => {
if !ctx.breakable {
data.errs.push(ResErr::BadControlFlow {
op: ControlFlowOp::Continue,
origin: ctx.i.origin,
});
}
}
}
}
-146
View File
@@ -1,146 +0,0 @@
use super::*;
/// Tries to make the types `dst` and `src` agree.
///
/// Both ids are first passed through `clean_type`; if either cleans to
/// `None` the match is treated as finished. An `Infer` on either side is
/// overwritten with a `Ptr` to the other side (and `data.changed` is set).
/// Structs, references, slices and arrays are compared structurally; every
/// other combination is a `TypeMismatch`.
pub fn match_types(data: &mut ResData, dst: TypeID, src: TypeID) -> MatchRes {
let Some(dst) = clean_type(data.types, dst) else {
return MatchRes::Finished;
};
let Some(src) = clean_type(data.types, src) else {
return MatchRes::Finished;
};
// prevents this from blowing up I think:
// let mut x, y;
// x = y;
// y = x;
if dst == src {
return MatchRes::Finished;
}
let error = || MatchRes::Error(vec![TypeMismatch { dst, src }]);
match (data.types[dst].clone(), data.types[src].clone()) {
// prefer changing dst over src
(Type::Infer, _) => {
data.changed = true;
data.types[dst] = Type::Ptr(src);
MatchRes::Finished
}
(_, Type::Infer) => {
data.changed = true;
data.types[src] = Type::Ptr(dst);
MatchRes::Finished
}
(Type::Struct(dest), Type::Struct(src)) => {
// Different struct definitions never match; otherwise compare generic arguments pairwise.
if dest.id != src.id {
return error();
}
match_all(data, dest.gargs.iter().cloned(), src.gargs.iter().cloned())
}
// (
// Type::Fn {
// args: dst_args,
// ret: dst_ret,
// },
// Type::Fn {
// args: src_args,
// ret: src_ret,
// },
// ) => {
// let dst = dst_args.into_iter().chain(once(dst_ret));
// let src = src_args.into_iter().chain(once(src_ret));
// match_all(data, dst, src)
// }
(Type::Ref(dest), Type::Ref(src)) => match_types(data, dest, src),
(Type::Slice(dest), Type::Slice(src)) => match_types(data, dest, src),
(Type::Array(dest, dlen), Type::Array(src, slen)) => {
// Arrays must agree on length before their element types are compared.
if dlen == slen {
match_types(data, dest, src)
} else {
error()
}
}
_ => error(),
}
}
fn match_all(
data: &mut ResData,
dst: impl Iterator<Item = TypeID>,
src: impl Iterator<Item = TypeID>,
) -> MatchRes {
let mut finished = true;
let mut errors = Vec::new();
for (dst, src) in dst.zip(src) {
match match_types(data, dst, src) {
MatchRes::Unfinished => finished = false,
MatchRes::Error(errs) => errors.extend(errs),
MatchRes::Finished => (),
}
}
if finished {
if errors.is_empty() {
MatchRes::Finished
} else {
MatchRes::Error(errors)
}
} else {
MatchRes::Unfinished
}
}
impl<'a> ResData<'a> {
pub fn match_types(
&mut self,
dst: impl MaybeTypeID,
src: impl MaybeTypeID,
origin: impl HasOrigin,
) -> ResolveRes {
let dst = dst.type_id(&self.s)?;
let src = src.type_id(&self.s)?;
let res = match_types(self, dst, src);
match res {
MatchRes::Unfinished => ResolveRes::Unfinished,
MatchRes::Finished => ResolveRes::Finished,
MatchRes::Error(es) => {
self.errs.push(ResErr::Type {
errs: es,
origin: origin.origin(self),
dst,
src,
});
ResolveRes::Finished
}
}
}
}
/// Outcome of matching two types against each other.
pub enum MatchRes {
/// The match could not be decided yet.
Unfinished,
/// The match is settled and no mismatch was found.
Finished,
/// The types do not agree; carries every mismatch found.
Error(Vec<TypeMismatch>),
}
/// Lets `?` on a `Result<_, MatchRes>` return the error value directly from
/// a function whose return type is `MatchRes`.
impl FromResidual<Result<Infallible, MatchRes>> for MatchRes {
    fn from_residual(residual: Result<Infallible, MatchRes>) -> Self {
        match residual {
            Err(outcome) => outcome,
            // `Infallible` has no values, so the `Ok` side can never exist.
            Ok(_) => unreachable!(),
        }
    }
}
/// Something that may be able to provide a type id.
pub trait MaybeTypeID {
/// Returns the type id, or the `ResolveRes` to report when it is not available.
fn type_id(&self, s: &Sources) -> Result<TypeID, ResolveRes>;
}
impl<T: TypeIDed> MaybeTypeID for T {
fn type_id(&self, s: &Sources) -> Result<TypeID, ResolveRes> {
Ok(self.type_id(s))
}
}
/// A variable has a type id once its type has been resolved.
impl MaybeTypeID for VarID {
fn type_id(&self, s: &Sources) -> Result<TypeID, ResolveRes> {
match s.vars[self].ty {
// Variables whose type is still an unresolved identifier are not handled yet.
VarTy::Ident(id) => todo!(),
VarTy::Res(id) => Ok(id),
}
}
}
-293
View File
@@ -1,293 +0,0 @@
use super::*;
use crate::{
common::CompilerOutput,
ir::{MemRes, Member},
};
use std::{
convert::Infallible,
ops::{BitOrAssign, FromResidual},
};
mod error;
mod ident;
mod instantiate;
mod instr;
mod matc;
pub use error::*;
use instantiate::*;
impl UProgram {
    /// Resolves identifiers and instructions until a pass reports that
    /// nothing is left unfinished, then checks that every function with a
    /// non-unit return type ends in a `Ret`, and finally reports all
    /// collected errors to `output`.
    pub fn resolve(&mut self, output: &mut CompilerOutput) {
        self.unres_instrs = (0..self.instrs.len()).map(InstrID::from).collect();
        let mut errs = Vec::new();
        loop {
            let mut pass = ResolveRes::Finished;
            pass |= self.resolve_idents(&mut errs);
            pass |= self.resolve_instrs(&mut errs);
            if pass != ResolveRes::Unfinished {
                break;
            }
        }
        for (fid, f) in self.fns.iter().enumerate() {
            // this currently works bc expressions create temporary variables
            // although you can't do things like loop {return 3} (need to analyze control flow)
            let Some(ty) = self.res_ty(f.ret) else {
                continue;
            };
            if self.types[ty] == Type::Unit {
                continue;
            }
            let ends_with_ret = f
                .instructions
                .last()
                .is_some_and(|i| matches!(self.instrs[i].i, UInstruction::Ret { .. }));
            if !ends_with_ret {
                errs.push(ResErr::NoReturn { fid });
            }
        }
        report_errs(self, output, errs);
    }
}
/// Aborts resolution for a state that is believed to be impossible.
///
/// # Panics
/// Always panics.
fn compiler_error() -> ! {
    // TODO: reaching this most likely means there is a bug in the compiler itself
    panic!("how could this happen to me (you)")
}
/// Borrowed views of the program tables the resolver reads from; identifiers
/// and variables are mutable, the rest is read-only.
struct Sources<'a> {
idents: &'a mut [UIdent],
vars: &'a mut Vec<UVar>,
fns: &'a [UFunc],
structs: &'a [UStruct],
generics: &'a [UGeneric],
data: &'a [UData],
modules: &'a [UModule],
}
/// Mutable state shared by one resolution pass.
struct ResData<'a> {
/// Set when type matching rewrote an `Infer` entry.
changed: bool,
/// The program's type table.
types: &'a mut Vec<Type>,
/// The remaining program tables.
s: Sources<'a>,
/// Errors collected so far.
errs: &'a mut Vec<ResErr>,
}
impl<'a> ResData<'a> {
    /// Resolves the identifier `i` as kind `K`.
    pub fn res<K: ResKind>(&mut self, i: IdentID) -> Result<K::Res, ResolveRes> {
        i.res_as::<K>(&mut self.s, self.types)
    }

    /// Resolves `x` to a type and follows it with `resolved_type`.
    pub fn res_ty(&mut self, x: impl Resolvable<Type>) -> Result<TypeID, ResolveRes> {
        let raw = Resolvable::<Type>::try_res(&x, &mut self.s, self.types, self.errs)?;
        resolved_type(self.types, raw)
    }

    /// Resolves the identifier `i` as a variable and returns that
    /// variable's type, followed with `resolved_type`.
    pub fn res_var_ty(&mut self, i: IdentID) -> Result<TypeID, ResolveRes> {
        let var = self.res::<UVar>(i)?;
        let raw = match self.s.vars[var].ty {
            VarTy::Res(ty) => ty,
            // The variable's type is itself still an identifier: resolve that first.
            VarTy::Ident(ident) => ident.res_as::<Type>(&mut self.s, self.types)?,
        };
        resolved_type(self.types, raw)
    }
}
/// Whether a resolution step is settled or has to be retried in a later pass.
#[derive(Debug, Clone, Copy, PartialEq)]
pub enum ResolveRes {
/// Nothing more to do (this is also used after an error has been recorded).
Finished,
/// Could not be decided yet.
Unfinished,
}
/// `a |= b` leaves `a` as `Unfinished` as soon as either side is
/// `Unfinished`; a `Finished` right-hand side never changes `a`.
impl BitOrAssign for ResolveRes {
    fn bitor_assign(&mut self, rhs: Self) {
        if matches!(rhs, ResolveRes::Unfinished) {
            *self = ResolveRes::Unfinished;
        }
    }
}
/// Lets `?` on an `Option` be used in functions returning `ResolveRes`:
/// a `None` turns into `ResolveRes::Unfinished`.
impl FromResidual<Option<Infallible>> for ResolveRes {
fn from_residual(_: Option<Infallible>) -> Self {
Self::Unfinished
}
}
/// A handle that can be resolved to the result type of kind `K`.
trait Resolvable<K: ResKind> {
/// Attempts the resolution; the `Err` value says whether to retry
/// (`Unfinished`) or give up (`Finished`).
fn try_res(
&self,
s: &mut Sources,
types: &mut Vec<Type>,
errs: &mut Vec<ResErr>,
) -> Result<K::Res, ResolveRes>;
}
impl IdentID {
    /// Interprets this identifier's resolution as kind `K`.
    ///
    /// # Errors
    /// * `ResolveRes::Unfinished` while the identifier is still `Ref` or
    ///   `Unres`.
    /// * `ResolveRes::Finished` when it is `Failed` or `Cooked`, or when
    ///   its resolution is of a different kind — in that last case the
    ///   identifier is also marked `Failed` with a `KindMismatch`.
    fn res_as<K: ResKind>(
        &self,
        s: &mut Sources,
        types: &mut Vec<Type>,
    ) -> Result<K::Res, ResolveRes> {
        let origin = s.idents[self].origin;
        let res = match &s.idents[self].status {
            IdentStatus::Res(res) => res.clone(),
            IdentStatus::Ref { .. } | IdentStatus::Unres { .. } => {
                return Err(ResolveRes::Unfinished);
            }
            IdentStatus::Failed(..) | IdentStatus::Cooked => {
                return Err(ResolveRes::Finished);
            }
        };
        K::from_res(res, types, s, origin).map_err(|found| {
            s.idents[self].status = IdentStatus::Failed(Some(ResErr::KindMismatch {
                origin,
                expected: K::ty(),
                found,
            }));
            ResolveRes::Finished
        })
    }
}
/// Resolving a reference to an identifier forwards to the implementation
/// for the identifier itself (that implementation is not part of this file section).
impl<K: ResKind> Resolvable<K> for &IdentID {
fn try_res(
&self,
s: &mut Sources,
types: &mut Vec<Type>,
errs: &mut Vec<ResErr>,
) -> Result<K::Res, ResolveRes> {
Resolvable::<K>::try_res(*self, s, types, errs)
}
}
/// A variable id is already its own resolution.
impl Resolvable<UVar> for VarID {
    fn try_res(
        &self,
        _s: &mut Sources,
        _types: &mut Vec<Type>,
        _errs: &mut Vec<ResErr>,
    ) -> Result<<UVar as ResKind>::Res, ResolveRes> {
        let id = *self;
        Ok(id)
    }
}
/// A type id is already its own resolution.
impl Resolvable<Type> for TypeID {
    fn try_res(
        &self,
        _s: &mut Sources,
        _types: &mut Vec<Type>,
        _errs: &mut Vec<ResErr>,
    ) -> Result<<Type as ResKind>::Res, ResolveRes> {
        let id = *self;
        Ok(id)
    }
}
/// A kind of thing an identifier can resolve to (function, variable, struct, type).
pub trait ResKind {
/// What a successful resolution of this kind yields.
type Res;
/// The kind tag used in `KindMismatch` errors.
fn ty() -> KindTy;
/// Converts a generic resolution into this kind, handing `res` back
/// unchanged in `Err` when it is of a different kind.
fn from_res(
res: Res,
types: &mut Vec<Type>,
s: &mut Sources,
origin: Origin,
) -> Result<Self::Res, Res>;
}
/// Functions resolve to a function instance; only `Res::Fn` qualifies.
impl ResKind for UFunc {
    type Res = FnInst;

    fn ty() -> KindTy {
        KindTy::Fn
    }

    fn from_res(res: Res, _: &mut Vec<Type>, _: &mut Sources, _: Origin) -> Result<Self::Res, Res> {
        match res {
            Res::Fn(inst) => Ok(inst),
            other => Err(other),
        }
    }
}
/// Variables resolve to a variable id. A function resolution is accepted
/// too: it is wrapped in a freshly created variable via `inst_fn_var`.
impl ResKind for UVar {
    type Res = VarID;

    fn ty() -> KindTy {
        KindTy::Var
    }

    fn from_res(
        res: Res,
        types: &mut Vec<Type>,
        s: &mut Sources,
        origin: Origin,
    ) -> Result<Self::Res, Res> {
        match res {
            Res::Var(id) => Ok(id),
            Res::Fn(inst) => Ok(inst_fn_var(inst, s.fns, origin, s.vars, types)),
            other => Err(other),
        }
    }
}
/// Structs resolve to a struct instance; only `Res::Struct` qualifies.
impl ResKind for UStruct {
    type Res = StructInst;

    fn ty() -> KindTy {
        KindTy::Struct
    }

    fn from_res(res: Res, _: &mut Vec<Type>, _: &mut Sources, _: Origin) -> Result<Self::Res, Res> {
        match res {
            Res::Struct(inst) => Ok(inst),
            other => Err(other),
        }
    }
}
/// Types resolve to a type id. A struct resolution is accepted too: it is
/// pushed into the type table as `Type::Struct`.
impl ResKind for Type {
    type Res = TypeID;

    fn ty() -> KindTy {
        KindTy::Type
    }

    fn from_res(
        res: Res,
        types: &mut Vec<Type>,
        _s: &mut Sources,
        _: Origin,
    ) -> Result<Self::Res, Res> {
        match res {
            Res::Type(id) => Ok(id),
            Res::Struct(inst) => Ok(push_id(types, Type::Struct(inst))),
            other => Err(other),
        }
    }
}
/// Something that always has a type id.
pub trait TypeIDed {
/// Returns the type id, looking it up in `s` if necessary.
fn type_id(&self, s: &Sources) -> TypeID;
}
/// A type id is its own type id.
impl TypeIDed for TypeID {
fn type_id(&self, _: &Sources) -> TypeID {
*self
}
}
/// A data entry's type id is the `ty` stored in the data table.
impl TypeIDed for DataID {
fn type_id(&self, s: &Sources) -> TypeID {
s.data[self].ty
}
}
/// References forward to the referenced value.
impl<T: TypeIDed> TypeIDed for &T {
fn type_id(&self, s: &Sources) -> TypeID {
(*self).type_id(s)
}
}
/// Lets `?` on a `Result<_, ResolveRes>` return the error value directly
/// from a function whose return type is `ResolveRes`.
impl FromResidual<Result<Infallible, ResolveRes>> for ResolveRes {
    fn from_residual(residual: Result<Infallible, ResolveRes>) -> Self {
        match residual {
            Err(outcome) => outcome,
            // `Infallible` has no values, so the `Ok` side can never exist.
            Ok(_) => unreachable!(),
        }
    }
}
/// Something whose source origin can be looked up for error reporting.
trait HasOrigin {
fn origin(&self, data: &ResData) -> Origin;
}
/// An identifier's origin is the one stored in the identifier table.
impl HasOrigin for &IdentID {
fn origin(&self, data: &ResData) -> Origin {
data.s.idents[*self].origin
}
}
-101
View File
@@ -1,101 +0,0 @@
use super::{FnID, GenericID, Len, ResolveRes, StructID, TypeID, UProgram, VarID};
/// A named field looked up on a parent variable.
#[derive(Debug, Clone, Hash, Eq, PartialEq)]
pub struct FieldRef {
pub parent: VarID,
pub name: String,
}
/// A struct definition together with the generic arguments it is used with.
#[derive(Debug, Clone, Eq, PartialEq, Hash)]
pub struct StructInst {
pub id: StructID,
/// assumed to be valid
pub gargs: Vec<TypeID>,
}
/// A function definition together with the generic arguments it is used with.
#[derive(Debug, Clone, Eq, PartialEq, Hash)]
pub struct FnInst {
pub id: FnID,
/// assumed to be valid
pub gargs: Vec<TypeID>,
}
/// An entry of the program's type table. Nested types are referred to by `TypeID`.
#[derive(Clone, PartialEq)]
pub enum Type {
Bits(u32),
Struct(StructInst),
// this can be added for constraints later (F: fn(...) -> ...)
// Fn { args: Vec<TypeID>, ret: TypeID },
// "fake" types
FnInst(FnInst),
Ref(TypeID),
Slice(TypeID),
Array(TypeID, Len),
Unit,
/// Not known yet; type matching overwrites it with a `Ptr` to the type it was matched against.
Infer,
Generic(GenericID),
/// The pointee of the referenced type (see `clean_type` / `resolved_type`).
Deref(TypeID),
/// Forwarding entry: stands for the type it points to.
Ptr(TypeID),
Error,
}
/// Convenience wrappers: each one registers `self` with `p.def_ty` and then
/// wraps the resulting id in the corresponding composite type.
impl Type {
    /// Reference to `self`.
    pub fn rf(self, p: &mut UProgram) -> Self {
        let id = p.def_ty(self);
        id.rf()
    }

    /// Dereference of `self`.
    pub fn derf(self, p: &mut UProgram) -> Self {
        let id = p.def_ty(self);
        id.derf()
    }

    /// Array of `len` elements of `self`.
    pub fn arr(self, p: &mut UProgram, len: Len) -> Self {
        let id = p.def_ty(self);
        id.arr(len)
    }

    /// Slice of `self`.
    pub fn slice(self, p: &mut UProgram) -> Self {
        let id = p.def_ty(self);
        id.slice()
    }
}
/// Constructors for composite types built on top of an existing type id.
impl TypeID {
/// `Type::Ref` of this type.
pub fn rf(self) -> Type {
Type::Ref(self)
}
/// `Type::Deref` of this type.
pub fn derf(self) -> Type {
Type::Deref(self)
}
/// `Type::Array` of `len` elements of this type.
pub fn arr(self, len: Len) -> Type {
Type::Array(self, len)
}
/// `Type::Slice` of this type.
pub fn slice(self) -> Type {
Type::Slice(self)
}
}
impl Type {
/// Moves `self` into a `Box`.
pub fn bx(self) -> Box<Self> {
Box::new(self)
}
}
/// Follows forwarding entries starting at `id`.
///
/// * `Ptr` entries are followed to their target.
/// * A `Deref` whose (cleaned) target is a `Ref` is replaced by the cleaned
///   pointee; a `Deref` of anything else is returned as-is.
/// * `Error` (anywhere along the way) yields `None`.
/// * Every other type is returned unchanged.
pub fn clean_type(types: &[Type], id: TypeID) -> Option<TypeID> {
    match &types[id] {
        Type::Ptr(target) => clean_type(types, *target),
        Type::Deref(target) => {
            let target = clean_type(types, *target)?;
            if let Type::Ref(pointee) = &types[target] {
                clean_type(types, *pointee)
            } else {
                Some(id)
            }
        }
        Type::Error => None,
        _ => Some(id),
    }
}
/// Follows forwarding entries starting at `id` and reports why the chain
/// cannot be followed when it gets stuck.
///
/// # Errors
/// * `ResolveRes::Unfinished` when a `Deref` targets a type that is still
///   `Infer`.
/// * `ResolveRes::Finished` when a `Deref` targets something other than a
///   `Ref`, or when an `Error` type is reached.
pub fn resolved_type(types: &[Type], id: TypeID) -> Result<TypeID, ResolveRes> {
    match &types[id] {
        Type::Ptr(target) => resolved_type(types, *target),
        Type::Deref(target) => {
            let target = resolved_type(types, *target)?;
            match &types[target] {
                Type::Ref(pointee) => resolved_type(types, *pointee),
                Type::Infer => Err(ResolveRes::Unfinished),
                _ => Err(ResolveRes::Finished),
            }
        }
        Type::Error => Err(ResolveRes::Finished),
        _ => Ok(id),
    }
}
+12 -169
View File
@@ -1,178 +1,21 @@
#![feature(box_patterns)]
#![feature(try_trait_v2)]
#![feature(trait_alias)]
#![feature(let_chains)]
#![feature(iterator_try_collect)]
// dawg what
#![feature(str_as_str)]
#![cfg_attr(test, feature(gen_blocks))]
pub const FILE_EXT: &str = "lang";
use crate::{io::CompilerOutput, parser_ir::parse_program};
use common::{CompilerOutput, SrcFile};
use ir::{LProgram, UProgram};
use parser::{Import, Imports, PModule, ParserCtx};
use std::{
collections::HashSet,
fs::{create_dir_all, OpenOptions},
io::stdout,
os::unix::fs::OpenOptionsExt,
path::{Path, PathBuf},
process::Command,
};
mod common;
mod compiler;
mod arch;
mod backend;
mod io;
mod ir;
mod parser;
mod util;
mod parser_ir;
/// Entry point: compiles and runs the file named by the first argument, or
/// falls back to `run_stdin` when no file is given.
///
/// Recognised flags (anywhere after the file name):
/// * `--debug` — run the result under gdb
/// * `--asm` — print the unlinked output instead of running it
fn main() {
    // TODO: professional arg parsing
    // `args_os` is used throughout so a non-UTF-8 argument cannot panic.
    let mut args = std::env::args_os().skip(1);
    let file = args.next();
    // Flags used to be recognised only in position 2, which made it
    // impossible to pass more than one of them.
    let flags: Vec<_> = args.collect();
    let has_flag = |name: &str| flags.iter().any(|arg| arg.as_os_str() == name);
    let gdb = has_flag("--debug");
    let asm = has_flag("--asm");
    match file {
        Some(path) => run_file(&PathBuf::from(path), gdb, asm),
        None => run_stdin(),
    }
}
impl UProgram {
/// Loads the module at `path` and every module it (transitively) imports,
/// lowering each one into a single program.
///
/// Imports are resolved relative to the directory of `path`, with the
/// `FILE_EXT` extension applied. Every file read is registered in
/// `output.file_map` under a running file id.
///
/// # Panics
/// Panics when `path` has no parent or file name, when the file name is
/// not valid UTF-8, or when a file cannot be read.
pub fn from_path(path: &Path) -> (Self, CompilerOutput) {
let parent = path.parent().expect("bruh");
let mut program = Self::new();
let mut output = CompilerOutput::new();
// Work list of imports still to load, seeded with the root file.
let mut imports = Imports::new();
imports.insert(Import(vec![path
.file_name()
.expect("bruh")
.to_str()
.expect("bruh")
.to_string()]));
// Imports that have already been loaded, so each one is parsed once.
let mut imported = HashSet::new();
let mut fid = 0;
while !imports.is_empty() {
// Take the current batch; lowering below may queue further imports.
let iter = std::mem::take(&mut imports);
for i in iter {
let import_path = &i.0;
if imported.contains(&i) {
continue;
}
let mut file_path = parent.to_path_buf();
file_path.extend(import_path);
file_path.set_extension(FILE_EXT);
let text = std::fs::read_to_string(&file_path).expect("failed to read file");
output.file_map.insert(
fid,
SrcFile {
path: file_path,
text: text.clone(),
},
);
let mut ctx = ParserCtx::new(fid, text.as_str(), &mut output);
fid += 1;
let res = PModule::parse(&mut ctx);
// println!("Parsed:");
// println!("{:#?}", res.node);
res.lower(import_path.clone(), &mut program, &mut imports, &mut output);
imported.insert(i);
}
}
(program, output)
}
}
// Loads and resolves the program at `path`; depending on `asm` it then
// either prints the unlinked output or links it to an ELF and runs it via
// `save_run`.
// NOTE(review): this body appears to interleave lines from two revisions of
// the file (the `let mut args` / `let Some(path)` lines sit inside the
// `if !output.errs.is_empty()` block, which is never closed) — confirm
// against the repository before editing.
fn run_file(path: &Path, gdb: bool, asm: bool) {
let (mut program, mut output) = UProgram::from_path(path);
program.resolve(&mut output);
// println!("vars:");
// for (id, def) in program.iter_vars() {
// println!(" {id:?} = {}: {}", program.names.path(id), program.type_name(&def.ty));
// }
// for (id, f) in program.iter_fns() {
// println!("{}:{id:?} = {:#?}", program.names.path(id), f);
// }
if !output.errs.is_empty() {
output.write_to(&mut stdout());
let mut args = std::env::args();
let Some(path) = args.nth(1) else {
println!("file expected");
return;
}
let program = LProgram::create(&program).expect("morir");
let unlinked = compiler::compile(&program);
if asm {
println!("{:?}", unlinked);
} else {
let bin = unlinked.link().to_elf();
println!("compiled");
save_run(&bin, gdb);
}
output.write_to(&mut stdout());
}
/// Writes `binary` to `./build/test` (mode `0o750`) and runs it with
/// `qemu-riscv64`.
///
/// With `run_gdb` set, qemu is started with `-g 1234` and a gdb session is
/// attached to that port; if gdb cannot be started the qemu process is
/// killed. When the emulated program exits with a status code, this
/// process exits with the same code.
///
/// # Panics
/// Panics when the build directory or the output file cannot be created or
/// written, when waiting on gdb fails, or when killing qemu fails.
fn save_run(binary: &[u8], run_gdb: bool) {
    use std::io::prelude::*;

    let build_dir = Path::new("./build");
    create_dir_all(build_dir).expect("Failed to create or confirm build directory");
    let exe = build_dir.join(Path::new("test"));
    let exe = exe.as_os_str();

    let mut out = OpenOptions::new()
        .create(true)
        .write(true)
        .truncate(true)
        .mode(0o750)
        .open(exe)
        .expect("Failed to create file");
    out.write_all(binary).expect("Failed to write to file");
    out.sync_all().expect("Failed to sync file");

    println!("running...");
    let mut qemu = Command::new("qemu-riscv64");
    if run_gdb {
        // Make qemu wait for a debugger on port 1234.
        qemu.arg("-g").arg("1234");
    }
    let Ok(mut process) = qemu.arg(exe).spawn() else {
        return;
    };

    if run_gdb {
        let debugger = Command::new("gdb")
            .args(["-q", "-ex", "target remote :1234"])
            .arg(exe)
            .spawn();
        match debugger {
            Ok(mut gdb) => {
                gdb.wait().expect("xd");
            }
            Err(e) => {
                println!("gdb error: {e:?}");
                process.kill().expect("uh oh");
            }
        }
    }

    if let Ok(status) = process.wait() {
        if let Some(code) = status.code() {
            std::process::exit(code);
        }
    }
}
// Placeholder for reading a program from standard input; prints "todo".
// NOTE(review): the three statements after the commented-out block use a
// `path` that is not defined in this function and look like they belong to
// a different revision of `main` — confirm against the repository.
pub fn run_stdin() {
println!("todo");
// for line in BufReader::new(std::io::stdin()).lines() {
// let str = &line.expect("failed to read line");
// let mut ctx = ParserCtx::from(&str[..]);
// if let Some(expr) = PStatement::parse_node(&mut ctx).node.as_ref() {
// if ctx.next().is_none() {
// println!("{:?}", expr);
// } else {
// println!("uhhhh ehehe");
// }
// }
// ctx.output.write_for(&mut stdout(), str);
// }
let mut output = CompilerOutput::new();
let ir = parse_program(&path, &mut output);
output.write(&mut std::io::stdout());
}
+38
View File
@@ -0,0 +1,38 @@
use super::Token;
use crate::io::Span;
/// A literal together with the source span it was read from.
pub struct Lit {
pub ty: LitTy,
pub span: Span,
}
/// The value of a literal token.
#[derive(PartialEq)]
pub enum LitTy {
/// Numeric literal, kept as its source text.
Number(String),
Bool(bool),
/// String literal contents, without the surrounding quotes.
String(String),
Unit,
}
impl From<LitTy> for Token {
fn from(value: LitTy) -> Self {
Self::Lit(value)
}
}
impl std::fmt::Display for LitTy {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
Self::Number(n) => write!(f, "{n}"),
Self::Bool(b) => write!(f, "{b}"),
Self::String(s) => write!(f, "\"{s}\""),
Self::Unit => write!(f, "()"),
}
}
}
/// A literal is displayed exactly like its value; the span is not shown.
impl std::fmt::Display for Lit {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        // Fully qualified so the call does not depend on `Display` being
        // imported into this module (method syntax `self.ty.fmt(f)` only
        // resolves when the trait is in scope).
        std::fmt::Display::fmt(&self.ty, f)
    }
}
+102
View File
@@ -0,0 +1,102 @@
use std::borrow::Borrow;
use crate::io::{CompilerMsg, Span, Spanned};
mod lit;
mod token;
pub use lit::*;
pub use token::*;
/// Token stream with one token of lookahead.
pub struct Cursor<'a> {
/// Span of the token most recently returned by `next`.
pub span: Span,
/// The buffered lookahead token (`None` at end of input).
next: Option<TokenInst>,
/// The underlying lexer.
tokens: Tokens<'a>,
}
impl<'a> Cursor<'a> {
    /// Creates a cursor over `text`, tagging every span with `file`, and
    /// reads the first token into the lookahead slot.
    pub fn new(text: &'a str, file: usize) -> Self {
        let mut cursor = Self {
            span: Span {
                start: 0,
                end: 0,
                file,
            },
            next: None,
            tokens: Tokens::new(text, file),
        };
        // Prime the lookahead; nothing is returned because the slot was empty.
        cursor.next();
        cursor
    }

    /// Consumes and returns the lookahead token, refilling the lookahead
    /// from the lexer. `self.span` is updated to the returned token's span.
    pub fn next(&mut self) -> Option<Token> {
        let upcoming = self.tokens.next();
        let current = std::mem::replace(&mut self.next, upcoming)?;
        self.span = current.span;
        Some(current.inner)
    }

    /// Consumes the lookahead token if it equals `token`; returns whether
    /// it did.
    pub fn next_if(&mut self, token: impl Borrow<Token>) -> bool {
        let wanted: &Token = token.borrow();
        let matches = self.peek().is_some_and(|t| t == wanted);
        if matches {
            self.next();
        }
        matches
    }

    /// Returns the lookahead token without consuming it.
    pub fn peek(&self) -> Option<&Token> {
        self.next.as_ref().map(|inst| &inst.inner)
    }

    /// Like [`Self::next`], but end of input is an error.
    pub fn expect_next(&mut self) -> Result<Token, CompilerMsg> {
        self.next().ok_or_else(CompilerMsg::unexpected_eof)
    }

    /// Like [`Self::peek`], but end of input is an error.
    pub fn expect_peek(&self) -> Result<&Token, CompilerMsg> {
        self.peek().ok_or_else(CompilerMsg::unexpected_eof)
    }

    /// Consumes the next token and checks that it equals `token`.
    ///
    /// # Errors
    /// Fails at end of input, or with an "unexpected token" message when
    /// the consumed token differs from `token`.
    pub fn expect(&mut self, token: impl Borrow<Token>) -> Result<Token, CompilerMsg> {
        let token = token.borrow();
        let next = self.expect_next()?;
        if next == *token {
            Ok(next)
        } else {
            self.unexpected(next, &format!("'{token}'"))
        }
    }

    /// Builds the "unexpected token" error for `token`, located at the span
    /// of the most recently consumed token.
    pub fn unexpected<T>(&self, token: Token, expected: &str) -> Result<T, CompilerMsg> {
        Err(CompilerMsg::unexpected_token(&token, self.span, expected))
    }

    /// Start offset of the lookahead token, or `0` at end of input.
    // These three getters only read, so they take `&self`; existing callers
    // holding `&mut` keep working.
    pub fn peek_start(&self) -> usize {
        self.next.as_ref().map(|inst| inst.span.start).unwrap_or(0)
    }

    /// End offset of the most recently consumed token.
    pub fn cur_end(&self) -> usize {
        self.span.end
    }

    /// Id of the file this cursor reads from.
    pub fn file(&self) -> usize {
        self.span.file
    }
}
impl CompilerMsg {
    /// Message for a token that does not fit; `expected` describes what
    /// would have been accepted. The message points at `span`.
    pub fn unexpected_token(token: &Token, span: Span, expected: &str) -> Self {
        let msg = format!("Unexpected token '{}', expected {expected}", token);
        Self {
            spans: vec![span],
            msg,
        }
    }

    /// Message for input that ended too early; it carries no span.
    pub fn unexpected_eof() -> Self {
        Self {
            spans: Vec::new(),
            msg: String::from("unexpected end of file"),
        }
    }
}
+198
View File
@@ -0,0 +1,198 @@
use crate::parser::cursor::LitTy;
use super::{Span, Spanned};
use std::{iter::Peekable, str::CharIndices};
// Declares the `Token` enum, the keyword lookup `from_str` and the `Display`
// impl from three tables: punctuation symbols, reserved keywords, and
// variants that carry data.
def_tokens! {
symbol {
Dot: ".",
Comma: ",",
Equal: "=",
Colon: ":",
Semicolon: ";",
Plus: "+",
Dash: "-",
Asterisk: "*",
Slash: "/",
OpenParen: "(",
CloseParen: ")",
OpenSquare: "[",
CloseSquare: "]",
OpenCurly: "{",
CloseCurly: "}",
Arrow: "->",
DoubleArrow: "=>",
PlusEqual: "+=",
DashEqual: "-=",
AsteriskEqual: "*=",
SlashEqual: "/=",
Hash: "#",
}
keyword {
Let: "let",
Import: "import",
Fn: "fn",
If: "if",
Loop: "loop",
While: "while",
For: "for",
Match: "match",
Break: "break",
Asm: "asm",
}
other {
Ident(String),
Lit(LitTy),
}
}
/// A token together with the span it was read from.
pub type TokenInst = Spanned<Token>;
/// Lexer: an iterator of spanned tokens over a source string.
pub struct Tokens<'a> {
/// File id stamped onto every span produced.
file: usize,
/// Remaining characters with their byte offsets.
chars: Peekable<CharIndices<'a>>,
}
impl<'a> Tokens<'a> {
/// Creates a lexer over `code`; `file` is the id recorded in every span.
pub fn new(code: &'a str, file: usize) -> Self {
Self {
file,
chars: code.char_indices().peekable(),
}
}
}
impl Iterator for Tokens<'_> {
    type Item = Spanned<Token>;

    /// Lexes the next token, or returns `None` at end of input.
    ///
    /// Spans record the byte offset of the token's first character in
    /// `start` and of its last consumed character in `end`.
    ///
    /// NOTE(review): identifiers and numbers continue only over
    /// alphanumeric characters, so `_` ends the current word and starts a
    /// new one — confirm that this is the intended syntax.
    fn next(&mut self) -> Option<Self::Item> {
        // Skip leading whitespace with a loop rather than one recursive
        // call per character, so long blank runs cannot exhaust the stack.
        let (i, c) = self.chars.find(|(_, c)| !c.is_whitespace())?;
        let mut span = Span {
            start: i,
            end: i,
            file: self.file,
        };
        // Picks a two-character symbol when the next character matches one
        // of the listed continuations (consuming it and extending the span
        // over it), and the single-character default otherwise.
        macro_rules! then {
            (_ => $def:expr, $($char:literal => $to:expr,)*) => {
                match self.chars.peek().copied() {
                    $(Some((idx, $char)) => {
                        self.chars.next();
                        span.end = idx;
                        $to
                    },)*
                    _ => $def,
                }
            };
        }
        let inner = match c {
            '.' => Token::Dot,
            ',' => Token::Comma,
            '(' => Token::OpenParen,
            ')' => Token::CloseParen,
            '[' => Token::OpenSquare,
            ']' => Token::CloseSquare,
            '{' => Token::OpenCurly,
            '}' => Token::CloseCurly,
            '#' => Token::Hash,
            '+' => then! {
                _ => Token::Plus,
                '=' => Token::PlusEqual,
            },
            '-' => then! {
                _ => Token::Dash,
                '=' => Token::DashEqual,
                '>' => Token::Arrow,
            },
            '*' => then! {
                _ => Token::Asterisk,
                '=' => Token::AsteriskEqual,
            },
            '/' => then! {
                _ => Token::Slash,
                '=' => Token::SlashEqual,
            },
            ':' => Token::Colon,
            ';' => Token::Semicolon,
            '=' => then! {
                _ => Token::Equal,
                '>' => Token::DoubleArrow,
            },
            '0'..='9' => {
                // Letters are accepted too, so forms such as `0x1f` stay in
                // one token; the text is kept verbatim.
                let mut s = c.to_string();
                while let Some((j, d)) = self.chars.next_if(|(_, d)| d.is_alphanumeric()) {
                    s.push(d);
                    span.end = j;
                }
                LitTy::Number(s).into()
            }
            '"' => {
                // Everything up to the closing quote (or end of input) is
                // the string's contents; the span covers the closing quote.
                let mut s = String::new();
                for (j, ch) in self.chars.by_ref() {
                    span.end = j;
                    if ch == '"' {
                        break;
                    }
                    s.push(ch);
                }
                LitTy::String(s).into()
            }
            _ => {
                let mut s = c.to_string();
                while let Some((j, ch)) = self.chars.next_if(|(_, ch)| ch.is_alphanumeric()) {
                    s.push(ch);
                    span.end = j;
                }
                match s.as_str() {
                    "true" => LitTy::Bool(true).into(),
                    "false" => LitTy::Bool(false).into(),
                    // Keywords and plain identifiers.
                    _ => from_str(s),
                }
            }
        };
        Some(Spanned { inner, span })
    }
}
// Generates the token vocabulary from three tables:
// * `symbol`  — unit variants with the text they are displayed as,
// * `keyword` — unit variants recognised by `from_str`,
// * `other`   — variants that carry data.
// It emits the `Token` enum, `from_str` (keyword text -> keyword token,
// anything else -> `Token::Ident`) and the `Display` impl for `Token`.
macro_rules! def_tokens {
{
symbol {
$($sym_name:ident: $sym_str:expr,)*
}
keyword {
$($kw_name:ident: $kw_str:expr,)*
}
other {
$($other_name:ident($data:ty),)*
}
} => {
#[derive(PartialEq)]
pub enum Token {
$($sym_name,)*
$($kw_name,)*
$($other_name($data),)*
}
fn from_str(s: String) -> Token {
match s.as_str() {
$($kw_str => Token::$kw_name,)*
_ => Token::Ident(s),
}
}
impl std::fmt::Display for Token {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
// Symbols go through "{}" rather than being used as the format string itself.
$(Token::$sym_name => write!(f, "{}", $sym_str),)*
$(Token::$kw_name => write!(f, $kw_str),)*
$(Token::$other_name(v) => write!(f, "{v}"),)*
}
}
}
};
}
use def_tokens;
+31 -4
View File
@@ -1,5 +1,32 @@
// mod v1;
// mod v2;
mod v3;
mod cursor;
mod node;
mod nodes;
pub use v3::*;
use std::path::Path;
use cursor::*;
pub use node::*;
pub use nodes::*;
use crate::io::CompilerOutput;
/// Reads and parses the source file at `path`.
///
/// The path is appended to `output.files`, and its index there is used as
/// the file id for spans. Returns `None` after reporting an error to
/// `output` when the file cannot be read or does not parse.
pub fn parse_file(path: impl AsRef<Path>, output: &mut CompilerOutput) -> Option<Body> {
    let path = path.as_ref();
    let code = match std::fs::read_to_string(path) {
        Ok(code) => code,
        Err(err) => {
            output.error(format!("Failed to read input file: {err}"));
            return None;
        }
    };

    let file = output.files.len();
    output.files.push(path.to_path_buf());

    let mut ctx = ParseCtx::new(Cursor::new(&code, file));
    let parsed = ctx.parse();
    match parsed {
        Ok(root) => Some(root),
        Err(msg) => {
            output.error(msg);
            None
        }
    }
}
+83
View File
@@ -0,0 +1,83 @@
use crate::{
io::{CompilerMsg, Span},
parser::{
Ident, Node,
cursor::{Cursor, Lit, LitTy, Token},
},
};
/// Parser state: the token cursor plus the start offset of the node
/// currently being parsed.
pub struct ParseCtx<'a> {
/// Start offset of the node being parsed (set by `parse_with`).
start: usize,
cursor: Cursor<'a>,
}
impl<'a> ParseCtx<'a> {
    /// Creates a parser context over `cursor`, starting at offset 0.
    pub fn new(cursor: Cursor<'a>) -> Self {
        Self { start: 0, cursor }
    }

    /// Parses a node of type `N` and boxes it.
    pub fn parse_box<N: Node>(&mut self) -> Result<Box<N>, CompilerMsg> {
        let node = self.parse_with(N::parse)?;
        Ok(Box::new(node))
    }

    /// Parses a node of type `N`.
    pub fn parse<N: Node>(&mut self) -> Result<N, CompilerMsg> {
        self.parse_with(N::parse)
    }

    /// Runs `f` with `self.start` set to the start of the upcoming token,
    /// restoring the previous start afterwards (whether or not `f` failed).
    pub fn parse_with<N: Node>(
        &mut self,
        f: impl FnOnce(&mut Self) -> Result<N, CompilerMsg>,
    ) -> Result<N, CompilerMsg> {
        let saved = std::mem::replace(&mut self.start, self.cursor.peek_start());
        let outcome = f(self);
        self.start = saved;
        outcome
    }

    /// Builds an identifier located at the most recently consumed token.
    pub fn ident(&mut self, s: String) -> Ident {
        Ident {
            name: s,
            span: self.cursor.span,
        }
    }

    /// Builds a literal located at the most recently consumed token.
    pub fn lit(&mut self, ty: LitTy) -> Lit {
        Lit {
            ty,
            span: self.cursor.span,
        }
    }

    /// Span from the start of the node being parsed to the end of the most
    /// recently consumed token.
    pub fn span(&mut self) -> Span {
        let end = self.cursor.cur_end();
        let file = self.cursor.file();
        Span {
            file,
            start: self.start,
            end,
        }
    }

    /// Parses nodes separated by `sep` until `end`, consuming `end`.
    ///
    /// An immediately following `end` yields an empty list. A separator
    /// must always be followed by another node.
    pub fn list<N: Node>(&mut self, sep: Token, end: Token) -> Result<Vec<N>, CompilerMsg> {
        let mut items = Vec::new();
        if !self.next_if(&end) {
            loop {
                items.push(self.parse()?);
                if !self.next_if(&sep) {
                    break;
                }
            }
            self.expect(end)?;
        }
        Ok(items)
    }
}
/// Exposes the cursor's methods directly on the parser context.
impl<'a> std::ops::Deref for ParseCtx<'a> {
type Target = Cursor<'a>;
fn deref(&self) -> &Self::Target {
&self.cursor
}
}
/// Mutable counterpart of the `Deref` impl: gives access to the cursor's `&mut self` methods.
impl<'a> std::ops::DerefMut for ParseCtx<'a> {
fn deref_mut(&mut self) -> &mut Self::Target {
&mut self.cursor
}
}
+50
View File
@@ -0,0 +1,50 @@
use crate::parser::Node;
/// Formatting state passed down while displaying nodes.
#[derive(Clone, Copy)]
pub struct DisplayCtx {
/// Current indentation; the unit (levels vs. columns) is not visible here — TODO confirm.
pub indent: usize,
}
/// Adapter that pairs a node with a display context so it can be used with `{}`.
pub struct NodeDsp<'a, N: Node> {
pub node: &'a N,
pub ctx: DisplayCtx,
}
/// Displays the wrapped node through `Node::fmt`, passing the stored context.
impl<N: Node> std::fmt::Display for NodeDsp<'_, N> {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
self.node.fmt(f, self.ctx)
}
}
/// Adapter that displays a list of nodes separated by `", "`.
pub struct VecDsp<'a, N> {
list: &'a Vec<N>,
ctx: DisplayCtx,
}
/// Writes every element with the stored context, joined by `", "`; an empty
/// list writes nothing.
impl<N: Node> std::fmt::Display for VecDsp<'_, N> {
    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
        for (idx, item) in self.list.iter().enumerate() {
            if idx > 0 {
                f.write_str(", ")?;
            }
            write!(f, "{}", item.dsp(self.ctx))?;
        }
        Ok(())
    }
}
/// Extension trait that gives lists of nodes a `dsp` method.
pub trait VecDspT<N> {
/// Wraps the list in a `VecDsp` that borrows it for no longer than `self` lives.
fn dsp<'a, 'b>(&'a self, ctx: impl Into<DisplayCtx>) -> VecDsp<'b, N>
where
'a: 'b;
}
/// `Vec<N>` is displayed by borrowing it into a `VecDsp`.
impl<N> VecDspT<N> for Vec<N> {
    fn dsp<'a, 'b>(&'a self, ctx: impl Into<DisplayCtx>) -> VecDsp<'b, N>
    where
        'a: 'b,
    {
        VecDsp {
            list: self,
            ctx: ctx.into(),
        }
    }
}
+17
View File
@@ -0,0 +1,17 @@
mod ctx;
mod dsp;
pub use ctx::*;
pub use dsp::*;
use crate::io::CompilerMsg;
/// A syntax-tree node that can be parsed from tokens and printed back.
pub trait Node: Sized {
/// Parses one node from `ctx`.
fn parse(ctx: &mut ParseCtx) -> Result<Self, CompilerMsg>;
/// Writes the node to `f` using the display context `ctx`.
fn fmt(&self, f: &mut std::fmt::Formatter, ctx: DisplayCtx) -> std::fmt::Result;
/// Wraps the node so it can be formatted with `{}` under `ctx`.
fn dsp(&self, ctx: DisplayCtx) -> NodeDsp<'_, Self> {
NodeDsp { node: self, ctx }
}
/// Like `dsp`, starting from an indentation of 0.
fn new_dsp(&self) -> NodeDsp<'_, Self> {
self.dsp(DisplayCtx { indent: 0 })
}
}
+23
View File
@@ -0,0 +1,23 @@
use crate::{
arch::x86_64::Code,
parser::{Node, cursor::Token},
};
pub mod x86_64;
/// An inline assembly block, by target architecture.
pub enum AsmBlock {
X86_64(Code),
}
impl Node for AsmBlock {
fn parse(ctx: &mut crate::parser::ParseCtx) -> Result<Self, crate::io::CompilerMsg> {
ctx.expect(Token::OpenCurly)?;
let asm = ctx.parse()?;
ctx.expect(Token::CloseCurly)?;
Ok(Self::X86_64(asm))
}
fn fmt(&self, f: &mut std::fmt::Formatter, ctx: crate::parser::DisplayCtx) -> std::fmt::Result {
write!(f, "asm {{ ... }}")
}
}
+84
View File
@@ -0,0 +1,84 @@
use crate::{
arch::x86_64::*,
io::{CompilerMsg, Span},
parser::{
Node,
cursor::{LitTy, Token},
},
};
impl Node for Code {
    /// Parses instructions for as long as the next token is an identifier.
    ///
    /// Recognised mnemonics are `mov <reg>, <reg|imm>` and `int <imm>`; any
    /// other identifier is consumed and reported as an unknown instruction.
    fn parse(ctx: &mut crate::parser::ParseCtx) -> Result<Self, crate::io::CompilerMsg> {
        let mut program = Code::default();
        while let Some(Token::Ident(mnemonic)) = ctx.peek() {
            match mnemonic.as_str() {
                "mov" => {
                    ctx.next();
                    let dst = parse_reg(ctx)?;
                    ctx.expect(Token::Comma)?;
                    let src = parse_rmi(ctx)?;
                    program.mov(dst, src)?;
                }
                "int" => {
                    ctx.next();
                    let Token::Lit(LitTy::Number(digits)) = ctx.expect_next()? else {
                        return Err("Expected an immediate".into());
                    };
                    let imm = parse_imm(&digits, ctx.span)?;
                    let operand = imm
                        .try_into()
                        .map_err(|_| CompilerMsg::from("Immediate must be a u8"))?;
                    program.int(operand);
                }
                unknown => {
                    // Build the message before advancing: `unknown` borrows
                    // from the peeked token.
                    let msg = format!("Unknown instruction {unknown}");
                    ctx.next();
                    let spans = vec![ctx.span];
                    return Err(CompilerMsg { msg, spans });
                }
            }
        }
        Ok(program)
    }

    /// Not implemented yet; calling this panics via `todo!()`.
    fn fmt(&self, _f: &mut std::fmt::Formatter, _ctx: crate::parser::DisplayCtx) -> std::fmt::Result {
        todo!()
    }
}
/// Parses an immediate of the form `[-][0x]<digits>`.
///
/// A leading `-` negates the value and a `0x` prefix selects base 16
/// (otherwise base 10). The magnitude must fit in a `u64`.
///
/// # Errors
///
/// Returns an "invalid immediate" message at `span` when the digits are
/// missing, malformed, out of range, or carry a stray `+` sign.
pub fn parse_imm(s: &str, span: Span) -> Result<Imm, CompilerMsg> {
    let (sign, unsigned) = match s.strip_prefix('-') {
        Some(rest) => (-1i128, rest),
        None => (1i128, s),
    };
    let (radix, digits) = match unsigned.strip_prefix("0x") {
        Some(hex) => (16, hex),
        None => (10, unsigned),
    };
    // `from_str_radix` accepts a leading '+', which would let inputs such as
    // "-+5" or "0x+ff" through; reject that explicitly.
    let magnitude = if digits.starts_with('+') {
        None
    } else {
        u64::from_str_radix(digits, radix).ok()
    };
    let magnitude = magnitude.ok_or_else(|| CompilerMsg::from(("invalid immediate", span)))?;
    Ok(Imm(i128::from(magnitude) * sign))
}
/// Consumes one token and interprets it as a register name or a numeric
/// immediate.
///
/// # Errors
///
/// Fails when the token is neither an identifier naming a register nor a
/// number literal, or when the number is not a valid immediate.
pub fn parse_rmi(ctx: &mut crate::parser::ParseCtx) -> Result<RegImmMem, CompilerMsg> {
    let token = ctx.expect_next()?;
    let span = ctx.span;
    let unexpected = || CompilerMsg::unexpected_token(&token, span, "a register or immediate");
    match &token {
        Token::Ident(name) => match Reg::parse(name) {
            Some(reg) => Ok(RegImmMem::Reg(reg)),
            None => Err(unexpected()),
        },
        Token::Lit(LitTy::Number(digits)) => parse_imm(digits, span).map(RegImmMem::Imm),
        _ => Err(unexpected()),
    }
}
/// Consumes one token and interprets it as a register name.
///
/// # Errors
///
/// Fails when the token is not an identifier or does not name a register.
pub fn parse_reg(ctx: &mut crate::parser::ParseCtx) -> Result<Reg, CompilerMsg> {
    let token = ctx.expect_next()?;
    let reg = match &token {
        Token::Ident(name) => Reg::parse(name),
        _ => None,
    };
    reg.ok_or_else(|| CompilerMsg::unexpected_token(&token, ctx.span, "a register"))
}
+58
View File
@@ -0,0 +1,58 @@
use super::*;
/// A sequence of expressions, as found between the braces of a block.
pub struct Body {
/// The expressions in source order.
pub items: Vec<Expr>,
/// `true` when the body is empty or a `;` followed the last expression.
pub final_semicolon: bool,
/// Span reported by the parse context once the body was parsed.
pub span: Span,
}
impl Node for Body {
    /// Parses expressions until end of input or a closing `}` (which is left
    /// unconsumed).
    ///
    /// Expressions that do not end in a block must be followed by `;` unless
    /// they are last; extra semicolons are skipped.
    fn parse(ctx: &mut ParseCtx) -> Result<Self, CompilerMsg> {
        fn at_end(ctx: &mut ParseCtx) -> bool {
            match ctx.peek() {
                None => true,
                Some(tok) => *tok == Token::CloseCurly,
            }
        }

        let mut items = Vec::new();
        // Stays `true` unless the body ends directly after an expression.
        let mut final_semicolon = true;
        while !at_end(ctx) {
            let expr: Expr = ctx.parse()?;
            let wants_semicolon = expr.needs_semicolon();
            items.push(expr);
            if at_end(ctx) {
                final_semicolon = false;
                break;
            }
            if wants_semicolon {
                ctx.expect(Token::Semicolon)?;
            }
            while ctx.next_if(Token::Semicolon) {}
        }

        let span = ctx.span();
        Ok(Self {
            items,
            final_semicolon,
            span,
        })
    }

    /// Writes one expression per line, each prefixed with `ctx.indent` spaces.
    ///
    /// Non-final lines get a `;` when the expression needs one; the final
    /// line gets one when `final_semicolon` is set.
    fn fmt(&self, f: &mut std::fmt::Formatter, ctx: DisplayCtx) -> std::fmt::Result {
        let pad = " ".repeat(ctx.indent);
        let last_idx = self.items.len().saturating_sub(1);
        for (idx, item) in self.items.iter().enumerate() {
            let terminated = if idx == last_idx {
                self.final_semicolon
            } else {
                item.needs_semicolon()
            };
            let semi = if terminated { ";" } else { "" };
            writeln!(f, "{pad}{}{semi}", item.dsp(ctx))?;
        }
        Ok(())
    }
}
+273
View File
@@ -0,0 +1,273 @@
use crate::parser::VecDspT;
pub use super::*;
/// An expression together with the span recorded when it was parsed.
pub struct Expr {
/// Span reported by the parse context after parsing this expression.
pub span: Span,
/// What kind of expression this is.
pub ty: ExprTy,
}
/// The kinds of expression an [`Expr`] can hold.
pub enum ExprTy {
/// `{ ... }`: a body wrapped in curly braces.
Block(Body),
/// `( expr )`: a parenthesised expression.
Group(Box<Expr>),
/// `of.field`: member access.
Member {
of: Box<Expr>,
field: Ident,
},
/// A bare identifier.
Ident(Ident),
/// A literal value.
Lit(Lit),
/// `-expr`.
Negate(Box<Expr>),
/// `target(args, ...)`.
Call {
target: Box<Expr>,
args: Vec<Expr>,
},
/// `target = val`.
Assign {
target: Box<Expr>,
val: Box<Expr>,
},
/// `target : [ty] = val`, or with a second `:` in place of `=`, in which
/// case `const_` is `true`.
Define {
target: Box<Expr>,
ty: Option<Type>,
const_: bool,
val: Box<Expr>,
},
/// `if cond body`.
If {
cond: Box<Expr>,
body: Box<Expr>,
},
/// `loop body`.
Loop {
body: Box<Expr>,
},
/// `while cond body`.
While {
cond: Box<Expr>,
body: Box<Expr>,
},
/// `import ident`.
Import(Ident),
/// A `fn` expression.
Fn(Box<Func>),
/// `break`.
Break,
/// An `asm` block.
Asm(AsmBlock),
}
impl Node for Expr {
    /// Parses a unit expression followed by any number of postfix forms:
    /// `= val`, `: [ty] (=|:) val`, `(args)`, and `.field`.
    ///
    /// Each postfix form wraps everything parsed so far as its target.
    fn parse(ctx: &mut ParseCtx) -> Result<Self, CompilerMsg> {
        let mut lhs = Self::unit(ctx)?;
        loop {
            let ty = match ctx.peek() {
                Some(Token::Equal) => {
                    ctx.next();
                    let val = Box::new(ctx.parse_with(Self::unit)?);
                    ExprTy::Assign {
                        target: Box::new(lhs),
                        val,
                    }
                }
                Some(Token::Colon) => {
                    ctx.next();
                    // A type is present unless `=` or `:` follows immediately.
                    let declared = match ctx.expect_peek()? {
                        Token::Equal | Token::Colon => None,
                        _ => Some(ctx.parse()?),
                    };
                    let const_ = match ctx.expect_next()? {
                        Token::Equal => false,
                        Token::Colon => true,
                        t => ctx.unexpected(t, "an equals = or colon :")?,
                    };
                    let val = Box::new(ctx.parse_with(Self::unit)?);
                    ExprTy::Define {
                        target: Box::new(lhs),
                        ty: declared,
                        val,
                        const_,
                    }
                }
                Some(Token::OpenParen) => {
                    ctx.next();
                    let args = ctx.list(Token::Comma, Token::CloseParen)?;
                    ExprTy::Call {
                        target: Box::new(lhs),
                        args,
                    }
                }
                Some(Token::Dot) => {
                    ctx.next();
                    let field = ctx.parse()?;
                    ExprTy::Member {
                        of: Box::new(lhs),
                        field,
                    }
                }
                _ => break,
            };
            let span = ctx.span();
            lhs = Self { ty, span };
        }
        Ok(lhs)
    }

    /// Renders the expression by delegating to its [`ExprTy`].
    fn fmt(&self, f: &mut std::fmt::Formatter, ctx: DisplayCtx) -> std::fmt::Result {
        self.ty.fmt(f, ctx)
    }
}
impl ExprTy {
    /// Writes the source-like form of this expression to `f`.
    ///
    /// `ctx.indent` is the indentation of the line the expression starts on.
    /// The contents of a block are rendered three spaces deeper, and the
    /// block's closing brace is aligned back to `ctx.indent`.
    fn fmt(&self, f: &mut std::fmt::Formatter, ctx: DisplayCtx) -> std::fmt::Result {
        match self {
            Self::Ident(ident) => ident.fmt(f, ctx),
            Self::Group(expr) => write!(f, "({})", expr.dsp(ctx)),
            Self::Fn(func) => func.fmt(f, ctx),
            Self::Lit(lit) => write!(f, "{}", lit),
            Self::Negate(expr) => write!(f, "-{}", expr.dsp(ctx)),
            Self::Call { target, args } => {
                write!(f, "{}({})", target.dsp(ctx), args.dsp(ctx))
            }
            Self::Assign { target, val } => {
                write!(f, "{} = {}", target.dsp(ctx), val.dsp(ctx))
            }
            Self::Define {
                target,
                ty,
                val,
                const_,
            } => {
                write!(f, "{} :", target.dsp(ctx))?;
                if let Some(ty) = ty {
                    write!(f, " {} ", ty.dsp(ctx))?;
                }
                let separator = if *const_ { ":" } else { "=" };
                write!(f, "{} {}", separator, val.dsp(ctx))
            }
            Self::Member { of, field } => write!(f, "{}.{field}", of.dsp(ctx)),
            Self::If { cond, body } => {
                write!(f, "if {} {}", cond.dsp(ctx), body.dsp(ctx))
            }
            Self::While { cond, body } => {
                write!(f, "while {} {}", cond.dsp(ctx), body.dsp(ctx))
            }
            Self::Loop { body } => write!(f, "loop {}", body.dsp(ctx)),
            Self::Block(body) => {
                write!(f, "{{")?;
                if !body.items.is_empty() {
                    writeln!(f)?;
                    let mut inner = ctx;
                    inner.indent += 3;
                    body.fmt(f, inner)?;
                    // The body ends with a newline, so the closing brace
                    // starts a fresh line and must be re-indented to the
                    // enclosing level; otherwise nested blocks close at
                    // column 0.
                    write!(f, "{:width$}", "", width = ctx.indent)?;
                }
                write!(f, "}}")
            }
            Self::Import(ident) => write!(f, "import {ident}"),
            Self::Break => write!(f, "break"),
            Self::Asm(asm) => asm.fmt(f, ctx),
        }
    }
}
impl Expr {
pub fn fmt_body(&self, f: &mut std::fmt::Formatter, ctx: DisplayCtx) -> std::fmt::Result {
match self.ty {
ExprTy::Block(_) => self.fmt(f, ctx),
_ => write!(f, "=> {}", self.dsp(ctx)),
}
}
fn unit(ctx: &mut ParseCtx) -> Result<Self, CompilerMsg> {
let ty = match ctx.expect_next()? {
Token::Dash => ExprTy::Negate(ctx.parse_box()?),
Token::Ident(s) => ExprTy::Ident(ctx.ident(s)),
Token::Lit(l) => ExprTy::Lit(ctx.lit(l)),
Token::Fn => ExprTy::Fn(ctx.parse_box()?),
Token::If => {
let cond = ctx.parse_box()?;
let body = Box::new(Self::body(ctx)?);
ExprTy::If { cond, body }
}
Token::While => {
let cond = ctx.parse_box()?;
let body = Box::new(Self::body(ctx)?);
ExprTy::While { cond, body }
}
Token::Loop => {
let body = ctx.parse_box()?;
ExprTy::Loop { body }
}
Token::OpenParen => {
if ctx.next_if(Token::CloseParen) {
ExprTy::Lit(Lit {
ty: LitTy::Unit,
span: ctx.span(),
})
} else {
let inner = ctx.parse_box()?;
ctx.expect(Token::CloseParen)?;
ExprTy::Group(inner)
}
}
Token::OpenCurly => {
let body = ctx.parse()?;
ctx.expect(Token::CloseCurly)?;
ExprTy::Block(body)
}
Token::Break => ExprTy::Break,
Token::Import => {
let ident = ctx.parse()?;
ExprTy::Import(ident)
}
Token::Asm => ExprTy::Asm(ctx.parse()?),
other => return ctx.unexpected(other, "an expression"),
};
Ok(Self {
ty,
span: ctx.span(),
})
}
pub fn is_group(&self) -> bool {
matches!(self.ty, ExprTy::Group(_))
}
pub fn is_block(&self) -> bool {
matches!(self.ty, ExprTy::Block(_))
}
pub fn block(ctx: &mut ParseCtx) -> Result<Expr, CompilerMsg> {
ctx.expect(Token::OpenCurly)?;
let id = ctx.parse()?;
ctx.expect(Token::CloseCurly)?;
Ok(Expr {
ty: ExprTy::Block(id),
span: ctx.span(),
})
}
pub fn body(ctx: &mut ParseCtx) -> Result<Expr, CompilerMsg> {
if ctx.next_if(Token::DoubleArrow) {
ctx.parse()
} else {
ctx.parse_with(Expr::block)
}
}
pub fn ends_with_block(&self) -> bool {
match &self.ty {
ExprTy::Block(..) => true,
ExprTy::Loop { body }
| ExprTy::While { body, .. }
| ExprTy::If { body, .. }
| ExprTy::Negate(body)
| ExprTy::Assign { val: body, .. } => body.ends_with_block(),
ExprTy::Define { val: body, .. } => body.ends_with_block(),
ExprTy::Fn(f) => f.ends_with_block(),
_ => false,
}
}
pub fn needs_semicolon(&self) -> bool {
!self.ends_with_block()
}
}
+49
View File
@@ -0,0 +1,49 @@
use super::*;
/// A function expression: parameter list, optional return type and body.
pub struct Func {
// Parameters listed between the parentheses.
args: Vec<Param>,
// Return type written after `->`, if any.
ret: Option<Type>,
// Either a `{ ... }` block or a `=> expr` body.
body: Expr,
// Span reported by the parse context after parsing the function.
span: Span,
}
impl Node for Func {
    /// Parses `(params) [-> ret] body`; the `fn` keyword itself is expected
    /// to have been consumed already.
    fn parse(ctx: &mut ParseCtx) -> Result<Self, CompilerMsg> {
        ctx.expect(Token::OpenParen)?;
        let args = ctx.list(Token::Comma, Token::CloseParen)?;
        let ret = if ctx.next_if(Token::Arrow) {
            Some(ctx.parse()?)
        } else {
            None
        };
        let body = Expr::body(ctx)?;
        let span = ctx.span();
        Ok(Self {
            args,
            ret,
            body,
            span,
        })
    }

    /// Writes `fn(a, b) [-> ret ]` followed by the body.
    fn fmt(&self, f: &mut std::fmt::Formatter, ctx: DisplayCtx) -> std::fmt::Result {
        f.write_str("fn(")?;
        for (idx, param) in self.args.iter().enumerate() {
            if idx != 0 {
                f.write_str(", ")?;
            }
            write!(f, "{}", param.dsp(ctx))?;
        }
        f.write_str(") ")?;
        if let Some(ret) = &self.ret {
            write!(f, "-> {} ", ret.dsp(ctx))?;
        }
        self.body.fmt_body(f, ctx)
    }
}
impl Func {
    /// Whether the function's body ends with a block.
    pub fn ends_with_block(&self) -> bool {
        let Self { body, .. } = self;
        body.ends_with_block()
    }
}
+25
View File
@@ -0,0 +1,25 @@
use super::*;
/// An identifier and where it appeared.
pub struct Ident {
/// The identifier text.
pub name: String,
/// Source span of the identifier.
pub span: Span,
}
impl Node for Ident {
    /// Consumes the next token, which must be an identifier.
    fn parse(ctx: &mut super::ParseCtx) -> Result<Self, crate::io::CompilerMsg> {
        let name = match ctx.expect_next()? {
            Token::Ident(name) => name,
            other => return ctx.unexpected(other, "an identifier"),
        };
        Ok(ctx.ident(name))
    }

    /// Writes the identifier's name; the display context is not used.
    fn fmt(&self, f: &mut std::fmt::Formatter, _: DisplayCtx) -> std::fmt::Result {
        f.write_str(&self.name)
    }
}
/// Displays the identifier as its bare name, honouring the formatter's
/// width and alignment flags.
impl std::fmt::Display for Ident {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        // Fully qualified so the call does not depend on `Display` having
        // been imported into this module.
        std::fmt::Display::fmt(&self.name, f)
    }
}
+78
View File
@@ -0,0 +1,78 @@
use super::*;
/// A parsed item together with its span.
pub struct Item {
/// What kind of item this is.
pub ty: ItemTy,
/// Span reported by the parse context after parsing the item.
pub span: Span,
}
/// The kinds of item an [`Item`] can hold.
pub enum ItemTy {
/// `let name[: ty] = val`.
Let {
name: Ident,
ty: Option<Type>,
val: Expr,
},
/// A function introduced by the `fn` keyword.
Fn(Func),
/// Any other expression.
Expr(Expr),
/// `import ident`.
Import(Ident),
}
impl Node for Item {
    /// Chooses the item kind from the next token: `fn`, `let` and `import`
    /// introduce their own forms, anything else is parsed as an expression.
    fn parse(ctx: &mut ParseCtx) -> Result<Self, CompilerMsg> {
        let ty = match ctx.expect_peek()? {
            Token::Fn => {
                ctx.next();
                ItemTy::Fn(ctx.parse()?)
            }
            Token::Let => {
                ctx.next();
                let name = ctx.parse()?;
                let ty = if ctx.next_if(Token::Colon) {
                    Some(ctx.parse()?)
                } else {
                    None
                };
                ctx.expect(Token::Equal)?;
                let val = ctx.parse()?;
                ItemTy::Let { name, ty, val }
            }
            Token::Import => {
                ctx.next();
                ItemTy::Import(ctx.parse()?)
            }
            _ => ItemTy::Expr(ctx.parse()?),
        };
        let span = ctx.span();
        Ok(Self { ty, span })
    }

    /// Writes the item in source-like form.
    fn fmt(&self, f: &mut std::fmt::Formatter, ctx: DisplayCtx) -> std::fmt::Result {
        match &self.ty {
            ItemTy::Fn(func) => func.fmt(f, ctx),
            ItemTy::Let { name, ty, val } => {
                write!(f, "let {}", name.dsp(ctx))?;
                if let Some(ty) = ty {
                    write!(f, ": {}", ty.dsp(ctx))?;
                }
                write!(f, " = {}", val.dsp(ctx))
            }
            ItemTy::Expr(expr) => expr.fmt(f, ctx),
            ItemTy::Import(ident) => write!(f, "import {}", ident.dsp(ctx)),
        }
    }
}
impl Item {
    /// Whether the item's last component is a block; imports never are.
    pub fn ends_with_block(&self) -> bool {
        match &self.ty {
            ItemTy::Import(_) => false,
            ItemTy::Fn(func) => func.ends_with_block(),
            ItemTy::Let { val: expr, .. } | ItemTy::Expr(expr) => expr.ends_with_block(),
        }
    }

    /// An item needs a terminating `;` unless it ends with a block.
    pub fn needs_semicolon(&self) -> bool {
        let block_terminated = self.ends_with_block();
        !block_terminated
    }
}
+18
View File
@@ -0,0 +1,18 @@
mod asm;
mod body;
mod expr;
mod func;
mod ident;
mod param;
mod struct_;
mod ty;
pub use asm::*;
pub use body::*;
pub use expr::*;
pub use func::*;
pub use ident::*;
pub use param::*;
pub use ty::*;
use super::{DisplayCtx, Lit, LitTy, Node, ParseCtx, Token};
use crate::io::{CompilerMsg, Span};
+25
View File
@@ -0,0 +1,25 @@
use super::*;
/// A function parameter: a name with an optional type annotation.
pub struct Param {
// Parameter name.
name: Ident,
// Type written after `:`, if any.
ty: Option<Type>,
}
impl Node for Param {
    /// Parses `name` optionally followed by `: type`.
    fn parse(ctx: &mut ParseCtx) -> Result<Self, CompilerMsg> {
        let name = ctx.parse()?;
        let ty = if ctx.next_if(Token::Colon) {
            Some(ctx.parse()?)
        } else {
            None
        };
        Ok(Self { name, ty })
    }

    /// Writes `name` or `name: type`.
    fn fmt(&self, f: &mut std::fmt::Formatter, ctx: DisplayCtx) -> std::fmt::Result {
        self.name.fmt(f, ctx)?;
        match &self.ty {
            Some(ty) => write!(f, ": {}", ty.dsp(ctx)),
            None => Ok(()),
        }
    }
}
+8
View File
@@ -0,0 +1,8 @@
use super::*;
/// A struct declaration: a name and its fields.
// NOTE(review): no `Node` impl is visible for this type here; it looks like
// a placeholder. Confirm before relying on it.
pub struct Struct {
// Name of the struct.
name: String,
// Declared fields.
fields: Vec<Field>,
}
/// A field of a [`Struct`]; currently carries no data.
pub struct Field {}
+20
View File
@@ -0,0 +1,20 @@
use super::*;
/// A type annotation.
pub enum Type {
/// A type referred to by name.
Ident(Ident),
}
impl Node for Type {
    /// Consumes the next token, which must be an identifier naming the type.
    fn parse(ctx: &mut ParseCtx) -> Result<Self, CompilerMsg> {
        match ctx.expect_next()? {
            Token::Ident(name) => Ok(Self::Ident(ctx.ident(name))),
            other => ctx.unexpected(other, "a type"),
        }
    }

    /// Writes the type as it would appear in source.
    fn fmt(&self, f: &mut std::fmt::Formatter, ctx: DisplayCtx) -> std::fmt::Result {
        let Self::Ident(ident) = self;
        ident.fmt(f, ctx)
    }
}
-91
View File
@@ -1,91 +0,0 @@
use super::error::ParserError;
use super::token::{CharCursor, Keyword, Symbol, Token, TokenInstance};
use super::FilePos;
/// Token stream over a [`CharCursor`] with one token of lookahead.
pub struct TokenCursor<'a> {
// Underlying character cursor that tokens are parsed from.
cursor: CharCursor<'a>,
// Buffered lookahead token; this is what `peek` returns.
next: Option<TokenInstance>,
// Taken from `cursor.next_pos()` each time `next` is called.
next_pos: FilePos,
// Taken from `cursor.prev_pos()` each time `next` is called.
prev_end: FilePos,
}
impl<'a> TokenCursor<'a> {
    /// Returns the buffered token and refills the buffer from the character
    /// cursor, recording the cursor positions first.
    pub fn next(&mut self) -> Option<TokenInstance> {
        self.prev_end = self.cursor.prev_pos();
        self.next_pos = self.cursor.next_pos();
        let upcoming = TokenInstance::parse(&mut self.cursor);
        std::mem::replace(&mut self.next, upcoming)
    }

    /// Like [`Self::next`], but end of input is an error.
    ///
    /// The cursor state is left untouched when no token is buffered.
    pub fn expect_next(&mut self) -> Result<TokenInstance, ParserError> {
        if self.peek().is_none() {
            return Err(ParserError::unexpected_end());
        }
        // The buffer was just seen to be non-empty; the error value is only
        // built if that ever stops holding, so there is no panic path.
        self.next().ok_or_else(ParserError::unexpected_end)
    }

    /// Consumes the next token and checks that it equals `t`.
    pub fn expect_token(&mut self, t: Token) -> Result<(), ParserError> {
        let next = self.expect_next()?;
        if t == next.token {
            Ok(())
        } else {
            Err(ParserError::unexpected_token(&next, &format!("{t:?}")))
        }
    }

    /// Consumes the next token and checks that it is `symbol`.
    pub fn expect_sym(&mut self, symbol: Symbol) -> Result<(), ParserError> {
        self.expect_token(Token::Symbol(symbol))
    }

    /// Consumes tokens up to and including the first `symbol`, or until the
    /// input ends.
    pub fn seek_sym(&mut self, symbol: Symbol) {
        while self
            .next()
            .is_some_and(|n| n.token != Token::Symbol(symbol))
        {}
    }

    /// Skips tokens until the upcoming token is one of `syms` (left
    /// unconsumed) or the input ends.
    pub fn seek_syms(&mut self, syms: &[Symbol]) {
        while self
            .peek()
            .is_some_and(|n| !syms.iter().any(|s| n.is_symbol(*s)))
        {
            self.next();
        }
    }

    /// Skips tokens until `f` accepts the upcoming token, which is returned
    /// without being consumed; returns `None` if the input ends first.
    pub fn seek(&mut self, f: impl Fn(&TokenInstance) -> bool) -> Option<&TokenInstance> {
        loop {
            if f(self.peek()?) {
                return self.peek();
            }
            self.next();
        }
    }

    /// Consumes the next token and checks that it is the keyword `kw`.
    pub fn expect_kw(&mut self, kw: Keyword) -> Result<(), ParserError> {
        self.expect_token(Token::Keyword(kw))
    }

    /// Returns the upcoming token without consuming it.
    pub fn peek(&self) -> Option<&TokenInstance> {
        self.next.as_ref()
    }

    /// Like [`Self::peek`], but end of input is an error.
    pub fn expect_peek(&mut self) -> Result<&TokenInstance, ParserError> {
        // Lazy: the error is only constructed when there is no token.
        self.peek().ok_or_else(ParserError::unexpected_end)
    }

    /// Direct access to the underlying character cursor.
    pub fn chars(&mut self) -> &mut CharCursor<'a> {
        &mut self.cursor
    }

    /// Position recorded from `cursor.prev_pos()` at the last `next` call.
    pub fn prev_end(&self) -> FilePos {
        self.prev_end
    }

    /// Position recorded from `cursor.next_pos()` at the last `next` call.
    pub fn next_pos(&self) -> FilePos {
        self.next_pos
    }
}
/// Builds a token cursor over a source string via [`CharCursor`].
impl<'a> From<&'a str> for TokenCursor<'a> {
    fn from(string: &'a str) -> Self {
        let chars = CharCursor::from(string);
        chars.into()
    }
}
/// Wraps a character cursor, parsing the first token eagerly so that `peek`
/// works immediately; both positions start at [`FilePos::start`].
impl<'a> From<CharCursor<'a>> for TokenCursor<'a> {
    fn from(mut cursor: CharCursor<'a>) -> Self {
        let next = TokenInstance::parse(&mut cursor);
        let next_pos = FilePos::start();
        let prev_end = FilePos::start();
        Self {
            cursor,
            next,
            next_pos,
            prev_end,
        }
    }
}
-62
View File
@@ -1,62 +0,0 @@
use super::{
token::{FileSpan, TokenInstance},
FilePos,
};
/// A parse error: a message plus the source spans it refers to.
#[derive(Debug, Clone)]
pub struct ParserError {
/// Human-readable description of the problem.
pub msg: String,
/// Spans to show with the message; may be empty.
pub spans: Vec<FileSpan>,
}
/// Accumulator for the errors produced during a parse.
pub struct ParserErrors {
/// Errors in the order they were added.
pub errs: Vec<ParserError>,
}
impl ParserError {
    /// Builds an error whose spans are those of the given token instances.
    pub fn from_instances(instances: &[&TokenInstance], msg: String) -> Self {
        let spans = instances.iter().map(|inst| inst.span).collect();
        ParserError { msg, spans }
    }

    /// Builds an error with a message and no spans.
    pub fn from_msg(msg: String) -> Self {
        Self { msg, spans: vec![] }
    }

    /// Builds an error pointing at a single position.
    pub fn at(pos: FilePos, msg: String) -> Self {
        let spans = vec![FileSpan::at(pos)];
        Self { msg, spans }
    }

    /// Error for running out of input; carries no span.
    pub fn unexpected_end() -> Self {
        Self::from_msg(String::from("unexpected end of input"))
    }

    /// Error for a token that is not what the parser `expected`.
    pub fn unexpected_token(inst: &TokenInstance, expected: &str) -> Self {
        let msg = format!("unexpected token {:?}; expected {expected}", inst.token);
        Self::from_instances(&[inst], msg)
    }

    /// Writes `error: <msg>` to `writer`, followed by each span rendered
    /// against `file`. A trailing `:` is added when there are spans.
    pub fn write_for(&self, writer: &mut impl std::io::Write, file: &str) -> std::io::Result<()> {
        let suffix = match self.spans.is_empty() {
            true => "",
            false => ":",
        };
        writeln!(writer, "error: {}{}", self.msg, suffix)?;
        for span in self.spans.iter() {
            span.write_for(writer, file)?;
        }
        Ok(())
    }
}
impl ParserErrors {
    /// Creates an empty error list.
    pub fn new() -> Self {
        Self { errs: vec![] }
    }

    /// Appends `err` to the list.
    pub fn add(&mut self, err: ParserError) {
        let Self { errs } = self;
        errs.push(err);
    }
}
-47
View File
@@ -1,47 +0,0 @@
use std::io::{stdout, BufRead, BufReader};
mod cursor;
mod error;
mod node;
mod nodes;
mod parse;
mod token;
pub use cursor::*;
pub use error::*;
pub use node::*;
pub use nodes::*;
pub use parse::*;
use token::*;
/// Parses `file` (the source text) as a module, prints the parsed node with
/// `Debug`, resolves it when there were no errors, and writes any collected
/// errors to stdout.
///
/// # Panics
///
/// Panics if resolution fails after an error-free parse, or if writing an
/// error to stdout fails.
pub fn parse_file(file: &str) {
    let mut errors = ParserErrors::new();
    let mut cursor = TokenCursor::from(file);
    let res = Module::parse_node(&mut cursor, &mut errors);
    println!("{:?}", res.node);
    let clean = errors.errs.is_empty();
    if clean {
        // The resolved module is not used yet.
        let _module = res.node.resolve().expect("what");
    }
    let out = &mut stdout();
    for err in errors.errs {
        err.write_for(out, file).unwrap();
    }
}
/// Reads stdin line by line, parsing each line as one statement.
///
/// A successfully parsed statement is printed with `Debug` when nothing is
/// left on the line; otherwise a placeholder message is printed. Errors for
/// the line are then written to stdout.
///
/// # Panics
///
/// Panics if a line cannot be read or an error cannot be written.
pub fn run_stdin() {
    let reader = BufReader::new(std::io::stdin());
    for line in reader.lines() {
        let mut errors = ParserErrors::new();
        let text = line.expect("failed to read line");
        let mut cursor = TokenCursor::from(text.as_str());
        if let Ok(expr) = Statement::parse_node(&mut cursor, &mut errors).node.as_ref() {
            let fully_consumed = cursor.next().is_none();
            if fully_consumed {
                println!("{expr:?}");
            } else {
                println!("uhhhh ehehe");
            }
        }
        let out = &mut stdout();
        for err in errors.errs {
            err.write_for(out, &text).unwrap();
        }
    }
}
-90
View File
@@ -1,90 +0,0 @@
use std::{
fmt::Debug,
ops::{Deref, DerefMut},
};
use super::FileSpan;
/// Type-level switch that decides how a node stores its payload.
pub trait MaybeResolved {
/// The storage type used for a payload of type `T`.
type Inner<T>;
}
/// Marker for nodes whose payload is stored directly as `T`.
pub struct Resolved;
impl MaybeResolved for Resolved {
type Inner<T> = T;
}
/// Marker for nodes whose payload is a `Result<T, ()>`, i.e. may be an error.
pub struct Unresolved;
impl MaybeResolved for Unresolved {
type Inner<T> = Result<T, ()>;
}
/// A payload with its source span; `R` selects how the payload is stored.
pub struct Node<T, R: MaybeResolved> {
/// The payload, stored as `R::Inner<T>`.
pub inner: <R as MaybeResolved>::Inner<T>,
/// Source span covered by this node.
pub span: FileSpan,
}
impl<T> Node<T, Unresolved> {
    /// Creates an unresolved node holding a successfully parsed value.
    pub fn new(inner: T, span: FileSpan) -> Self {
        let inner = Ok(inner);
        Self { inner, span }
    }

    /// Moves the payload (if any) into a `Box`, keeping the span.
    pub fn bx(self) -> Node<Box<T>, Unresolved> {
        let Self { inner, span } = self;
        Node {
            inner: inner.map(Box::new),
            span,
        }
    }
}
/// Dereferences a node to its stored payload (`R::Inner<T>`).
impl<T, R: MaybeResolved> Deref for Node<T, R> {
    type Target = <R as MaybeResolved>::Inner<T>;

    fn deref(&self) -> &Self::Target {
        let Self { inner, .. } = self;
        inner
    }
}
/// Mutably dereferences a node to its stored payload.
impl<T, R: MaybeResolved> DerefMut for Node<T, R> {
    fn deref_mut(&mut self) -> &mut Self::Target {
        let Self { inner, .. } = self;
        inner
    }
}
/// Debug-prints the payload, or the literal `{error}` when the node holds
/// an error.
impl<T: Debug> Debug for Node<T, Unresolved> {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        if let Ok(value) = &self.inner {
            Debug::fmt(value, f)
        } else {
            f.write_str("{error}")
        }
    }
}
/// Debug-prints the payload only; the span is not shown.
impl<T: Debug> Debug for Node<T, Resolved> {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        Debug::fmt(&self.inner, f)
    }
}
/// Conversion from an unresolved form into its resolved form `Res`.
pub trait Resolvable<Res> {
/// Consumes `self`, returning the resolved value or `Err(())` on failure.
fn resolve(self) -> Result<Res, ()>;
}
/// Resolves the payload and keeps the span; fails if the node already holds
/// an error or if resolving the payload fails.
impl<T: Resolvable<Res>, Res> Resolvable<Node<Res, Resolved>> for Node<T, Unresolved> {
    fn resolve(self) -> Result<Node<Res, Resolved>, ()> {
        let Node { inner, span } = self;
        match inner {
            Ok(value) => {
                let inner = value.resolve()?;
                Ok(Node { inner, span })
            }
            Err(()) => Err(()),
        }
    }
}
/// Resolves the boxed value and re-boxes the result.
impl<T: Resolvable<Res>, Res> Resolvable<Box<Res>> for Box<T> {
    fn resolve(self) -> Result<Box<Res>, ()> {
        let unboxed = *self;
        unboxed.resolve().map(Box::new)
    }
}

Some files were not shown because too many files have changed in this diff Show More