From 1d568f8ce359d5a094c460649dee7faae14c2208 Mon Sep 17 00:00:00 2001 From: Shadow Cat Date: Mon, 1 Jun 2026 22:40:24 -0400 Subject: [PATCH] steal from jai --- src/io/mod.rs | 19 +++++-- src/ir/structs/mod.rs | 3 ++ src/ir/structs/namespace.rs | 8 +-- src/main.rs | 11 +--- src/parser/cursor/mod.rs | 6 +-- src/parser/cursor/token.rs | 7 +-- src/parser/mod.rs | 11 ++-- src/parser/node/ctx.rs | 2 +- src/parser/nodes/body.rs | 8 +-- src/parser/nodes/expr.rs | 100 +++++++++++++++++++++++++++++++++--- src/parser/nodes/func.rs | 13 ----- src/parser/nodes/ident.rs | 2 +- src/parser/nodes/mod.rs | 2 - src/parser/nodes/ty.rs | 2 +- src/parser_ir/mod.rs | 98 ++++++++++++++++++++++++++++++----- test/main.lang | 19 ++++--- test/other.lang | 4 +- 17 files changed, 231 insertions(+), 84 deletions(-) diff --git a/src/io/mod.rs b/src/io/mod.rs index 0c161c0..beab14f 100644 --- a/src/io/mod.rs +++ b/src/io/mod.rs @@ -1,3 +1,5 @@ +use std::path::{Path, PathBuf}; + #[derive(Debug, Clone, Copy)] pub struct Span { pub file: usize, @@ -32,7 +34,7 @@ pub struct CompilerMsg { #[derive(Default)] pub struct CompilerOutput { pub errors: Vec, - pub files: Vec, + pub files: Vec, } impl CompilerOutput { @@ -98,7 +100,7 @@ impl Span { &text[self.start..=srange.end - 1], )?; if *eline != *sline + 1 { - writeln!(w, " ...")?; + writeln!(w, " ...")?; } writeln!( w, @@ -112,10 +114,19 @@ impl Span { } impl From for CompilerMsg { - fn from(value: String) -> Self { + fn from(msg: String) -> Self { Self { spans: Vec::new(), - msg: value.to_string(), + msg, + } + } +} + +impl> From<(S, Span)> for CompilerMsg { + fn from((msg, span): (S, Span)) -> Self { + Self { + spans: vec![span], + msg: msg.into(), } } } diff --git a/src/ir/structs/mod.rs b/src/ir/structs/mod.rs index d1985ca..530afd3 100644 --- a/src/ir/structs/mod.rs +++ b/src/ir/structs/mod.rs @@ -2,3 +2,6 @@ mod namespace; pub use namespace::*; use super::Id; + +pub struct Fn { +} diff --git a/src/ir/structs/namespace.rs b/src/ir/structs/namespace.rs index 457c249..841878a 100644 --- a/src/ir/structs/namespace.rs +++ b/src/ir/structs/namespace.rs @@ -1,17 +1,11 @@ use super::*; -use crate::parser::Ident; use std::collections::HashMap; #[derive(Default)] pub struct Namespace { - pub items: HashMap, + pub items: HashMap, } pub enum Item { Import(Id), } - -// issue: if I try to parse a function body, I'll want to have clear statements such as -// "call trait fn func on x" or "call field func of x", but you (often) can't tell until typed -// x.func -// x'func diff --git a/src/main.rs b/src/main.rs index a9f7b9c..1a1a346 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,8 +1,4 @@ -use crate::{ - io::CompilerOutput, - parser::{Node, parse_file}, - parser_ir::parse_program, -}; +use crate::{io::CompilerOutput, parser_ir::parse_program}; mod io; mod ir; @@ -16,9 +12,6 @@ fn main() { return; }; let mut output = CompilerOutput::new(); - let root = parse_file(&path, &mut output); - if let Some(root) = root { - print!("{}", root.new_dsp()); - } + let ir = parse_program(&path, &mut output); output.write(&mut std::io::stdout()); } diff --git a/src/parser/cursor/mod.rs b/src/parser/cursor/mod.rs index cb0c518..5f76c4a 100644 --- a/src/parser/cursor/mod.rs +++ b/src/parser/cursor/mod.rs @@ -64,12 +64,12 @@ impl<'a> Cursor<'a> { if next == *token { Ok(next) } else { - self.unexpected(&next, &format!("'{token}'")) + self.unexpected(next, &format!("'{token}'")) } } - pub fn unexpected(&self, token: &Token, expected: &str) -> Result { - Err(CompilerMsg::unexpected_token(token, self.span, expected)) + pub fn unexpected(&self, token: Token, expected: &str) -> Result { + Err(CompilerMsg::unexpected_token(&token, self.span, expected)) } pub fn peek_start(&mut self) -> usize { diff --git a/src/parser/cursor/token.rs b/src/parser/cursor/token.rs index 9ddb271..5d3b4df 100644 --- a/src/parser/cursor/token.rs +++ b/src/parser/cursor/token.rs @@ -26,7 +26,6 @@ def_tokens! { DashEqual: "-=", AsteriskEqual: "*=", SlashEqual: "/=", - DoubleColon: "::", Hash: "#", } keyword { @@ -38,6 +37,7 @@ def_tokens! { While: "while", For: "for", Match: "match", + Break: "break", } other { Ident(String), @@ -112,10 +112,7 @@ impl Iterator for Tokens<'_> { _ => Token::Slash, '=' => Token::SlashEqual, }, - ':' => then! { - _ => Token::Colon, - ':' => Token::DoubleColon, - }, + ':' => Token::Colon, ';' => Token::Semicolon, '=' => then! { _ => Token::Equal, diff --git a/src/parser/mod.rs b/src/parser/mod.rs index ff34e65..a279421 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -2,22 +2,25 @@ mod cursor; mod node; mod nodes; +use std::path::Path; + use cursor::*; pub use node::*; pub use nodes::*; use crate::io::CompilerOutput; -pub fn parse_file(path: &str, output: &mut CompilerOutput) -> Option { - let code = match std::fs::read_to_string(path) { +pub fn parse_file(path: impl AsRef, output: &mut CompilerOutput) -> Option { + let code = match std::fs::read_to_string(&path) { Ok(code) => code, Err(err) => { output.error(format!("Failed to read input file: {err}")); return None; } }; - output.files.push(path.to_string()); - let mut ctx = ParseCtx::new(Cursor::new(&code, 0)); + let file = output.files.len(); + output.files.push(path.as_ref().to_path_buf()); + let mut ctx = ParseCtx::new(Cursor::new(&code, file)); let root = match ctx.parse() { Ok(v) => v, Err(msg) => { diff --git a/src/parser/node/ctx.rs b/src/parser/node/ctx.rs index 2f6f237..0723c14 100644 --- a/src/parser/node/ctx.rs +++ b/src/parser/node/ctx.rs @@ -30,7 +30,7 @@ impl<'a> ParseCtx<'a> { ) -> Result { let old_start = self.start; self.start = self.cursor.peek_start(); - let res = f(self).map(|r| r); + let res = f(self); self.start = old_start; res } diff --git a/src/parser/nodes/body.rs b/src/parser/nodes/body.rs index 2597b38..6ad98a2 100644 --- a/src/parser/nodes/body.rs +++ b/src/parser/nodes/body.rs @@ -1,7 +1,7 @@ use super::*; pub struct Body { - pub items: Vec, + pub items: Vec, pub final_semicolon: bool, pub span: Span, } @@ -16,9 +16,9 @@ impl Node for Body { if at_end(ctx) { break true; } - let item: Item = ctx.parse()?; - let needs_semicolon = item.needs_semicolon(); - items.push(item); + let expr: Expr = ctx.parse()?; + let needs_semicolon = expr.needs_semicolon(); + items.push(expr); if at_end(ctx) { break false; } diff --git a/src/parser/nodes/expr.rs b/src/parser/nodes/expr.rs index 768880b..90c6b28 100644 --- a/src/parser/nodes/expr.rs +++ b/src/parser/nodes/expr.rs @@ -3,22 +3,48 @@ use crate::parser::VecDspT; pub use super::*; pub struct Expr { - span: Span, - ty: ExprTy, + pub span: Span, + pub ty: ExprTy, } pub enum ExprTy { Block(Body), Group(Box), + Member { + of: Box, + field: Ident, + }, Ident(Ident), Lit(Lit), Negate(Box), - Call { target: Box, args: Vec }, - Assign { target: Box, val: Box }, - If { cond: Box, body: Box }, - Loop { body: Box }, - While { cond: Box, body: Box }, + Call { + target: Box, + args: Vec, + }, + Assign { + target: Box, + val: Box, + }, + Define { + target: Box, + ty: Option, + const_: bool, + val: Box, + }, + If { + cond: Box, + body: Box, + }, + Loop { + body: Box, + }, + While { + cond: Box, + body: Box, + }, + Import(Ident), Fn(Box), + Break, } impl Node for Expr { @@ -32,12 +58,39 @@ impl Node for Expr { let val = Box::new(ctx.parse_with(Self::unit)?); ExprTy::Assign { target, val } } + Token::Colon => { + ctx.next(); + let target = Box::new(res); + let mut ty = None; + let next = ctx.expect_peek()?; + if !matches!(next, Token::Equal | Token::Colon) { + ty = Some(ctx.parse()?); + } + let const_ = match ctx.expect_next()? { + Token::Equal => false, + Token::Colon => true, + t => ctx.unexpected(t, "an equals = or colon :")?, + }; + let val = Box::new(ctx.parse_with(Self::unit)?); + ExprTy::Define { + target, + ty, + val, + const_, + } + } Token::OpenParen => { ctx.next(); let target = Box::new(res); let args = ctx.list(Token::Comma, Token::CloseParen)?; ExprTy::Call { target, args } } + Token::Dot => { + ctx.next(); + let of = Box::new(res); + let field = ctx.parse()?; + ExprTy::Member { of, field } + } _ => break, }; res = Self { @@ -69,6 +122,21 @@ impl ExprTy { Self::Assign { target, val } => { write!(f, "{} = {}", target.dsp(ctx), val.dsp(ctx)) } + Self::Define { + target, + ty, + val, + const_, + } => { + write!(f, "{} :", target.dsp(ctx))?; + if let Some(ty) = ty { + write!(f, " {} ", ty.dsp(ctx))?; + } + write!(f, "{} {}", if *const_ { ":" } else { "=" }, val.dsp(ctx)) + } + Self::Member { of, field } => { + write!(f, "{}.{field}", of.dsp(ctx)) + } Self::If { cond, body } => { write!(f, "if {} {}", cond.dsp(ctx), body.dsp(ctx)) } @@ -88,6 +156,12 @@ impl ExprTy { write!(f, "}}")?; Ok(()) } + Self::Import(ident) => { + write!(f, "import {ident}") + } + Self::Break => { + write!(f, "break") + } } } } @@ -137,7 +211,12 @@ impl Expr { ctx.expect(Token::CloseCurly)?; ExprTy::Block(body) } - other => return ctx.unexpected(&other, "an expression"), + Token::Break => ExprTy::Break, + Token::Import => { + let ident = ctx.parse()?; + ExprTy::Import(ident) + } + other => return ctx.unexpected(other, "an expression"), }; Ok(Self { ty, @@ -179,8 +258,13 @@ impl Expr { | ExprTy::If { body, .. } | ExprTy::Negate(body) | ExprTy::Assign { val: body, .. } => body.ends_with_block(), + | ExprTy::Define { val: body, .. } => body.ends_with_block(), ExprTy::Fn(f) => f.ends_with_block(), _ => false, } } + + pub fn needs_semicolon(&self) -> bool { + !self.ends_with_block() + } } diff --git a/src/parser/nodes/func.rs b/src/parser/nodes/func.rs index 589932e..da122c8 100644 --- a/src/parser/nodes/func.rs +++ b/src/parser/nodes/func.rs @@ -2,7 +2,6 @@ use super::*; pub struct Func { args: Vec, - name: Option, ret: Option, body: Expr, span: Span, @@ -10,14 +9,6 @@ pub struct Func { impl Node for Func { fn parse(ctx: &mut ParseCtx) -> Result { - let mut name = None; - if let Token::Ident(ident) = ctx.expect_peek()? { - // yucky - let ident = ident.to_string(); - ctx.next(); - let ident = ctx.ident(ident); - name = Some(ident); - } ctx.expect(Token::OpenParen)?; let args = ctx.list(Token::Comma, Token::CloseParen)?; let mut ret = None; @@ -29,16 +20,12 @@ impl Node for Func { args, ret, body, - name, span: ctx.span(), }) } fn fmt(&self, f: &mut std::fmt::Formatter, ctx: DisplayCtx) -> std::fmt::Result { write!(f, "fn")?; - if let Some(name) = &self.name { - write!(f, " {name}")?; - } write!(f, "(")?; if let Some((last, rest)) = self.args.split_last() { for arg in rest { diff --git a/src/parser/nodes/ident.rs b/src/parser/nodes/ident.rs index 5881fc3..dc7ced5 100644 --- a/src/parser/nodes/ident.rs +++ b/src/parser/nodes/ident.rs @@ -9,7 +9,7 @@ impl Node for Ident { fn parse(ctx: &mut super::ParseCtx) -> Result { match ctx.expect_next()? { Token::Ident(ident) => Ok(ctx.ident(ident)), - t => ctx.unexpected(&t, "an identifier"), + t => ctx.unexpected(t, "an identifier"), } } diff --git a/src/parser/nodes/mod.rs b/src/parser/nodes/mod.rs index c76c7f5..6c3f9cd 100644 --- a/src/parser/nodes/mod.rs +++ b/src/parser/nodes/mod.rs @@ -2,7 +2,6 @@ mod body; mod expr; mod func; mod ident; -mod item; mod param; mod struct_; mod ty; @@ -10,7 +9,6 @@ pub use body::*; pub use expr::*; pub use func::*; pub use ident::*; -pub use item::*; pub use param::*; pub use ty::*; diff --git a/src/parser/nodes/ty.rs b/src/parser/nodes/ty.rs index ea220ab..423ff92 100644 --- a/src/parser/nodes/ty.rs +++ b/src/parser/nodes/ty.rs @@ -8,7 +8,7 @@ impl Node for Type { fn parse(ctx: &mut ParseCtx) -> Result { Ok(match ctx.expect_next()? { Token::Ident(s) => Self::Ident(ctx.ident(s)), - t => ctx.unexpected(&t, "a type")?, + t => ctx.unexpected(t, "a type")?, }) } diff --git a/src/parser_ir/mod.rs b/src/parser_ir/mod.rs index 703f487..e65798f 100644 --- a/src/parser_ir/mod.rs +++ b/src/parser_ir/mod.rs @@ -1,23 +1,95 @@ -use crate::{ - io::CompilerOutput, - ir::{Ir, Namespace}, - parser::{self, parse_file}, +use std::{ + collections::{HashMap, HashSet}, + path::Path, }; -pub fn parse_program(path: &str, output: &mut CompilerOutput) -> Option { - let root = parse_file(path, output)?; - let mut ir = Ir::default(); - add_defs(ir.root(), &root); +use crate::{ + io::{CompilerMsg, CompilerOutput, Span}, + ir::Ir, + parser::{self, ExprTy, Ident, Node, parse_file}, +}; + +pub fn parse_program(path: impl AsRef, output: &mut CompilerOutput) -> Option { + let path = path.as_ref(); + let mut imports = Imports::default(); + let dir = path.parent().unwrap(); + imports.add(path.file_stem().unwrap().to_str().unwrap()); + while let Some(next) = imports.new.pop() { + imports.done.insert(next.clone()); + let path = dir.join(next + ".lang"); + println!("=== {path:?}"); + let root = parse_file(path, output)?; + print!("{}", root.new_dsp()); + let defs = scan(&mut imports, &root, output); + for (name, spans) in &defs.duplicates { + output.error(CompilerMsg { + msg: format!("Multiple definitions found for {name}"), + spans: spans.clone(), + }); + } + } + + if !output.errors.is_empty() { + return None; + } + + let ir = Ir::default(); Some(ir) } -pub fn add_defs(namespace: &mut Namespace, body: &parser::Body) { +pub fn scan(imports: &mut Imports, body: &parser::Body, output: &mut CompilerOutput) -> Defs { + let mut defs = Defs::default(); for item in &body.items { match &item.ty { - parser::ItemTy::Let { name, ty, val } => todo!(), - parser::ItemTy::Fn(func) => todo!(), - parser::ItemTy::Expr(expr) => todo!(), - parser::ItemTy::Import(ident) => todo!(), + ExprTy::Define { target, const_, .. } if *const_ => match &target.ty { + ExprTy::Ident(name) => defs.add(name), + _ => output.error(("Invalid left hand side of definition", target.span)), + }, + ExprTy::Import(import) => { + defs.add(import); + imports.add(&import.name); + } + _ => (), } } + defs +} + +#[derive(Default)] +pub struct Defs { + map: HashMap, + duplicates: HashMap>, + next_id: usize, +} + +impl Defs { + pub fn add(&mut self, ident: &Ident) { + if let Some(def) = self.map.get(&ident.name) { + if let Some(spans) = self.duplicates.get_mut(&ident.name) { + spans.push(ident.span); + } else { + self.duplicates + .insert(ident.name.clone(), vec![def.1, ident.span]); + } + return; + } + self.map + .insert(ident.name.clone(), (self.next_id, ident.span)); + self.next_id += 1; + } +} + +#[derive(Default)] +pub struct Imports { + done: HashSet, + new: Vec, +} + +impl Imports { + pub fn add(&mut self, name: &str) { + if self.done.contains(name) || self.new.iter().any(|v| v == name) { + return; + } + self.new.push(name.to_string()); + } } diff --git a/test/main.lang b/test/main.lang index db9a600..2b1a365 100644 --- a/test/main.lang +++ b/test/main.lang @@ -1,14 +1,17 @@ -modl other; - -let x: i32 = 3; +x : i32 = 3; while true { - print("hello"); - print(x); -}; + print("hello"); + print(x); + other.thing(); + thing(); + break; +} -let y = true; +y :: true; if y => print("hello"); -fn thing() { +thing :: fn() { } + +import other; diff --git a/test/other.lang b/test/other.lang index 92159b4..0f2e0e5 100644 --- a/test/other.lang +++ b/test/other.lang @@ -1,3 +1,5 @@ -fn thing() { +thing :: fn() { print("hello from other"); } + +import main;